# Climate Analysis

### Import all necessary packages and configure SQLAlchemy

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import datetime as dt

In [42]:
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func

In [31]:
# Create the SQLAlchemy engine for the SQLite database file. The URL holds a
# relative path, so it resolves against the current working directory.
engine = create_engine("sqlite:///Resources/hawaii.sqlite")

In [32]:
# Declare an automap base: its mapped classes are generated from the database
# schema instead of being written by hand.
Base = automap_base()

In [33]:
# Reflect the tables reachable through `engine` and generate a mapped class
# for each of them.
# NOTE(review): the `reflect=True` form is deprecated from SQLAlchemy 1.4 on in
# favor of `Base.prepare(autoload_with=engine)` — confirm the installed version
# before switching.
Base.prepare(engine, reflect=True)

In [34]:
# List the names of the classes that automap generated.
Base.classes.keys()

['measurement', 'station']

In [35]:
# Bind the two automapped ORM classes to short notebook-level names.
mapped_classes = Base.classes
measurement, station = mapped_classes.measurement, mapped_classes.station

In [36]:
# Open an ORM session bound to the engine; the ORM queries go through it.
session = Session(bind=engine)

#### Use inspector to see what columns we will be working with

In [37]:
# Build an inspector on the engine and list the tables in the database.
inspector = inspect(engine)
inspector.get_table_names()

['measurement', 'station']

In [38]:
# Show each column of the `measurement` table as "name  :  type".
columns = inspector.get_columns('measurement')
for column_info in columns:
    column_name, column_type = column_info['name'], column_info['type']
    print(column_name, ' : ', column_type)

id  :  INTEGER
station  :  TEXT
date  :  TEXT
prcp  :  FLOAT
tobs  :  FLOAT


In [39]:
# Show each column of the `station` table as "name  :  type".
columns2 = inspector.get_columns('station')
for column_info in columns2:
    column_name, column_type = column_info['name'], column_info['type']
    print(column_name, ' : ', column_type)

id  :  INTEGER
station  :  TEXT
name  :  TEXT
latitude  :  FLOAT
longitude  :  FLOAT
elevation  :  FLOAT


### Run a raw SQL query to preview data points

In [40]:
# Preview the first ten rows of the measurement table with raw SQL.
# The statement runs on an explicitly opened connection that is closed when
# the `with` block exits. Connectionless `engine.execute("...")` with a raw
# string is deprecated in SQLAlchemy 1.4 and removed in 2.0; this form works
# on both older and newer releases.
with engine.connect() as connection:
    measurement_preview = connection.execute(
        sqlalchemy.text("SELECT * FROM measurement LIMIT 10;")
    ).fetchall()
measurement_preview

[(1, 'USC00519397', '2010-01-01', 0.08, 65.0),
 (2, 'USC00519397', '2010-01-02', 0.0, 63.0),
 (3, 'USC00519397', '2010-01-03', 0.0, 74.0),
 (4, 'USC00519397', '2010-01-04', 0.0, 76.0),
 (5, 'USC00519397', '2010-01-06', None, 73.0),
 (6, 'USC00519397', '2010-01-07', 0.06, 70.0),
 (7, 'USC00519397', '2010-01-08', 0.0, 64.0),
 (8, 'USC00519397', '2010-01-09', 0.0, 68.0),
 (9, 'USC00519397', '2010-01-10', 0.0, 73.0),
 (10, 'USC00519397', '2010-01-11', 0.01, 64.0)]

### Query the data and store it in variables for analysis

#### Find the last 12 months of precipitation data (TODO: clarify what the output graph is supposed to represent)

In [53]:
# Fetch (date, prcp) pairs recorded after the start date, ordered by date.
#
# `measurement.date` is a TEXT column holding ISO dates (YYYY-MM-DD), so the
# bound is given as an ISO string and the comparison is plain lexicographic
# text ordering. Passing a `datetime` object here would rely on the sqlite3
# driver implicitly converting it to text (a default adapter deprecated in
# Python 3.12) and would compare against a "date time" string instead.
# The strict `>` excludes the start date itself.
start_date = "2016-08-23"
measurement_results = (
    session.query(measurement.date, measurement.prcp)
    .filter(measurement.date > start_date)
    .order_by(measurement.date)
    .all()
)

In [56]:
# Load the query results into a DataFrame indexed by date. Built in a single
# expression (no `inplace=True`) so re-running the cell always yields the same
# frame; the former bare `.head()` call was dropped because it was not the
# cell's last expression and therefore never displayed anything.
measurement_df = (
    pd.DataFrame(measurement_results, columns=['date', 'prcp'])
    .set_index('date')
)

# Number of rows. Data points are not unique according to date (presumably
# because several stations report on the same day — verify before aggregating).
len(measurement_df.index)

2223

#### Station Analysis

In [71]:
# Count the rows of the station table; the aggregate query yields one value.
station_count = session.query(func.count(station.station))
total_stations = station_count.scalar()
print(f"There are {total_stations} stations in the station table")

There are 9 stations in the station table


#### Most Active Stations!

In [78]:
# Number of measurement rows per station, busiest station first.
observation_count = func.count(measurement.date)
station_active = (
    session.query(measurement.station, observation_count)
    .group_by(measurement.station)
    .order_by(observation_count.desc())
    .all()
)

In [79]:
# Print each station id next to its observation count.
for station_id, observations in station_active:
    print(station_id, observations)

USC00519281 2772
USC00519397 2724
USC00513117 2709
USC00519523 2669
USC00516128 2612
USC00514830 2202
USC00511918 1979
USC00517948 1372
USC00518838 511


#### Station details

In [61]:
# Pull every column of the station table, ordered by primary key.
station_columns = (
    station.id,
    station.station,
    station.name,
    station.latitude,
    station.longitude,
    station.elevation,
)
station_list = session.query(*station_columns).order_by(station.id).all()

In [73]:
# Tabulate the station rows, indexed by their id, and show up to 20 of them.
station_field_names = ['id', 'station', 'name', 'latitude', 'longitude', 'elevation']
station_df = pd.DataFrame(station_list, columns=station_field_names).set_index('id')
station_df.head(20)

Unnamed: 0_level_0,station,name,latitude,longitude,elevation
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
2,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
3,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
4,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
5,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
6,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
7,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
8,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [69]:
# Group the station table by station code and count the non-null values in
# each remaining column per group.
station_groupby = station_df.groupby('station')
station_groupby.count()

Unnamed: 0_level_0,name,latitude,longitude,elevation
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USC00511918,1,1,1,1
USC00513117,1,1,1,1
USC00514830,1,1,1,1
USC00516128,1,1,1,1
USC00517948,1,1,1,1
USC00518838,1,1,1,1
USC00519281,1,1,1,1
USC00519397,1,1,1,1
USC00519523,1,1,1,1
