# Climate Analysis



In [None]:
# Dependencies
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy.sql import func
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from datetime import date, timedelta
import pandas as pd
from pandas import DataFrame


In [None]:

# Create engine & connection
# The URL points SQLAlchemy at the SQLite file hawaii.sqlite; echo=False keeps
# the engine from logging the SQL statements it emits.
engine = create_engine("sqlite:///hawaii.sqlite", echo=False)
# NOTE(review): the cells shown here query through `session` and never use or
# close `conn` — confirm whether this connection is still needed.
conn = engine.connect()


In [None]:
# Create automap
# Reflect the tables that already exist in the database into mapped classes.
Base = automap_base()
# NOTE(review): `reflect=True` is deprecated in SQLAlchemy 1.4 in favour of
# `Base.prepare(autoload_with=engine)` — confirm against the installed version.
Base.prepare(engine, reflect=True)
# Lists the names of the reflected classes. A notebook only displays the last
# expression of a cell, so this line produces no visible output here.
Base.classes.keys()
# Mapped classes for the `measurement` and `station` tables.
Measurement = Base.classes.measurement
Station = Base.classes.station


In [None]:

# Start a session to query the database
# The queries in this notebook are all issued through this session object.
session = Session(engine)

# Precipitation Analysis

In [None]:
# Getting a list of dates for the last 12 months
# The window is the 365 days ending on (and including) 2017-08-23, newest first.
base_date = datetime.datetime.strptime("2017-08-23", "%Y-%m-%d")
numdays = 365
date_list = [base_date - datetime.timedelta(days=x) for x in range(numdays)]

# Converting them to a list of "YYYY-MM-DD" strings, the form used to filter
# Measurement.date. The iteration variable is deliberately not named `date`:
# a module-level loop variable with that name would overwrite the `date` class
# imported from datetime.
str_dates = [day.strftime("%Y-%m-%d") for day in date_list]

In [None]:
# Getting precipitation data
# Average prcp over all measurement rows that share a date, restricted to the
# dates in str_dates. GROUP BY alone does not guarantee the order of the
# returned rows, so sort by date explicitly; rows come back oldest first.
prcp_results = session.query(func.avg(Measurement.prcp), Measurement.date)\
               .filter(Measurement.date.in_(str_dates))\
               .group_by(Measurement.date)\
               .order_by(Measurement.date)

# Converting to a list
# Each row is (avg_prcp, date); keep only the averaged value.
prcp_data = [row[0] for row in prcp_results]

In [None]:
# Loading the results into a DataFrame
# Build the frame from the query rows themselves so that every date stays
# paired with its own average. Pairing `str_dates` with `prcp_data` by position
# assigns values to the wrong dates whenever the two lists differ in order or
# length, and putting strings and floats through one constructor call leaves
# the precipitation column with object dtype, which breaks numeric plotting
# and numeric summary statistics.
prcp_df = pd.DataFrame(
    [(row[1], row[0]) for row in prcp_results],  # rows are (avg_prcp, date)
    columns=["Date", "Precipitation"],
)
# Guarantee a numeric column even if some averages came back as NULL/None.
prcp_df["Precipitation"] = pd.to_numeric(prcp_df["Precipitation"])
# Chronological order, with the date as the index used for the x axis.
prcp_df = prcp_df.sort_values("Date").set_index("Date")
prcp_df.head()

In [None]:
# Plotting data
# Plots the columns of prcp_df against its index.
prcp_df.plot()
# NOTE: savefig writes into Images/ and does not create the directory, so it
# must already exist.
plt.savefig("Images/precipitation_analysis.png")
plt.show()

In [None]:
# Summary statistics
# describe() summarises the values of the Precipitation column.
prcp_df["Precipitation"].describe()

# Station Analysis

In [None]:
# Count the number of stations
# Query that counts the non-NULL Station.station values.
stations = session.query(func.count(Station.station))
# Indexing the query executes it and returns its first row, so station_count
# is a one-element row holding the count rather than a bare integer.
station_count = stations[0]
station_count

In [None]:
# Query to find observation counts by station
# Number of non-NULL prcp readings per station, largest count first. The count
# expression is shared by the SELECT list and the ORDER BY clause.
prcp_count = func.count(Measurement.prcp)
obs_counts = (
    session.query(Measurement.station, prcp_count)
    .group_by(Measurement.station)
    .order_by(prcp_count.desc())
)

# Printing observations
# One (station, count) row per line.
for station in obs_counts:
    print(station)

# Storing station name
#top_station = obs_counts[0]

In [None]:
# Query to get last 12 months of tobs from top station
# obs_counts is ordered by observation count, descending, so its first row
# belongs to the station with the most observations; column 0 of that row is
# the station id. The filter has to compare against that id — comparing
# Measurement.station with the obs_counts query object itself does not select
# a single station.
top_station = obs_counts[0][0]
tobs_results = session.query(Measurement.tobs)\
               .filter(Measurement.date.in_(str_dates))\
               .filter(Measurement.station == top_station)

# Converting to a list
# Each row is a one-element tuple; keep the temperature value.
tobs_data = [row[0] for row in tobs_results]

In [None]:
# Plotting histogram
# Distribution of the collected tobs values, split into 12 bins.
plt.hist(tobs_data, bins=12, label='tobs')
plt.ylabel("Frequency")
plt.legend()
# NOTE: savefig writes into Images/ and does not create the directory, so it
# must already exist.
plt.savefig("Images/station_analysis.png")
plt.show()


# Temperature Analysis

In [None]:
# Defining function calc_temps
def calc_temps(start_date, end_date):
    """Return average, minimum and maximum tobs for a range one year earlier.

    Both arguments are "YYYY-MM-DD" strings. Each is moved back one calendar
    year, and every measurement dated within the shifted range (both ends
    included) contributes to the statistics.

    Args:
        start_date: First day of the range, as "YYYY-MM-DD".
        end_date: Last day of the range, as "YYYY-MM-DD".

    Returns:
        A tuple ``(temp_avg, temp_min, temp_max)``. The SQL aggregates yield
        None for all three when no measurement matches, which includes the
        case where start_date is later than end_date.

    Raises:
        ValueError: If either argument does not parse as "%Y-%m-%d".
    """
    first_day = _shift_back_one_year(
        datetime.datetime.strptime(start_date, "%Y-%m-%d")
    )
    last_day = _shift_back_one_year(
        datetime.datetime.strptime(end_date, "%Y-%m-%d")
    )

    # Every day from first_day to last_day inclusive, formatted as the
    # "YYYY-MM-DD" strings that Measurement.date is matched against.
    span = (last_day - first_day).days
    str_date_range = [
        (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(span + 1)
    ]

    # A single query computes all three aggregates over the same rows; an
    # aggregate query without GROUP BY always returns exactly one row.
    temp_avg, temp_min, temp_max = session.query(
        func.avg(Measurement.tobs),
        func.min(Measurement.tobs),
        func.max(Measurement.tobs),
    ).filter(Measurement.date.in_(str_date_range)).one()

    return temp_avg, temp_min, temp_max


def _shift_back_one_year(day):
    """Return *day* moved to the same month and day of the previous year.

    29 February has no counterpart in the preceding year, so it is mapped to
    28 February instead of letting ``replace`` raise ValueError.
    """
    if day.month == 2 and day.day == 29:
        return day.replace(year=day.year - 1, day=28)
    return day.replace(year=day.year - 1)

In [None]:
# Calling function
# Temperature statistics returned by calc_temps for 2016-05-06 .. 2016-05-12.
temp_analysis = calc_temps("2016-05-06", "2016-05-12")

# Creating variables
# Unpack the (average, minimum, maximum) tuple in one step.
tavg, tmin, tmax = temp_analysis

In [None]:
# Plotting bar
# Narrow figure holding a single bar: its height is the average temperature
# and its error bar extends (tmax - tmin) above and below the bar top.
plt.figure(figsize=(2,5))
plt.bar(1,height=tavg, yerr=(tmax-tmin), width=0.0001, alpha=0.5)
plt.title("Trip Avg Temp")
# An empty tick list removes the x-axis ticks; there is only one bar.
plt.xticks([])
# Fixed y range of 0 to 100.
plt.ylim(0,100)
plt.ylabel("Temp (F)")
# NOTE: savefig writes into Images/ and does not create the directory, so it
# must already exist.
plt.savefig("Images/temperature_analysis.png")
plt.show()