# Surf's Up
Performing a climate analysis for Hawaii and providing an API to share the results with others.

## Database Engineering
This code reads the clean data files for our analysis and creates a SQLite database from them. The data_engineering.ipynb notebook must be run before running this code.

There are two data files in the Resources folder:
* hawaii_stations.csv
* clean_hawaii_measurements.csv

The SQLite database is created in the following file in the Resources folder:
* hawaii.sqlite

In [186]:
# Import Dependencies
import pandas as pd
import os
import sqlalchemy
from sqlalchemy import Column, Float, Integer, String, Date
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.orm import Session
from sqlalchemy import ForeignKey

In [187]:
# Build the paths to the two cleaned CSV inputs inside the Resources folder
stations_file, measurements_file = (
    os.path.join("Resources", csv_name)
    for csv_name in ("hawaii_stations.csv", "clean_hawaii_measurements.csv")
)

### Read data files
We read all the clean data from the data files and prepare it for saving into the database.

In [188]:
# Load the cleaned measurement observations into a DataFrame and preview the first rows
measurements_df = pd.read_csv(filepath_or_buffer=measurements_file)
measurements_df.head(n=5)

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-07,0.06,70


In [189]:
# Number of non-null values in each column of the measurement data
measurements_df.count(axis=0)

station    18103
date       18103
prcp       18103
tobs       18103
dtype: int64

In [190]:
# Load the stations data into a DataFrame and display it
stations_df = pd.read_csv(filepath_or_buffer=stations_file)
stations_df

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [191]:
# Rename the columns to the names used by the `stations` database table.
# The renamed frame is assigned back instead of using inplace=True:
# rename() returns a new frame, which is the form pandas recommends.
stations_df = stations_df.rename(columns={"station": "station_id", "name": "station_name"})
stations_df

Unnamed: 0,station_id,station_name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [192]:
# Rename the columns to the names used by the `measurements` database table.
# The renamed frame is assigned back instead of using inplace=True:
# rename() returns a new frame, which is the form pandas recommends.
measurements_df = measurements_df.rename(columns={"station": "station_id",
                                                  "prcp": "precipitation",
                                                  "tobs": "temperature"})
measurements_df.head()

Unnamed: 0,station_id,date,precipitation,temperature
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-07,0.06,70


### Create database objects and save data into the database
We now define the classes that correspond to our database tables, connect to the SQLite database, and create the tables if they don't exist. We then empty the tables to ensure any existing data is cleaned out.

In [193]:
# Define our base object: the declarative base class that the table-mapped
# classes in this notebook (Station, Measurement) inherit from
Base = declarative_base()

# Define our data object classes
class Station(Base):
    """ORM mapping for the `stations` table: one row per station."""
    __tablename__ = 'stations'
    id = Column(Integer, primary_key=True)
    # Target of the measurements.station_id foreign key, so it is declared
    # unique: a foreign key's parent column must be a primary key or unique.
    station_id = Column(String(255), unique=True)
    station_name = Column(String(255))
    # Plain Float: the optional precision argument is an integer used in DDL,
    # not a "digits.decimals" format, so no precision is passed.
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

class Measurement(Base):
    """ORM mapping for the `measurements` table: one observation per row, linked to a station."""
    __tablename__ = "measurements"
    id = Column(Integer, primary_key=True)
    # Every measurement must reference a station through stations.station_id
    station_id = Column(String(255), ForeignKey("stations.station_id"), nullable=False)
    # Kept as text; the values loaded from the CSV look like '2010-01-01'
    date = Column(String(255))
    # Plain Float: the optional precision argument is an integer used in DDL,
    # not a "digits.decimals" format, so no precision is passed.
    precipitation = Column(Float)
    temperature = Column(Float)
    

In [194]:
# Connect to the SQLite database file and create any mapped tables that do not exist yet.
database_url = "sqlite:///Resources/hawaii.sqlite"
engine = create_engine(database_url)
conn = engine.connect()
Base.metadata.create_all(bind=engine)

In [195]:
# Reflect the database schema into a MetaData object bound to the engine,
# so the table references can be obtained from it afterwards
metadata = MetaData(bind=engine)
metadata.reflect(bind=engine)

In [196]:
# Get the table references from the reflected metadata
stations_table = sqlalchemy.Table('stations', metadata, autoload=True)
measurements_table = sqlalchemy.Table('measurements', metadata, autoload=True)

# Delete existing table data if any. Measurements are emptied first because
# they reference stations through a foreign key; removing the referenced
# rows first would fail whenever foreign key enforcement is enabled.
conn.execute(measurements_table.delete())
conn.execute(stations_table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x11275d400>

In [197]:
# Convert each data frame into a list of per-row dicts, the form passed to the table inserts
stations_data, measurements_data = (
    frame.to_dict(orient='records') for frame in (stations_df, measurements_df)
)
stations_data

[{'elevation': 3.0,
  'latitude': 21.2716,
  'longitude': -157.8168,
  'station_id': 'USC00519397',
  'station_name': 'WAIKIKI 717.2, HI US'},
 {'elevation': 14.6,
  'latitude': 21.4234,
  'longitude': -157.8015,
  'station_id': 'USC00513117',
  'station_name': 'KANEOHE 838.1, HI US'},
 {'elevation': 7.0,
  'latitude': 21.5213,
  'longitude': -157.8374,
  'station_id': 'USC00514830',
  'station_name': 'KUALOA RANCH HEADQUARTERS 886.9, HI US'},
 {'elevation': 11.9,
  'latitude': 21.3934,
  'longitude': -157.9751,
  'station_id': 'USC00517948',
  'station_name': 'PEARL CITY, HI US'},
 {'elevation': 306.6,
  'latitude': 21.4992,
  'longitude': -158.0111,
  'station_id': 'USC00518838',
  'station_name': 'UPPER WAHIAWA 874.3, HI US'},
 {'elevation': 19.5,
  'latitude': 21.33556,
  'longitude': -157.71139,
  'station_id': 'USC00519523',
  'station_name': 'WAIMANALO EXPERIMENTAL FARM, HI US'},
 {'elevation': 32.9,
  'latitude': 21.45167,
  'longitude': -157.84888999999998,
  'station_id': 'US

In [198]:
# Bulk-insert the clean rows: stations first, then the measurements that reference them
insert_stations = stations_table.insert()
insert_measurements = measurements_table.insert()
conn.execute(insert_stations, stations_data)
conn.execute(insert_measurements, measurements_data)

<sqlalchemy.engine.result.ResultProxy at 0x11278d400>

### Verify the data exists in the database
We will run simple queries to verify that the data was saved correctly to the tables.

In [199]:
# Fetch the first five rows of the stations table to verify the data was saved
stations_preview = conn.execute("select * from stations limit 5")
stations_preview.fetchall()

[(1, 'USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 (2, 'USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 (3, 'USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 (4, 'USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 (5, 'USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6)]

In [200]:
# Fetch the first five rows of the measurements table to verify the data was saved
measurements_preview = conn.execute("select * from measurements limit 5")
measurements_preview.fetchall()

[(1, 'USC00519397', '2010-01-01', 0.08, 65.0),
 (2, 'USC00519397', '2010-01-02', 0.0, 63.0),
 (3, 'USC00519397', '2010-01-03', 0.0, 74.0),
 (4, 'USC00519397', '2010-01-04', 0.0, 76.0),
 (5, 'USC00519397', '2010-01-07', 0.06, 70.0)]