## Step 2 - Database Engineering

Use SQLAlchemy to model your table schemas and create a SQLite database for your tables. You will need one table for measurements and one for stations.

* Create a Jupyter Notebook called `database_engineering.ipynb` and use this to complete all of your Database Engineering work.

* Use Pandas to read your cleaned measurements and stations CSV data.

* Use the `engine` and connection string to create a database called `hawaii.sqlite`.

* Use `declarative_base` and create ORM classes for each table.

  * You will need a class for `Measurement` and for `Station`.

  * Make sure to define your primary keys.

* Once you have your ORM classes defined, create the tables in the database using `create_all`.


In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Date, Integer, String, Float
from datetime import datetime

In [2]:
# Load the cleaned measurements and the stations CSV files into DataFrames.
# Forward slashes keep the relative paths portable across operating systems and
# avoid backslash escape sequences inside the string literals; this also matches
# the 'raw_data/' form used for the SQLite URL further down.
df_hawaii_meas = pd.read_csv('raw_data/clean_hawaii_measurements.csv')
df_hawaii_stat = pd.read_csv('raw_data/hawaii_stations.csv')

In [3]:
# Sanity-check the load: print the (rows, columns) shape, then display the
# first two rows as the cell's output.
print(df_hawaii_meas.shape)
df_hawaii_meas.head(2)

(18103, 4)


Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63


In [4]:
# Add a parsed datetime column so the date can be loaded into the DB's Date column.
# pd.to_datetime parses the whole 'date' column in one vectorized call using the
# same strict '%Y-%m-%d' format, instead of calling strptime once per row.
df_hawaii_meas['datetime'] = pd.to_datetime(df_hawaii_meas['date'],
                                            format='%Y-%m-%d')
df_hawaii_meas.head(2)

Unnamed: 0,station,date,prcp,tobs,datetime
0,USC00519397,2010-01-01,0.08,65,2010-01-01
1,USC00519397,2010-01-02,0.0,63,2010-01-02


In [5]:
# Display the full stations DataFrame as the cell's output.
df_hawaii_stat

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [6]:
# Engine for the SQLite database file raw_data/hawaii.sqlite (a relative path).
engine = create_engine('sqlite:///raw_data/hawaii.sqlite')
# Declarative base that the ORM classes below inherit from; its metadata
# collects their table definitions.
Base = declarative_base()

class Measurement(Base):
    """ORM mapping for one row of the `hawaii_measurements` table."""
    __tablename__ = 'hawaii_measurements'

    # Integer primary key for the observation.
    obs_id = Column(Integer, primary_key=True)
    # Station identifier string (e.g. 'USC00519397').
    station = Column(String)
    # Observation date stored as a real Date value.
    datetime = Column(Date)
    # The same observation date kept as its original text form.
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Float)

    def __repr__(self):
        """Return a debug string that identifies this specific observation."""
        return (f"{type(self).__name__}(obs_id={self.obs_id!r}, "
                f"station={self.station!r}, date={self.date!r}, "
                f"prcp={self.prcp!r}, tobs={self.tobs!r})")

class Station(Base):
    """ORM mapping for one row of the `hawaii_stations` table."""
    __tablename__ = 'hawaii_stations'

    # Station identifier string (e.g. 'USC00519397'); serves as the primary key.
    station_id = Column(String, primary_key=True)
    name = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

    def __repr__(self):
        """Return a debug string that identifies this specific station."""
        return (f"{type(self).__name__}(station_id={self.station_id!r}, "
                f"name={self.name!r}, latitude={self.latitude!r}, "
                f"longitude={self.longitude!r}, "
                f"elevation={self.elevation!r})")
    
# Create the tables for every class registered on Base (Measurement and
# Station) in the database that `engine` points to.
Base.metadata.create_all(engine) 

In [7]:
# Create a session bound to the engine; rows are staged on it and written to
# the database when the session is committed.
session = Session(bind=engine)

try:
    # Stage one Measurement per DataFrame row. itertuples() walks the rows
    # directly, so this does not depend on the DataFrame having a 0..n-1 index
    # and avoids a separate column lookup for every single cell.
    session.add_all([
        Measurement(station=rec.station,
                    datetime=rec.datetime,
                    date=rec.date,
                    prcp=rec.prcp,
                    tobs=rec.tobs)
        for rec in df_hawaii_meas.itertuples(index=False)
    ])

    # Stage one Station per DataFrame row; the CSV's `station` column supplies
    # the `station_id` primary key.
    session.add_all([
        Station(station_id=rec.station,
                name=rec.name,
                latitude=rec.latitude,
                longitude=rec.longitude,
                elevation=rec.elevation)
        for rec in df_hawaii_stat.itertuples(index=False)
    ])

    # Commit the changes when all done.
    session.commit()
except Exception:
    # Discard the partially staged/flushed rows so the session stays usable,
    # then re-raise so the failure is still visible.
    session.rollback()
    raise

In [8]:
# At the very end, close this session. session.close() releases only this
# session's resources; the previous close_all() call is a deprecated class-level
# helper that closes every open session in the process.
session.close()