In [1]:
# import dependencies
import datetime as dt
import numpy as np
import pandas as pd

import sqlalchemy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

# PyMySQL 
import pymysql
pymysql.install_as_MySQLdb()

In [2]:
# store the csv data into pandas dataframes
clean_measurements = pd.read_csv("./Resources/clean_hawaii_measurements.csv")
clean_stations = pd.read_csv("./Resources/clean_hawaii_stations.csv")

In [3]:
# double-check the pandas dataframes to ensure it's the correct data
print(clean_measurements.info())
print(clean_stations.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18103 entries, 0 to 18102
Data columns (total 5 columns):
station          18103 non-null object
date             18103 non-null object
prcp             18103 non-null float64
tobs             18103 non-null int64
measurment_id    18103 non-null int64
dtypes: float64(1), int64(2), object(2)
memory usage: 707.2+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 5 columns):
station      9 non-null object
name         9 non-null object
latitude     9 non-null float64
longitude    9 non-null float64
elevation    9 non-null float64
dtypes: float64(3), object(2)
memory usage: 440.0+ bytes
None


In [4]:
# create variables to hold the data from the csv files so we can join them to the database tables later
m_data = clean_measurements.to_dict(orient='records')
s_data = clean_stations.to_dict(orient="records")

In [5]:
# establish a base for the table classes
Base = declarative_base()

In [6]:
# create a class for the measurement table
class Measurements(Base):
    __tablename__ = "measurements"
    measurement_id = Column(Integer, primary_key=True)
    station = Column(String)
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Integer)

In [7]:
# create a class for the stations table
class Stations(Base):
    __tablename__ = 'stations'
    station = Column(String, primary_key=True)
    name = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

In [8]:
# create the engine and establish the database
engine = create_engine("sqlite:///hawaii.sqlite")
conn = engine.connect()

In [9]:
# create the tables associated with the classes in the metadata
Base.metadata.create_all(engine)

In [10]:
# check to make sure the classes initialized tables in the database
engine.table_names()

['measurements', 'stations']

In [11]:
# import the data from the csv files into the database
conn.execute(Measurements.__table__.delete())
conn.execute(Stations.__table__.delete())
conn.execute(Measurements.__table__.insert(), m_data)
conn.execute(Stations.__table__.insert(), s_data)

<sqlalchemy.engine.result.ResultProxy at 0x1c6565a49b0>

In [None]:
# create a session to begin database engineering
#session = Session(bind=engine)

In [12]:
# check to make sure df to sql worked for stations
engine.execute("SELECT * FROM stations").fetchall()

[('USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 ('USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 ('USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 ('USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 ('USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6),
 ('USC00519523', 'WAIMANALO EXPERIMENTAL FARM, HI US', 21.33556, -157.71139, 19.5),
 ('USC00519281', 'WAIHEE 837.5, HI US', 21.45167, -157.84888999999995, 32.9),
 ('USC00511918', 'HONOLULU OBSERVATORY 702.2, HI US', 21.3152, -157.9992, 0.9),
 ('USC00516128', 'MANOA LYON ARBO 785.2, HI US', 21.3331, -157.8025, 152.4)]

In [13]:
# check to make sure df to sql worked for measurements
engine.execute("SELECT * FROM measurements LIMIT 10").fetchall()

[(1, 'USC00519397', '2010-01-01', 0.08, 65),
 (2, 'USC00519397', '2010-01-02', 0.0, 63),
 (3, 'USC00519397', '2010-01-03', 0.0, 74),
 (4, 'USC00519397', '2010-01-04', 0.0, 76),
 (5, 'USC00519397', '2010-01-07', 0.06, 70),
 (6, 'USC00519397', '2010-01-08', 0.0, 64),
 (7, 'USC00519397', '2010-01-09', 0.0, 68),
 (8, 'USC00519397', '2010-01-10', 0.0, 73),
 (9, 'USC00519397', '2010-01-11', 0.01, 64),
 (10, 'USC00519397', '2010-01-12', 0.0, 61)]