In [1]:
# import dependencies
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine,MetaData
from sqlalchemy import Column,Integer,String,Float
from sqlalchemy.ext.declarative import declarative_base

In [2]:
# Create an engine for the SQLite database file `hawaii.sqlite`.
# The URL names only a file (no directory component), so the database is
# addressed relative to the working directory of the kernel.
engine = create_engine('sqlite:///hawaii.sqlite')

In [3]:
# Open a connection from the engine; later cells run their statements
# through `conn`.
# NOTE(review): nothing visible in this notebook closes `conn` - consider
# calling `conn.close()` once the load is finished.
conn = engine.connect()

In [4]:
# Use `declarative_base` from SQLAlchemy to build the common base class for
# the ORM models; classes deriving from it register their tables on
# `Base.metadata`.
Base = declarative_base()
class Station(Base):
    """ORM mapping for the `station` table, keyed by the station identifier."""

    __tablename__ = 'station'

    # Station identifier; serves as the primary key.
    station = Column(String(255), primary_key=True)
    name = Column(String(255))

    # Location attributes, all stored as floats.
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

class Measurement(Base):
    """ORM mapping for the `measurement` table, one row per observation."""

    __tablename__ = 'measurement'

    # Integer primary key of the observation row.
    id = Column(Integer, primary_key=True)

    # Identifier of the reporting station, kept as a plain string column
    # (no foreign-key constraint is declared here).
    station = Column(String(255))

    # The observation date is stored as text rather than a date type.
    date = Column(String(255))

    # Observed values: `prcp` as a float, `tobs` as an integer.
    prcp = Column(Float)
    tobs = Column(Integer)

In [5]:
# Create the tables registered on `Base.metadata` in the database that
# `engine` points to.
Base.metadata.create_all(engine)

In [6]:
# Load the station list and the cleaned measurements from their CSV files
# (stations first, then measurements).
df_station, df_measurement = (
    pd.read_csv(csv_name)
    for csv_name in ('hawaii_stations.csv', 'clean_hawaii_measurements.csv')
)

In [7]:
# Turn each dataframe into a list of row dictionaries (one dict per record)
# so the rows can be handed to the database insert.
dict_station, dict_measurement = (
    frame.to_dict(orient='records') for frame in (df_station, df_measurement)
)

In [8]:
# Use MetaData from SQLAlchemy to reflect the tables that exist in the database.
# The engine is handed to `reflect()` directly instead of `MetaData(bind=...)`:
# bound metadata is deprecated in SQLAlchemy 1.4 and removed in 2.0, whereas
# `reflect(bind=...)` is accepted by both old and new releases.
metadata = MetaData()
metadata.reflect(bind=engine)
# Save the reference to the tables as a variable.
# `reflect()` has already loaded every table into `metadata.tables`, so look
# them up there rather than re-declaring them with the deprecated
# `autoload=True` flag.
table_station = metadata.tables['station']
table_measurement = metadata.tables['measurement']

In [9]:
# debug use only - clearing out any pre-existing rows (the tables themselves
# are kept) makes it possible to run the insert cell multiple times
purge_station = table_station.delete()
purge_measurement = table_measurement.delete()
conn.execute(purge_station)
conn.execute(purge_measurement)

<sqlalchemy.engine.result.ResultProxy at 0x220da7ac128>

In [10]:
# Bulk-insert the prepared records: one execute per table, with the list of
# row dictionaries supplied as the statement parameters
station_insert = table_station.insert()
measurement_insert = table_measurement.insert()
conn.execute(station_insert, dict_station)
conn.execute(measurement_insert, dict_measurement)

<sqlalchemy.engine.result.ResultProxy at 0x220da7d0dd8>

In [11]:
# check if data inserted to the sql tables (first five station rows)
# The raw SQL is wrapped in `sqlalchemy.text()`: passing a plain string to
# `Connection.execute()` is deprecated in SQLAlchemy 1.4 and rejected in 2.0,
# while `text()` is accepted by every release.
conn.execute(sqlalchemy.text('select * from station limit 5')).fetchall()

[('USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 ('USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 ('USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 ('USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 ('USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6)]

In [12]:
# check if data inserted to the sql tables (first five measurement rows)
# The raw SQL is wrapped in `sqlalchemy.text()`: passing a plain string to
# `Connection.execute()` is deprecated in SQLAlchemy 1.4 and rejected in 2.0,
# while `text()` is accepted by every release.
conn.execute(sqlalchemy.text('select * from measurement limit 5')).fetchall()

[(1, 'USC00519397', '2010-01-01', 0.08, 65),
 (2, 'USC00519397', '2010-01-02', 0.0, 63),
 (3, 'USC00519397', '2010-01-03', 0.0, 74),
 (4, 'USC00519397', '2010-01-04', 0.0, 76),
 (5, 'USC00519397', '2010-01-07', 0.06, 70)]