In [1]:
#!rm hawaii.sqlite

In [2]:
#import pandas
import pandas as pd

# Imports the method used for connecting to DBs
from sqlalchemy import create_engine

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float, Date

# import sqlalchemy
import sqlalchemy

# Sets an object to utilize the default declarative base in SQL Alchemy
Base = declarative_base()

In [3]:
measurement_file = "Resources\clean_hawaii_measurements.csv"
station_file = "Resources\clean_hawaii_stations.csv"

In [4]:
measurement_df = pd.read_csv(measurement_file,low_memory=False)
station_df = pd.read_csv(station_file,low_memory=False)

In [5]:
measurement_df.columns

Index(['station', 'date', 'prcp', 'tobs'], dtype='object')

In [6]:
station_df.columns

Index(['station', 'name', 'latitude', 'longitude', 'elevation'], dtype='object')

In [7]:
# Define a measurement and station class

# Creates Classes which will serve as the anchor points for our Tables
class Measurement(Base):
    __tablename__ = 'measurement'
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key=True)
    station = Column(String(50))
    date = Column(String(10))
    prcp = Column(Float)
    tobs = Column(Float)

# Creates Classes which will serve as the anchor points for our Tables
class Station(Base):
    __tablename__ = 'station'
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key=True)
    station = Column(String(50))
    name = Column(String(50))
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)


In [8]:
# Creates a connection to our DB
engine = create_engine('sqlite:///hawaii.sqlite', echo=True)
conn = engine.connect()

2018-04-22 22:40:56,403 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2018-04-22 22:40:56,423 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:40:56,431 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2018-04-22 22:40:56,435 INFO sqlalchemy.engine.base.Engine ()


In [10]:
# Create a "Metadata" Layer That Abstracts our SQL Database
# ----------------------------------
# Create (if not already in existence) the tables associated with our classes.
Base.metadata.create_all(engine)

2018-04-22 22:41:16,867 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("measurement")
2018-04-22 22:41:16,867 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:41:16,871 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("station")
2018-04-22 22:41:16,871 INFO sqlalchemy.engine.base.Engine ()


In [11]:
# Create a Session Object to Connect to DB
# ----------------------------------
# Session is a temporary binding to our DB
from sqlalchemy.orm import Session
session = Session(bind=engine)

In [12]:
metadata = sqlalchemy.schema.MetaData(bind=engine,reflect=True)
station_table = sqlalchemy.Table('station', metadata, autoload=True)
measurement_table = sqlalchemy.Table('measurement', metadata, autoload=True)

2018-04-22 22:41:38,032 INFO sqlalchemy.engine.base.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2018-04-22 22:41:38,032 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:41:38,052 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("measurement")
2018-04-22 22:41:38,052 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:41:38,056 INFO sqlalchemy.engine.base.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = 'measurement' AND type = 'table'
2018-04-22 22:41:38,060 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:41:38,080 INFO sqlalchemy.engine.base.Engine PRAGMA foreign_key_list("measurement")
2018-04-22 22:41:38,080 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:41:38,084 INFO sqlalchemy.engine.base.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = 'measurement' AND type = 'table'
2018-04-22 22:41:38,084 INFO sqlalchemy.engin

  if __name__ == '__main__':


In [13]:
# The orient='records' is the key of this, it allows to align with the format mentioned in the doc to insert in bulks.
write_station_to_database = station_df.to_dict(orient='records')
print(write_station_to_database)


[{'station': 'USC00519397', 'name': 'WAIKIKI 717.2, HI US', 'latitude': 21.2716, 'longitude': -157.8168, 'elevation': 3.0}, {'station': 'USC00513117', 'name': 'KANEOHE 838.1, HI US', 'latitude': 21.4234, 'longitude': -157.8015, 'elevation': 14.6}, {'station': 'USC00514830', 'name': 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 'latitude': 21.5213, 'longitude': -157.8374, 'elevation': 7.0}, {'station': 'USC00517948', 'name': 'PEARL CITY, HI US', 'latitude': 21.3934, 'longitude': -157.9751, 'elevation': 11.9}, {'station': 'USC00518838', 'name': 'UPPER WAHIAWA 874.3, HI US', 'latitude': 21.4992, 'longitude': -158.0111, 'elevation': 306.6}, {'station': 'USC00519523', 'name': 'WAIMANALO EXPERIMENTAL FARM, HI US', 'latitude': 21.33556, 'longitude': -157.71139, 'elevation': 19.5}, {'station': 'USC00519281', 'name': 'WAIHEE 837.5, HI US', 'latitude': 21.45167, 'longitude': -157.84888999999995, 'elevation': 32.9}, {'station': 'USC00511918', 'name': 'HONOLULU OBSERVATORY 702.2, HI US', 'latitude': 2

In [14]:
write_measurement_to_database = measurement_df.to_dict(orient='records')
print(write_measurement_to_database)

[{'station': 'USC00519397', 'date': '2010-01-01', 'prcp': 0.08, 'tobs': 65}, {'station': 'USC00519397', 'date': '2010-01-02', 'prcp': 0.0, 'tobs': 63}, {'station': 'USC00519397', 'date': '2010-01-03', 'prcp': 0.0, 'tobs': 74}, {'station': 'USC00519397', 'date': '2010-01-04', 'prcp': 0.0, 'tobs': 76}, {'station': 'USC00519397', 'date': '2010-01-07', 'prcp': 0.06, 'tobs': 70}, {'station': 'USC00519397', 'date': '2010-01-08', 'prcp': 0.0, 'tobs': 64}, {'station': 'USC00519397', 'date': '2010-01-09', 'prcp': 0.0, 'tobs': 68}, {'station': 'USC00519397', 'date': '2010-01-10', 'prcp': 0.0, 'tobs': 73}, {'station': 'USC00519397', 'date': '2010-01-11', 'prcp': 0.01, 'tobs': 64}, {'station': 'USC00519397', 'date': '2010-01-12', 'prcp': 0.0, 'tobs': 61}, {'station': 'USC00519397', 'date': '2010-01-14', 'prcp': 0.0, 'tobs': 66}, {'station': 'USC00519397', 'date': '2010-01-15', 'prcp': 0.0, 'tobs': 65}, {'station': 'USC00519397', 'date': '2010-01-16', 'prcp': 0.0, 'tobs': 68}, {'station': 'USC00519

In [15]:
from sqlalchemy.orm import sessionmaker
# Open the session
Session = sessionmaker(bind=engine)
session = Session()

# Inser the dataframe into the database in one bulk
conn.execute(station_table.insert(), write_station_to_database)

# Commit the changes
session.commit()

2018-04-22 22:42:03,803 INFO sqlalchemy.engine.base.Engine INSERT INTO station (station, name, latitude, longitude, elevation) VALUES (?, ?, ?, ?, ?)
2018-04-22 22:42:03,807 INFO sqlalchemy.engine.base.Engine (('USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0), ('USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6), ('USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0), ('USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9), ('USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6), ('USC00519523', 'WAIMANALO EXPERIMENTAL FARM, HI US', 21.33556, -157.71139, 19.5), ('USC00519281', 'WAIHEE 837.5, HI US', 21.45167, -157.84888999999995, 32.9), ('USC00511918', 'HONOLULU OBSERVATORY 702.2, HI US', 21.3152, -157.9992, 0.9), ('USC00516128', 'MANOA LYON ARBO 785.2, HI US', 21.3331, -157.8025, 152.4))
2018-04-22 22:42:03,859 INFO sqlalchemy.engine.base.Engine COMMIT


In [16]:
# Inser the dataframe into the database in one bulk
conn.execute(measurement_table.insert(), write_measurement_to_database)

# Commit the changes
session.commit()

2018-04-22 22:42:09,222 INFO sqlalchemy.engine.base.Engine INSERT INTO measurement (station, date, prcp, tobs) VALUES (?, ?, ?, ?)
2018-04-22 22:42:09,226 INFO sqlalchemy.engine.base.Engine (('USC00519397', '2010-01-01', 0.08, 65.0), ('USC00519397', '2010-01-02', 0.0, 63.0), ('USC00519397', '2010-01-03', 0.0, 74.0), ('USC00519397', '2010-01-04', 0.0, 76.0), ('USC00519397', '2010-01-07', 0.06, 70.0), ('USC00519397', '2010-01-08', 0.0, 64.0), ('USC00519397', '2010-01-09', 0.0, 68.0), ('USC00519397', '2010-01-10', 0.0, 73.0)  ... displaying 10 of 18103 total bound parameter sets ...  ('USC00516128', '2017-08-22', 0.5, 76.0), ('USC00516128', '2017-08-23', 0.45, 76.0))
2018-04-22 22:42:09,310 INFO sqlalchemy.engine.base.Engine COMMIT


In [17]:
# Use the session to query Mesurement table and display the first row
first_row = session.query(Measurement).first()
first_row.__dict__

2018-04-22 22:42:14,871 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2018-04-22 22:42:14,871 INFO sqlalchemy.engine.base.Engine SELECT measurement.id AS measurement_id, measurement.station AS measurement_station, measurement.date AS measurement_date, measurement.prcp AS measurement_prcp, measurement.tobs AS measurement_tobs 
FROM measurement
 LIMIT ? OFFSET ?
2018-04-22 22:42:14,875 INFO sqlalchemy.engine.base.Engine (1, 0)


{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1d47c4be908>,
 'date': '2010-01-01',
 'id': 1,
 'prcp': 0.08,
 'station': 'USC00519397',
 'tobs': 65.0}

In [18]:
# Use the session to query Station table and display the first row
first_row = session.query(Station).first()
first_row.__dict__

2018-04-22 22:42:19,252 INFO sqlalchemy.engine.base.Engine SELECT station.id AS station_id, station.station AS station_station, station.name AS station_name, station.latitude AS station_latitude, station.longitude AS station_longitude, station.elevation AS station_elevation 
FROM station
 LIMIT ? OFFSET ?
2018-04-22 22:42:19,252 INFO sqlalchemy.engine.base.Engine (1, 0)


{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1d47c4d12e8>,
 'elevation': 3.0,
 'id': 1,
 'latitude': 21.2716,
 'longitude': -157.8168,
 'name': 'WAIKIKI 717.2, HI US',
 'station': 'USC00519397'}

In [19]:
data = pd.read_sql("SELECT * FROM measurement", conn) 
data

2018-04-22 22:42:22,928 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("SELECT * FROM measurement")
2018-04-22 22:42:22,932 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:42:22,932 INFO sqlalchemy.engine.base.Engine SELECT * FROM measurement
2018-04-22 22:42:22,936 INFO sqlalchemy.engine.base.Engine ()


Unnamed: 0,id,station,date,prcp,tobs
0,1,USC00519397,2010-01-01,0.08,65.0
1,2,USC00519397,2010-01-02,0.00,63.0
2,3,USC00519397,2010-01-03,0.00,74.0
3,4,USC00519397,2010-01-04,0.00,76.0
4,5,USC00519397,2010-01-07,0.06,70.0
5,6,USC00519397,2010-01-08,0.00,64.0
6,7,USC00519397,2010-01-09,0.00,68.0
7,8,USC00519397,2010-01-10,0.00,73.0
8,9,USC00519397,2010-01-11,0.01,64.0
9,10,USC00519397,2010-01-12,0.00,61.0


In [20]:
data = pd.read_sql("SELECT * FROM station", conn) 
data

2018-04-22 22:42:32,586 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("SELECT * FROM station")
2018-04-22 22:42:32,590 INFO sqlalchemy.engine.base.Engine ()
2018-04-22 22:42:32,590 INFO sqlalchemy.engine.base.Engine SELECT * FROM station
2018-04-22 22:42:32,594 INFO sqlalchemy.engine.base.Engine ()


Unnamed: 0,id,station,name,latitude,longitude,elevation
0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,2,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,3,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,4,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,5,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,6,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,7,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,8,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
