In [1]:
#!rm hawaii.sqlite

##### import the required libraries

In [2]:
#import pandas
import pandas as pd

# Imports the method used for connecting to DBs
from sqlalchemy import create_engine

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float, Date

# import sqlalchemy
import sqlalchemy

# Declarative base class. The ORM models defined below (Measurement, Station)
# subclass it, and their table definitions are collected on Base.metadata,
# which is later used to create the tables.
Base = declarative_base()

In [3]:
# Paths to the cleaned CSV inputs, relative to the notebook's working directory.
# Forward slashes are used on purpose: they resolve on Windows, macOS and Linux,
# whereas a backslash separator inside a normal string literal is an unrecognised
# escape sequence (a warning on recent Python versions) and is Windows-only.
measurement_file = "Resources/clean_hawaii_measurements.csv"
station_file = "Resources/clean_hawaii_stations.csv"

In [4]:
# Load both cleaned CSV files into DataFrames.
# low_memory=False asks pandas to parse each file in a single pass instead of
# in chunks (per the pandas docs this avoids mixed-type column inference).
measurement_df = pd.read_csv(measurement_file,low_memory=False)
station_df = pd.read_csv(station_file,low_memory=False)

In [5]:
# Show the measurement column names; the Measurement model below declares the same ones.
measurement_df.columns

Index(['station', 'date', 'prcp', 'tobs'], dtype='object')

In [6]:
# Show the station column names; the Station model below declares the same ones.
station_df.columns

Index(['station', 'name', 'latitude', 'longitude', 'elevation'], dtype='object')

###### Define the Measurement and Station classes

In [7]:


# ORM model that serves as the anchor point for the 'measurement' table
class Measurement(Base):
    """ORM mapping for one row of the 'measurement' table.

    The station, date, prcp and tobs columns mirror the columns of the
    measurements CSV. The CSV has no `id` column, so `id` is a surrogate
    primary key filled in by the database on insert.
    """
    __tablename__ = 'measurement'
    # extend_existing allows a table of this name that is already registered on
    # Base.metadata to be extended instead of raising - presumably so that this
    # cell can be re-run in the same kernel.
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key=True)
    # Station identifier (e.g. 'USC00519397'); same values as Station.station.
    station = Column(String(50))
    # Stored as text rather than a Date column; sample rows look like '2010-01-01'.
    date = Column(String(10))
    # presumably precipitation - units are not shown in this notebook
    prcp = Column(Float)
    # presumably the observed temperature - units are not shown in this notebook
    tobs = Column(Float)

# ORM model that serves as the anchor point for the 'station' table
class Station(Base):
    """ORM mapping for one row of the 'station' table.

    The station, name, latitude, longitude and elevation columns mirror the
    columns of the stations CSV. The CSV has no `id` column, so `id` is a
    surrogate primary key filled in by the database on insert.
    """
    __tablename__ = 'station'
    # extend_existing allows a table of this name that is already registered on
    # Base.metadata to be extended instead of raising - presumably so that this
    # cell can be re-run in the same kernel.
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key=True)
    # Station identifier (e.g. 'USC00519397'); no foreign key links it to Measurement.station.
    station = Column(String(50))
    # Human-readable station name, e.g. 'WAIKIKI 717.2, HI US'.
    name = Column(String(50))
    latitude = Column(Float)
    longitude = Column(Float)
    # units are not shown in this notebook
    elevation = Column(Float)


##### Create a connection to our DB

In [8]:
# Engine for the SQLite database file 'hawaii.sqlite'; the three-slash URL form
# is a path relative to the current working directory.
engine = create_engine('sqlite:///hawaii.sqlite')
# Connection reused by later cells for the bulk inserts and the pd.read_sql checks.
# NOTE(review): it is never closed in the visible cells.
conn = engine.connect()

In [9]:
##### Create a "Metadata" Layer That Abstracts our SQL Database
##### Create (if not already in existence) the tables associated with our classes.

In [10]:
# Issue CREATE TABLE for every model registered on Base (Measurement and Station).
# By default create_all checks for existence first, so existing tables are left as they are.
Base.metadata.create_all(engine)

###### Create a Session Object to Connect to DB
###### Session is a temporary binding to our DB

In [11]:
# Session class from the ORM, bound directly to the engine.
# NOTE(review): both `Session` and `session` are rebound in cell In [14]
# (sessionmaker factory + new instance), so this session object is only
# live until that cell runs.
from sqlalchemy.orm import Session
session = Session(bind=engine)

In [12]:
# Reflect the schema that now exists in the database into a fresh MetaData.
# The explicit reflect() call replaces MetaData(bind=engine, reflect=True):
# the `reflect` flag is deprecated (it triggers SADeprecationWarning) and both
# `reflect` and `bind` are no longer accepted by newer SQLAlchemy releases.
metadata = sqlalchemy.schema.MetaData()
metadata.reflect(bind=engine)

#### Look up the reflected Table objects used for the bulk inserts below

# reflect() has already loaded every table, so a plain lookup replaces the
# deprecated Table(..., autoload=True) form. A missing table raises KeyError here.
station_table = metadata.tables['station']
measurement_table = metadata.tables['measurement']

  if __name__ == '__main__':


In [13]:
# Convert each DataFrame to a list of dictionaries that will be used to insert the data into the tables.
# orient='records' is the key: it yields one {column: value} dict per row, which is the
# format the bulk insert expects for its parameter list.

write_station_to_database = station_df.to_dict(orient='records')
write_measurement_to_database = measurement_df.to_dict(orient='records')


In [14]:
from sqlalchemy.orm import sessionmaker
# Open the session (this rebinds `Session` to a factory and replaces the earlier `session`)
Session = sessionmaker(bind=engine)
session = Session()

# Insert the station data into the station table in one bulk "executemany".
# The statement is executed through the session so that the commit below
# applies to the same transaction. Running it on the separate `conn`
# connection would leave session.commit() with nothing to commit and make the
# write depend on connection-level autocommit.
# NOTE(review): re-running this cell inserts the rows a second time.
session.execute(station_table.insert(), write_station_to_database)

# Commit the changes
session.commit()

In [15]:
# Insert the measurement data into the measurement table in one bulk "executemany".
# The statement is executed through the session (created in the previous cell)
# so that the commit below applies to the same transaction. Running it on the
# separate `conn` connection would leave session.commit() with nothing to
# commit and make the write depend on connection-level autocommit.
# NOTE(review): re-running this cell inserts the rows a second time.
session.execute(measurement_table.insert(), write_measurement_to_database)

# Commit the changes
session.commit()

In [16]:
# Sanity check: fetch the first Measurement row through the ORM session and
# display its attribute dictionary (this includes SQLAlchemy's internal
# '_sa_instance_state' entry alongside the column values).
first_row = session.query(Measurement).first()
vars(first_row)

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1c64c8b3cc0>,
 'date': '2010-01-01',
 'id': 1,
 'prcp': 0.08,
 'station': 'USC00519397',
 'tobs': 65.0}

In [17]:
# Sanity check: fetch the first Station row through the ORM session and
# display its attribute dictionary (this includes SQLAlchemy's internal
# '_sa_instance_state' entry alongside the column values).
first_row = session.query(Station).first()
vars(first_row)

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1c64d3a1780>,
 'elevation': 3.0,
 'id': 1,
 'latitude': 21.2716,
 'longitude': -157.8168,
 'name': 'WAIKIKI 717.2, HI US',
 'station': 'USC00519397'}

In [18]:
# Read the measurement table back through the raw connection to confirm the load.
# NOTE(review): the name `data` is reused for the station table in the next cell.
data = pd.read_sql("SELECT * FROM measurement", conn)
data.head()

Unnamed: 0,id,station,date,prcp,tobs
0,1,USC00519397,2010-01-01,0.08,65.0
1,2,USC00519397,2010-01-02,0.0,63.0
2,3,USC00519397,2010-01-03,0.0,74.0
3,4,USC00519397,2010-01-04,0.0,76.0
4,5,USC00519397,2010-01-07,0.06,70.0


In [19]:
# Read the station table back through the raw connection to confirm the load.
# NOTE(review): this rebinds `data`, which held the measurement rows in the previous cell.
data = pd.read_sql("SELECT * FROM station", conn)
data.head()

Unnamed: 0,id,station,name,latitude,longitude,elevation
0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,2,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,3,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,4,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,5,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
