In [24]:
import pandas as pd
import numpy as np
import os

In [25]:
!ls

clean_meas.csv
clean_measurements.csv
clean_station.csv
clean_stations.csv
climate_analysis.ipynb
data_engineering.ipynb
database_engineering.ipynb
hawaii.sqlite
images
merged_hawaii.csv
README.md
Resources
sqlalchemy


In [26]:
clean_meas = pd.read_csv('clean_meas.csv')
len(clean_meas)

9918

In [27]:
clean_meas.rename(columns={'Unnamed: 0': 'id'}, inplace=True)
clean_meas.to_csv('clean_measurements.csv')
len(clean_meas)

9918

In [28]:
clean_meas.head(5)

Unnamed: 0,id,station,date,prcp,tobs
0,0,USC00519397,2010-01-01,0.08,65
1,5,USC00519397,2010-01-07,0.06,70
2,9,USC00519397,2010-01-11,0.01,64
3,21,USC00519397,2010-01-24,0.01,71
4,23,USC00519397,2010-01-26,0.04,76


In [29]:
clean_stations = pd.read_csv('clean_stations.csv')
clean_stations.rename(columns={'Unnamed: 0': 'id'}, inplace=True)
clean_stations.to_csv('clean_station.csv')
clean_stations.head(1)

Unnamed: 0,id,station,name,latitude,longitude,elevation
0,0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0


In [30]:
import sqlalchemy
import datetime
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, DateTime

In [31]:
engine = create_engine('sqlite:///hawaii.sqlite')

In [32]:
conn = engine.connect()

In [33]:
Base = declarative_base()

class Measurement(Base):
    __tablename__ = 'measurement'
    
    id = Column(Integer, primary_key=True)
    station = Column(String)
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Float)
    
        
    def __repr__(self):
        return f"id={self.id}, station={self.station}"

In [34]:
class Station(Base):
    __tablename__ = 'station'
    
    id = Column(Integer, primary_key = True)
    station = Column(String)
    name = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)
    
    def __repr__(self):
        return f"id={self.id}, name={self.name}"

In [35]:
Base.metadata.create_all(engine)

In [36]:
stations_df = pd.read_csv('clean_station.csv')
meas_df = pd.read_csv('clean_measurements.csv')

In [37]:
stations_data = stations_df.to_dict(orient='records')
meas_data = meas_df.to_dict(orient='records')

In [38]:
metadata = MetaData(bind=engine)
metadata.reflect()

In [39]:
table_stations = sqlalchemy.Table('station', metadata, autoload=True)
table_meas = sqlalchemy.Table('measurement', metadata, autoload=True)

In [40]:
table_meas

Table('measurement', MetaData(bind=Engine(sqlite:///hawaii.sqlite)), Column('id', INTEGER(), table=<measurement>, primary_key=True, nullable=False), Column('station', VARCHAR(), table=<measurement>), Column('date', VARCHAR(), table=<measurement>), Column('prcp', FLOAT(), table=<measurement>), Column('tobs', FLOAT(), table=<measurement>), schema=None)

In [41]:
conn.execute(table_stations.delete())

<sqlalchemy.engine.result.ResultProxy at 0x1d1d02d1940>

In [42]:
conn.execute(table_meas.delete())

<sqlalchemy.engine.result.ResultProxy at 0x1d1d100f748>

In [43]:
conn.execute(table_stations.insert(), stations_data)

<sqlalchemy.engine.result.ResultProxy at 0x1d1cfa30748>

In [44]:
conn.execute(table_meas.insert(), meas_data)

<sqlalchemy.engine.result.ResultProxy at 0x1d1d22e0c18>

In [45]:
conn.execute("select * from measurement").fetchall()

[(0, 'USC00519397', '2010-01-01', 0.08, 65.0),
 (5, 'USC00519397', '2010-01-07', 0.06, 70.0),
 (9, 'USC00519397', '2010-01-11', 0.01, 64.0),
 (21, 'USC00519397', '2010-01-24', 0.01, 71.0),
 (23, 'USC00519397', '2010-01-26', 0.04, 76.0),
 (24, 'USC00519397', '2010-01-27', 0.12, 68.0),
 (27, 'USC00519397', '2010-01-31', 0.03, 67.0),
 (28, 'USC00519397', '2010-02-01', 0.01, 66.0),
 (30, 'USC00519397', '2010-02-04', 0.01, 69.0),
 (37, 'USC00519397', '2010-02-12', 0.02, 69.0),
 (38, 'USC00519397', '2010-02-13', 0.01, 69.0),
 (44, 'USC00519397', '2010-02-20', 0.03, 64.0),
 (52, 'USC00519397', '2010-03-01', 0.01, 70.0),
 (55, 'USC00519397', '2010-03-04', 0.12, 70.0),
 (56, 'USC00519397', '2010-03-05', 0.08, 71.0),
 (57, 'USC00519397', '2010-03-06', 0.03, 72.0),
 (59, 'USC00519397', '2010-03-08', 0.43, 69.0),
 (60, 'USC00519397', '2010-03-09', 0.06, 70.0),
 (65, 'USC00519397', '2010-03-15', 0.06, 71.0),
 (79, 'USC00519397', '2010-04-02', 0.01, 74.0),
 (80, 'USC00519397', '2010-04-03', 0.17, 74

In [46]:
conn.execute("select * from station").fetchall()

[(0, 'USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 (1, 'USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 (2, 'USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 (3, 'USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 (4, 'USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6),
 (5, 'USC00519523', 'WAIMANALO EXPERIMENTAL FARM, HI US', 21.33556, -157.71139, 19.5),
 (6, 'USC00519281', 'WAIHEE 837.5, HI US', 21.45167, -157.84888999999995, 32.9),
 (7, 'USC00511918', 'HONOLULU OBSERVATORY 702.2, HI US', 21.3152, -157.9992, 0.9),
 (8, 'USC00516128', 'MANOA LYON ARBO 785.2, HI US', 21.3331, -157.8025, 152.4)]