# Using SQLAlchemy ORM

In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import datetime as dt
import sqlalchemy as db
import seaborn as sns

In [16]:
# Global plotting defaults: ggplot theme, then figure, font, and tick settings
plt.style.use('ggplot')
rcParams.update({
    # Figure size (inches) and resolution for on-screen and saved figures
    'figure.figsize': [12.0, 10.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    # Text sizes and font selection
    'font.size': 20,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    # Ticks point inward and are mirrored on the top/right axes
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.top': True,
    'ytick.right': True,
    # Tick lengths; minor ticks are switched on
    'xtick.major.size': 10,
    'ytick.major.size': 10,
    'xtick.minor.size': 5,
    'ytick.minor.size': 5,
    'xtick.minor.visible': True,
    'ytick.minor.visible': True,
})


# Reflect Tables into SQLAlchemy ORM

In [18]:
# Python SQL toolkit and Object Relational Mapper
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, and_, or_

In [19]:
# Create an engine for the PostgreSQL database "tets1" on localhost:5432
# NOTE(review): the database name "tets1" may be a typo for "test1" — confirm.
engine = create_engine('postgresql://localhost:5432/tets1')
# Open a connection on that engine; later cells pass `conn` to pd.read_sql
conn=engine.connect()

In [20]:
# Build an Inspector on the engine and list the table names it reports
inspector=inspect(engine)
inspector.get_table_names()

['election', 'census', 'vaccine']

In [21]:
# Print the column names and types of each table, as reported by the Inspector.
# One loop replaces three copy-pasted stanzas; the printed output is unchanged:
# a "<table> table" title, a rule of 50 asterisks, one "name type" line per
# column, and an empty line separating consecutive tables.
for position, table_name in enumerate(('election', 'census', 'vaccine')):
    if position:
        # Blank separator goes before every table except the first
        print('')
    print(f'{table_name} table')
    print('*'*50)
    columns = inspector.get_columns(table_name)
    for column in columns:
        print(column['name'], column['type'])

election table
**************************************************
County VARCHAR(100)
State VARCHAR(100)
Republic DOUBLE_PRECISION
Democrate DOUBLE_PRECISION
Win VARCHAR(100)

census table
**************************************************
County VARCHAR
State VARCHAR(20)
Population DOUBLE_PRECISION
Median Age DOUBLE_PRECISION
Household Income DOUBLE_PRECISION
Poverty Rate DOUBLE_PRECISION
Unemployment Rate DOUBLE_PRECISION
High School Rate DOUBLE_PRECISION
College Rate DOUBLE_PRECISION
Uneducated Rate DOUBLE_PRECISION
White Population Rate DOUBLE_PRECISION
Black Population Rate DOUBLE_PRECISION
Hispanic Population Rate DOUBLE_PRECISION
Asian Population Rate DOUBLE_PRECISION

vaccine table
**************************************************
Date DATE
County VARCHAR(100)
State VARCHAR(100)
Series_Complete_Pop_Pct DOUBLE_PRECISION
Fully_Vaccinated DOUBLE_PRECISION
Fully_Vaccinated_12+ DOUBLE_PRECISION
Fully_Vaccinated_18+ DOUBLE_PRECISION
Fully_Vaccinated_65+ DOUBLE_PRECISION
Completeness

In [22]:
# Reflect the existing database schema into automap-generated ORM classes.
Base = automap_base()
# `prepare(engine, reflect=True)` is deprecated since SQLAlchemy 1.4 and no longer
# accepted in 2.0; `autoload_with=engine` reflects the tables while preparing.
Base.prepare(autoload_with=engine)

In [23]:
# List the names of the mapped classes that automap generated
Base.classes.keys()

['election', 'census', 'vaccine']

In [24]:
# Bind each automapped class to a module-level name for use in the queries below
Election, Census, Vaccine = (
    Base.classes.election,
    Base.classes.census,
    Base.classes.vaccine,
)

In [25]:
# Create an ORM session bound to the engine; the session.query(...) cells use it
session = Session(engine)

# Exploratory election Analysis

In [26]:
# Load every row of the election table into a DataFrame (column names come
# from the query result), then preview the first rows
df_election = pd.read_sql(sql="SELECT * FROM election", con=conn)

df_election.head()

Unnamed: 0,County,State,Republic,Democrate,Win
0,Abbeville County,SC,0.661,0.33,R
1,Abbot County,ME,0.691,0.29,R
2,Abington County,MA,0.439,0.537,D
3,Acadia County,LA,0.795,0.191,R
4,Accomack County,VA,0.542,0.447,R


In [36]:
# Retrieve the average precipitation per date for dates after `query_date`.
# Grouping by date collapses the readings from different stations, so prcp has to
# be aggregated: PostgreSQL rejects a selected column that is neither grouped nor
# aggregated, and SQLite would return the value from an arbitrary row of each group.
# NOTE(review): `Measurement` and `query_date` are not defined in the visible cells
# of this notebook — confirm where they come from or restore their definitions.
precp = (
    session.query(Measurement.date, func.avg(Measurement.prcp))
    .filter(Measurement.date > query_date)
    .group_by(Measurement.date)
    .all()
)

In [39]:
# Build the per-date average precipitation statement and load it straight into a
# DataFrame with pandas `read_sql_query`.
# The aggregate is labelled in the query itself, so the column arrives as
# 'avg_prcp' without depending on the auto-generated name 'avg_1' or on an
# in-place rename (which made re-running the cell non-idempotent).
# NOTE(review): `Measurement` and `query_date` are not defined in the visible cells
# of this notebook — confirm where they come from or restore their definitions.
stmt = (
    session.query(Measurement.date, func.avg(Measurement.prcp).label('avg_prcp'))
    .filter(Measurement.date > query_date)
    .group_by(Measurement.date)
    .statement
)
df = pd.read_sql_query(stmt, session.bind)
df

Unnamed: 0,date,avg_prcp
0,2016-08-24,1.555000
1,2016-08-25,0.077143
2,2016-08-26,0.016667
3,2016-08-27,0.064000
4,2016-08-28,0.516667
...,...,...
360,2017-08-19,0.030000
361,2017-08-20,0.005000
362,2017-08-21,0.193333
363,2017-08-22,0.166667


# Exploratory vaccine Analysis

In [30]:
# Load every row of the vaccine table into a DataFrame (column names come
# from the query result) and display it
df_vaccine = pd.read_sql(sql="SELECT * FROM vaccine", con=conn)

df_vaccine

Unnamed: 0,Date,County,State,Series_Complete_Pop_Pct,Fully_Vaccinated,Fully_Vaccinated_12+,Fully_Vaccinated_18+,Fully_Vaccinated_65+,Completeness_pct,Administered_Dose1_Recip,Partially_Vaccinated,Partially_Vaccinated_12+,Partially_Vaccinated_18+,Partially_Vaccinated_65+
0,2021-10-28,Marengo County,AL,49.2,9272.0,57.7,60.1,78.0,92.6,11422.0,60.6,71.0,73.5,90.0
1,2021-10-28,Taylor County,KY,48.6,12516.0,57.3,59.3,82.4,94.4,14120.0,54.8,64.7,66.8,89.5
2,2021-10-28,Jefferson County,AR,37.7,25225.0,44.0,45.0,61.7,90.3,32370.0,48.4,56.4,57.2,74.0
3,2021-10-28,Musselshell County,MT,31.1,1443.0,35.2,37.4,53.9,95.7,1574.0,34.0,38.4,40.7,56.4
4,2021-10-28,Powder River County,MT,19.6,330.0,21.9,23.3,37.8,95.7,362.0,21.5,24.0,25.5,39.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3278,2021-10-28,Unknown County,NJ,0.0,379004.0,0.0,0.0,0.0,93.6,0.0,0.0,0.0,0.0,0.0
3279,2021-10-28,Silver Bow County,MT,56.4,19709.0,65.4,66.4,87.7,95.7,21103.0,60.4,70.0,70.9,92.9
3280,2021-10-28,Walla Walla County,WA,57.2,34753.0,66.1,67.6,83.4,96.1,38185.0,62.8,72.6,74.2,92.4
3281,2021-10-28,Franklin County,KS,45.6,11637.0,53.8,56.5,83.0,93.6,12893.0,50.5,59.6,62.5,90.0


# Exploratory census Analysis

In [28]:
# Load every row of the census table into a DataFrame (column names come
# from the query result) and display it
df_census = pd.read_sql(sql="SELECT * FROM census", con=conn)

df_census

Unnamed: 0,County,State,Population,Median Age,Household Income,Poverty Rate,Unemployment Rate,High School Rate,College Rate,Uneducated Rate,White Population Rate,Black Population Rate,Hispanic Population Rate,Asian Population Rate
0,Aasco Municipio,PR,26686.000000,42.700000,1.989800e+04,45.731844,3.855954,19.253541,10.488646,2.787979,82.597617,1.727498,95.061081,0.000000
1,Abbeville County,SC,4084.200000,39.900000,3.914320e+04,22.958482,2.146448,20.175639,6.307386,0.731876,65.861217,30.431803,1.097323,0.523793
2,Acadia Parish,LA,6208.700000,36.360000,4.355100e+04,20.894436,3.432778,23.095957,7.351045,0.985701,86.836943,10.600556,2.720125,0.061823
3,Accomack County,VA,990.090909,48.833333,-1.009644e+08,14.984460,1.492080,26.353785,8.400832,0.576869,73.265729,24.703698,5.307683,0.425414
4,Ada County,ID,30571.600000,37.760000,6.859527e+04,10.538547,1.974483,11.349687,17.948706,0.390195,90.713137,1.276617,7.991648,2.490008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,Yuma County,AZ,20955.000000,38.250000,4.233260e+04,18.990481,3.769920,13.993028,5.376302,2.378759,70.144058,0.966517,66.587098,0.678975
3216,Yuma County,CO,1402.142857,40.157143,5.246343e+04,13.854494,1.191867,19.333835,11.042061,0.314244,98.336295,0.346629,14.767461,0.019532
3217,Zapata County,TX,4667.666667,31.433333,6.493133e+04,27.146749,5.689971,15.544681,3.799101,1.059885,94.897257,0.000000,97.133219,0.286540
3218,Zavala County,TX,4017.000000,32.333333,3.260533e+04,39.085347,3.397647,11.545223,3.748947,1.901093,97.666404,0.148230,90.042087,0.040763


In [29]:
# Count the rows returned when selecting the County column of the census table.
# NOTE(review): this is a row count, not a count of distinct county names —
# confirm that is the intended figure.
session.query(Census.County).count()

3220

In [45]:
# Combine measurement and station rows whose `station` values match (a join
# expressed through filter), then load the resulting statement into a DataFrame
# with pandas `read_sql_query`.
# NOTE(review): `Measurement` and `Station` are not defined in the visible cells
# of this notebook — confirm where they come from or restore their definitions.
result=session.query(Measurement, Station).filter(Measurement.station == Station.station).statement
df_comb=pd.read_sql_query(result,session.bind)
df_comb

Unnamed: 0,id,station,date,prcp,tobs,id_1,station_1,name,latitude,longitude,elevation
0,1,USC00519397,2010-01-01,0.08,65.0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,2,USC00519397,2010-01-02,0.00,63.0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
2,3,USC00519397,2010-01-03,0.00,74.0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
3,4,USC00519397,2010-01-04,0.00,76.0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
4,5,USC00519397,2010-01-06,,73.0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
...,...,...,...,...,...,...,...,...,...,...,...
19545,19546,USC00516128,2017-08-19,0.09,71.0,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19546,19547,USC00516128,2017-08-20,,78.0,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19547,19548,USC00516128,2017-08-21,0.56,76.0,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19548,19549,USC00516128,2017-08-22,0.50,76.0,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [46]:
# Find the most active stations (the stations with the most measurement rows) and
# list each station id, its name, and its row count in descending order of count.
# Station.name is part of the GROUP BY because PostgreSQL requires every selected
# non-aggregated column to be grouped; the grouping is otherwise by station id.
# NOTE(review): `Measurement` and `Station` are not defined in the visible cells
# of this notebook — confirm where they come from or restore their definitions.
session.query(Measurement.station,Station.name,func.count(Measurement.station))\
                    .filter(Measurement.station == Station.station)\
                    .group_by(Measurement.station, Station.name)\
                    .order_by(func.count(Measurement.station).desc()).all()

[('USC00519281', 'WAIHEE 837.5, HI US', 2772),
 ('USC00519397', 'WAIKIKI 717.2, HI US', 2724),
 ('USC00513117', 'KANEOHE 838.1, HI US', 2709),
 ('USC00519523', 'WAIMANALO EXPERIMENTAL FARM, HI US', 2669),
 ('USC00516128', 'MANOA LYON ARBO 785.2, HI US', 2612),
 ('USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 2202),
 ('USC00511918', 'HONOLULU OBSERVATORY 702.2, HI US', 1979),
 ('USC00517948', 'PEARL CITY, HI US', 1372),
 ('USC00518838', 'UPPER WAHIAWA 874.3, HI US', 511)]

In [47]:
# For the most active station id taken from the previous query's output, compute
# the lowest, highest, and average observed temperature (tobs).
# The station column is grouped so that it can be selected next to the aggregates;
# without a GROUP BY, PostgreSQL rejects the non-aggregated column.
# NOTE(review): `Measurement` is not defined in the visible cells of this
# notebook — confirm where it comes from or restore its definition.
session.query(Measurement.station,func.min(Measurement.tobs),
             func.max(Measurement.tobs),
             func.avg(Measurement.tobs))\
                    .filter(Measurement.station == 'USC00519281')\
                    .group_by(Measurement.station).all()

[('USC00519281', 54.0, 85.0, 71.66378066378067)]

# Close session

In [None]:
# Release database resources: close the ORM session, then the connection that
# was opened with engine.connect() and used by the pd.read_sql cells.
session.close()
conn.close()