# Using SQLAlchemy

In [96]:
import sqlalchemy as db
import pandas as pd
import os
import csv
import seaborn as sns
import matplotlib.pyplot as plt

## Connecting to database

In [97]:
# Open a connection to the PostgreSQL database addressed by DB_URL and
# prepare the MetaData object that the table definitions are attached to.
DB_URL = 'postgresql://localhost:5432/tets1'
engine = db.create_engine(DB_URL)
metadata = db.MetaData()
connection = engine.connect()

In [98]:
# List every table currently present in the database (it should be empty
# before the tables are created).
# Engine.table_names() is deprecated; the inspector is the supported way to
# ask the database for its table names.
db.inspect(engine).get_table_names()

  engine.table_names()


[]

# Creating Table


In [99]:
# Declare the three new, empty tables on the shared metadata object.
# In every table the (County, State) pair is the composite primary key.

def _float_columns(*names):
    """Build one Float column per name, preserving the given order."""
    return [db.Column(name, db.Float()) for name in names]


election = db.Table(
    'election', metadata,
    db.Column('County', db.String(100), primary_key=True, nullable=False),
    db.Column('State', db.String(100), primary_key=True, nullable=False),
    *_float_columns('Republic', 'Democrate'),
    db.Column('Win', db.String(100)),
)

census = db.Table(
    'census', metadata,
    db.Column('County', db.String(), primary_key=True, nullable=False),
    db.Column('State', db.String(20), primary_key=True, nullable=False),
    *_float_columns(
        'Population',
        'Median Age',
        'Household Income',
        'Poverty Rate',
        'Unemployment Rate',
        'High School Rate',
        'College Rate',
        'Uneducated Rate',
        'White Population Rate',
        'Black Population Rate',
        'Hispanic Population Rate',
        'Asian Population Rate',
    ),
)

vaccine = db.Table(
    'vaccine', metadata,
    db.Column('Date', db.Date()),
    db.Column('County', db.String(100), primary_key=True, nullable=False),
    db.Column('State', db.String(100), primary_key=True, nullable=False),
    *_float_columns(
        'Series_Complete_Pop_Pct',
        'Fully_Vaccinated',
        'Fully_Vaccinated_12+',
        'Fully_Vaccinated_18+',
        'Fully_Vaccinated_65+',
        'Completeness_pct',
        'Administered_Dose1_Recip',
        'Partially_Vaccinated',
        'Partially_Vaccinated_12+',
        'Partially_Vaccinated_18+',
        'Partially_Vaccinated_65+',
    ),
)

# Create in the database every table registered on metadata
metadata.create_all(engine)

In [100]:
# List every table currently present in the database (it will show all the
# tables created).
# Engine.table_names() is deprecated; the inspector is the supported way to
# ask the database for its table names.
db.inspect(engine).get_table_names()

  engine.table_names()


['election', 'census', 'vaccine']

In [101]:
# Get the Table objects for the three database tables.
# autoload_with=engine is what requests loading the definition through the
# engine; the separate autoload=True flag is deprecated and redundant, so it
# is not passed.
election = db.Table('election', metadata, autoload_with=engine)
census = db.Table('census', metadata, autoload_with=engine)
vaccine = db.Table('vaccine', metadata, autoload_with=engine)

## Inserting values into the tables from CSV files


In [102]:
# Fill the three tables from the CSV files in cleaned_data/.
# Every file starts with a header row; the remaining rows are mapped to table
# columns by position, so each column list below is in CSV field order.

def _read_csv_records(path, column_names):
    """Read a CSV file with a header row into a list of ``{column: value}`` dicts.

    Parameters:
        path: location of the CSV file.
        column_names: target column names, in the same order as the CSV fields.

    Returns:
        One dict per data row. Values are the strings read from the file.

    Raises:
        IndexError: if a data row has fewer fields than ``column_names``.
    """
    # newline='' is required by the csv module so quoted line breaks are parsed correctly
    with open(path, 'r', encoding="utf-8", newline='') as handle:
        reader = csv.reader(handle, delimiter=',')
        next(reader, None)  # drop the header row; tolerate a completely empty file
        return [{name: row[index] for index, name in enumerate(column_names)}
                for row in reader]


def _load_csv_into_table(path, table, column_names):
    """Insert every data row of the CSV file at ``path`` into ``table``."""
    records = _read_csv_records(path, column_names)
    if not records:
        return  # nothing to insert
    # engine.begin() opens a transaction that is committed when the block
    # finishes and rolled back if the insert raises
    with engine.begin() as conn:
        conn.execute(table.insert(), records)


#For table vaccine
path_vaccine = os.path.join('cleaned_data', 'vaccine_data.csv')
_load_csv_into_table(path_vaccine, vaccine, [
    'Date',
    'County',
    'State',
    'Series_Complete_Pop_Pct',
    'Fully_Vaccinated',
    'Fully_Vaccinated_12+',
    'Fully_Vaccinated_18+',
    'Fully_Vaccinated_65+',
    'Completeness_pct',
    'Administered_Dose1_Recip',
    'Partially_Vaccinated',
    'Partially_Vaccinated_12+',
    'Partially_Vaccinated_18+',
    'Partially_Vaccinated_65+',
])

#For table election
path_election = os.path.join('cleaned_data', 'election_data.csv')
_load_csv_into_table(path_election, election, [
    'County',
    'State',
    'Republic',
    'Democrate',
    'Win',
])

#For table census
path_census = os.path.join('cleaned_data', 'census_data.csv')
_load_csv_into_table(path_census, census, [
    'County',
    'State',
    'Population',
    'Median Age',
    'Household Income',
    'Poverty Rate',
    'Unemployment Rate',
    'High School Rate',
    'College Rate',
    'Uneducated Rate',
    'White Population Rate',
    'Black Population Rate',
    'Hispanic Population Rate',
    'Asian Population Rate',
])


In [103]:
# Print the full table metadata held for the census table
census_table = metadata.tables['census']
print(repr(census_table))

Table('census', MetaData(), Column('County', String(), table=<census>, primary_key=True, nullable=False), Column('State', String(length=20), table=<census>, primary_key=True, nullable=False), Column('Population', Float(), table=<census>), Column('Median Age', Float(), table=<census>), Column('Household Income', Float(), table=<census>), Column('Poverty Rate', Float(), table=<census>), Column('Unemployment Rate', Float(), table=<census>), Column('High School Rate', Float(), table=<census>), Column('College Rate', Float(), table=<census>), Column('Uneducated Rate', Float(), table=<census>), Column('White Population Rate', Float(), table=<census>), Column('Black Population Rate', Float(), table=<census>), Column('Hispanic Population Rate', Float(), table=<census>), Column('Asian Population Rate', Float(), table=<census>), schema=None)


In [104]:
# List the census column names (.c is the short spelling of .columns)
census.columns.keys()

['County',
 'State',
 'Population',
 'Median Age',
 'Household Income',
 'Poverty Rate',
 'Unemployment Rate',
 'High School Rate',
 'College Rate',
 'Uneducated Rate',
 'White Population Rate',
 'Black Population Rate',
 'Hispanic Population Rate',
 'Asian Population Rate']

# Querying


### 1. Election winner and vaccination figures per county

In [117]:
# Raw SQL for query 1: join election to vaccine on matching County and State,
# returning the county, its Win value and two vaccination columns.
# Column names are double-quoted because they contain upper-case letters.
sql_1= '''SELECT e."County", e."Win", v."Completeness_pct", v."Fully_Vaccinated" 
       FROM election AS e
       JOIN vaccine AS v
       ON e."County"=v."County" AND e."State"=v."State" '''

In [118]:
# Run the raw SQL string (wrapped in text()) on the open connection and load
# every returned row into a DataFrame.
results_1 = connection.execute(db.text(sql_1)).fetchall()

# Labels follow the SELECT order of sql_1; they carry no surrounding
# whitespace so the columns can be looked up by their plain names.
df_1 = pd.DataFrame(
    results_1,
    columns=['County', 'Win', 'Completeness_pct', 'Fully_Vaccinated'],
)
df_1


Unnamed: 0,County,Win,Completeness_pct,Fully_Vaccinated
0,Marengo County,D,92.6,9272.0
1,Taylor County,R,94.4,12516.0
2,Jefferson County,D,90.3,25225.0
3,Musselshell County,R,95.7,1443.0
4,Powder River County,R,95.7,330.0
...,...,...,...,...
2977,Licking County,R,98.7,87127.0
2978,Silver Bow County,D,95.7,19709.0
2979,Walla Walla County,R,96.1,34753.0
2980,Franklin County,R,93.6,11637.0


In [120]:
# Query 1 built with SQLAlchemy expressions instead of a raw SQL string:
# election joined to vaccine on matching County and State.
# Columns are passed to select() positionally; the list form select([...])
# is deprecated.
stmt_1 = (
    db.select(
        election.c.County,
        election.c.Win,
        vaccine.c.Completeness_pct,
        vaccine.c.Fully_Vaccinated,
    )
    .join(vaccine, db.and_(election.c.County == vaccine.c.County,
                           election.c.State == vaccine.c.State))
)
res_1 = connection.execute(stmt_1).fetchall()

# Labels follow the order of the selected columns; they carry no surrounding
# whitespace so the columns can be looked up by their plain names.
df_alc1 = pd.DataFrame(
    res_1,
    columns=['County', 'Win', 'Completeness_pct', 'Fully_Vaccinated'],
)
df_alc1

Unnamed: 0,County,Win,Completeness_pct,Fully_Vaccinated
0,Marengo County,D,92.6,9272.0
1,Taylor County,R,94.4,12516.0
2,Jefferson County,D,90.3,25225.0
3,Musselshell County,R,95.7,1443.0
4,Powder River County,R,95.7,330.0
...,...,...,...,...
2977,Licking County,R,98.7,87127.0
2978,Silver Bow County,D,95.7,19709.0
2979,Walla Walla County,R,96.1,34753.0
2980,Franklin County,R,93.6,11637.0


### 2. Census and vaccination figures for counties where Win is 'D'

In [129]:
# Raw SQL for query 2: join election to census and census to vaccine on
# matching County and State, keeping only the rows whose Win value is 'D'.
# Selected columns, in order: County, Win, Uneducated Rate, Household Income,
# Completeness_pct, Fully_Vaccinated.
sql_2='''SELECT e."County", e."Win", c."Uneducated Rate",c."Household Income",  v."Completeness_pct", v."Fully_Vaccinated" 
       FROM election AS e
       JOIN census AS c
       ON e."County"=c."County" AND e."State"=c."State" 
       JOIN vaccine as v
       ON c."County"=v."County" AND c."State"=v."State" 
       WHERE e."Win"='D' '''

In [130]:
# Execute the raw SQL of sql_2 via text() and collect all rows
query_2 = db.text(sql_2)
results_2 = connection.execute(query_2).fetchall()

# Column labels, listed in the same order as the SELECT clause of sql_2
columns_2 = [
    'County',
    'Win',
    'Uneducated Rate',
    'Household Income',
    'Completeness_pct',
    'Fully_Vaccinated',
]
df_2 = pd.DataFrame(results_2, columns=columns_2)
df_2

Unnamed: 0,County,Win,Uneducated Rate,Household Income,Completeness_pct,Fully_Vaccinated
0,Adams County,D,0.781503,3.009400e+04,98.3,14856.0
1,Adams County,D,1.353289,7.180500e+04,97.4,306229.0
2,Addison County,D,0.260660,-4.160257e+07,73.7,23077.0
3,Alachua County,D,0.653820,-3.504008e+07,98.7,161009.0
4,Alameda County,D,1.472004,-2.711064e+07,97.4,1187368.0
...,...,...,...,...,...,...
422,Yazoo County,D,1.001664,-1.110788e+08,98.3,12975.0
423,Yolo County,D,1.733387,-1.110572e+08,97.4,133527.0
424,York County,D,0.285868,-1.845340e+07,96.5,148654.0
425,Zavala County,D,1.901093,3.260533e+04,99.1,5509.0


In [132]:
#using sql_alchemy
res_2=connection.execute(db.select([election.c.County,
                                   election.c.Win,
                                    census.c['Household Income'],
                                    census.c['Uneducated Rate'],
                                   vaccine.c.Completeness_pct,
                                   vaccine.c.Fully_Vaccinated])
                         .join(census, db.and_(election.c.County==census.c.County,
                                               election.c.State==census.c.State))
                         .join(vaccine, db.and_(census.c.County==vaccine.c.County,
                                               census.c.State==vaccine.c.State))
                         .where(election.c.Win=='D')).fetchall()
df_alc2 = pd.DataFrame(res_2, columns=(['County', 'Win', 
                                         'Uneducated Rate','Household Income',
                                         'Completeness_pct', 'Fully_Vaccinated']))
df_alc2

Unnamed: 0,County,Win,Uneducated Rate,Household Income,Completeness_pct,Fully_Vaccinated
0,Adams County,D,3.009400e+04,0.781503,98.3,14856.0
1,Adams County,D,7.180500e+04,1.353289,97.4,306229.0
2,Addison County,D,-4.160257e+07,0.260660,73.7,23077.0
3,Alachua County,D,-3.504008e+07,0.653820,98.7,161009.0
4,Alameda County,D,-2.711064e+07,1.472004,97.4,1187368.0
...,...,...,...,...,...,...
422,Yazoo County,D,-1.110788e+08,1.001664,98.3,12975.0
423,Yolo County,D,-1.110572e+08,1.733387,97.4,133527.0
424,York County,D,-1.845340e+07,0.285868,96.5,148654.0
425,Zavala County,D,3.260533e+04,1.901093,99.1,5509.0
