## State COVID-19 Counts Scraped from the NYTimes GitHub Repo

In [1]:
# Import dependencies
import pandas as pd

In [2]:
# GitHub URL of the page to be scraped: the HTML "blob" view of us-states.csv
# in the nytimes/covid-19-data repository (master branch).
cases_url = "https://github.com/nytimes/covid-19-data/blob/master/us-states.csv"

In [18]:
# Load the state-level counts from the NYTimes GitHub repository.
# The "blob" page is an HTML viewer for the file, and pd.read_html only works while
# GitHub renders the CSV as a <table> in the served HTML. The raw endpoint serves the
# CSV itself, so derive that URL from cases_url and parse it directly.
raw_cases_url = (
    cases_url
    .replace("https://github.com/", "https://raw.githubusercontent.com/", 1)
    .replace("/blob/", "/", 1)
)
cases_df = pd.read_csv(raw_cases_url)

# There is no "Unnamed: 0" column to drop when reading the CSV directly; presumably it
# was the line-number column of the rendered HTML table.
cases_df

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0
...,...,...,...,...,...
1659,2020-04-01,Virginia,51,1511,34
1660,2020-04-01,Washington,53,5588,249
1661,2020-04-01,West Virginia,54,191,2
1662,2020-04-01,Wisconsin,55,1550,33


### Create Database Connection

In [45]:
# Import dependencies
from config import user, password

# Import sqlalchemy to connect to database
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, Column, Integer, String, Float, func, inspect, ForeignKey
import psycopg2

In [5]:
# Create postgresql connection
# NOTE(review): user/password are interpolated verbatim; a password containing
# URL-reserved characters (e.g. "@" or "/") would need escaping -- confirm.
engine = create_engine(f"postgresql://{user}:{password}@localhost:5432/Twitter_COVID19")

# Call connect(): without the parentheses `conn` was the bound method, not a connection.
conn = engine.connect()

# Declarative base class that the ORM models in this notebook inherit from.
Base = declarative_base()

# No models are registered on Base yet at this point, so this emits no DDL here.
Base.metadata.create_all(engine)

# ORM session for queries/inserts, and an inspector for looking at the live schema.
session = Session(bind=engine)
inspector = inspect(engine)

In [6]:
class Governors(Base):
    """ORM mapping for rows of the ``governors`` table."""

    __tablename__ = "governors"

    # Auto-incrementing integer primary key
    governor_id = Column(Integer, primary_key=True, autoincrement=True)

    # Text attributes (length-limited strings)
    governor = Column(String(50))
    state = Column(String(20))
    party = Column(String(20))
    inauguration = Column(String(50))

    # Term boundaries are stored as integers
    term_begin = Column(Integer)
    term_end = Column(Integer)

    # Term limit is kept as a short string
    term_limit = Column(String(10))


In [7]:
# List the names of the tables visible through the inspector bound to `engine`
inspector.get_table_names()

['governors']

In [11]:
# Fetch (governor, state, governor_id) tuples for every row of the governors table
governor_fields = (Governors.governor, Governors.state, Governors.governor_id)
session.query(*governor_fields).all()

[('Kay Ivey', 'Alabama', 1),
 ('Mike Dunleavy', 'Alaska', 2),
 ('Doug Ducey', 'Arizona', 3),
 ('Asa Hutchinson', 'Arkansas', 4),
 ('Gavin Newsom', 'California', 5),
 ('Jared Polis', 'Colorado', 6),
 ('Ned Lamont', 'Connecticut', 7),
 ('John Carney', 'Delaware', 8),
 ('Ron DeSantis', 'Florida', 9),
 ('Brian Kemp', 'Georgia', 10),
 ('David Ige', 'Hawaii', 11),
 ('Brad Little', 'Idaho', 12),
 ('J. B. Pritzker', 'Illinois', 13),
 ('Eric Holcomb', 'Indiana', 14),
 ('Kim Reynolds', 'Iowa', 15),
 ('Laura Kelly', 'Kansas', 16),
 ('Andy Beshear', 'Kentucky', 17),
 ('John Bel Edwards', 'Louisiana', 18),
 ('Janet Mills', 'Maine', 19),
 ('Larry Hogan', 'Maryland', 20),
 ('Charlie Baker', 'Massachusetts', 21),
 ('Gretchen Whitmer', 'Michigan', 22),
 ('Tim Walz', 'Minnesota', 23),
 ('Tate Reeves', 'Mississippi', 24),
 ('Mike Parson', 'Missouri', 25),
 ('Steve Bullock', 'Montana', 26),
 ('Pete Ricketts', 'Nebraska', 27),
 ('Steve Sisolak', 'Nevada', 28),
 ('Chris Sununu', 'New Hampshire', 29),
 ('Phi

In [38]:
def state_to_id(my_str):
    """Return the governor_id stored for a state name, or -1 when there is none.

    Args:
        my_str: State name to match against ``Governors.state`` (exact equality).

    Returns:
        int: ``governor_id`` of the first matching governors row, or ``-1`` if the
        query finds no row for that state.
    """
    match = (session.query(Governors.governor_id)
                    .filter(Governors.state == my_str)
                    .first())
    # .first() yields None when no governors row matches the state name
    return -1 if match is None else int(match[0])


# Resolve each distinct state once instead of issuing one query per DataFrame row,
# then attach the ids to every row through the lookup table.
state_ids = {name: state_to_id(name) for name in cases_df["state"].unique()}
cases_df["governor_id"] = cases_df["state"].map(state_ids)

In [42]:
# Keep only the rows whose governor_id is not the -1 placeholder
has_governor = cases_df["governor_id"].ne(-1)
cases_df = cases_df[has_governor]

In [43]:
# Display the filtered frame, which now carries the governor_id column
cases_df

Unnamed: 0,date,state,fips,cases,deaths,governor_id
0,2020-01-21,Washington,53,1,0,47
1,2020-01-22,Washington,53,1,0,47
2,2020-01-23,Washington,53,1,0,47
3,2020-01-24,Illinois,17,1,0,13
4,2020-01-24,Washington,53,1,0,47
...,...,...,...,...,...,...
1659,2020-04-01,Virginia,51,1511,34,46
1660,2020-04-01,Washington,53,5588,249,47
1661,2020-04-01,West Virginia,54,191,2,48
1662,2020-04-01,Wisconsin,55,1550,33,49


In [46]:
class Cases(Base):
    """ORM mapping for the ``cases`` table: case and death counts linked to a governor.

    NOTE(review): no date or state column is stored, so an inserted row cannot be tied
    back to its reporting date -- confirm this is intended.
    """

    __tablename__ = "cases"

    # Auto-incrementing integer primary key
    update_id = Column(Integer, primary_key=True, autoincrement=True)

    # Points at the primary key of the governors table. The previous target,
    # "governors.sector_id", names a column that the governors table does not define.
    governor_id = Column(Integer, ForeignKey("governors.governor_id"))

    cases = Column(Integer)
    deaths = Column(Integer)




In [None]:
# Make sure the "cases" table exists before inserting (existing tables are left alone).
Base.metadata.create_all(engine)

# Build one Cases object per row of cases_df. The previous loop constructed Governors
# objects from keys ("Governor", "State", ...) that cases_df does not contain.
# int() turns pandas/numpy integers into plain Python ints for the database driver.
case_rows = [
    Cases(governor_id=int(rec.governor_id),
          cases=int(rec.cases),
          deaths=int(rec.deaths))
    for rec in cases_df[["governor_id", "cases", "deaths"]].itertuples(index=False)
]

try:
    # Stage everything and commit once, rather than one transaction per row
    session.add_all(case_rows)
    session.commit()
except Exception:
    # Leave the session usable and the table untouched if any insert fails
    session.rollback()
    raise
finally:
    session.close()