In [1]:
# Dependencies and Setup
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import re
import time 
import datetime 
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Define URLs for the Johns Hopkins CSSE US time-series data
confirm_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
death_url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'

# Read in COVID-19 files.
# `on_bad_lines='skip'` is the replacement for `error_bad_lines=False`, which
# was deprecated in pandas 1.3 and removed in pandas 2.0; malformed rows are
# still skipped instead of aborting the read.
confirm_df = pd.read_csv(confirm_url, on_bad_lines='skip')
death_df = pd.read_csv(death_url, on_bad_lines='skip')

# Identifier / coordinate columns whose per-state sums are meaningless.
non_count_columns = ['UID', 'code3', 'FIPS', 'Lat', 'Long_']

# Combine state data: one row per Province_State holding the summed counts.
# `numeric_only=True` keeps text columns out of the result; without it newer
# pandas versions concatenate the strings of every row in the group instead
# of silently dropping those columns.
state_confirms = (
    confirm_df
    .groupby('Province_State')
    .sum(numeric_only=True)
    .drop(columns=non_count_columns)
)
state_deaths = (
    death_df
    .groupby('Province_State')
    .sum(numeric_only=True)
    .drop(columns=non_count_columns)
)

In [3]:
# Preview the first five states of the aggregated confirmed-case table
state_confirms.head(n=5)

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,5/8/20,5/9/20,5/10/20,5/11/20,5/12/20,5/13/20,5/14/20,5/15/20,5/16/20,5/17/20
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,0,0,0,0,0,0,0,0,0,...,9385,9668,9889,10164,10464,10700,11101,11373,11674,11771
Alaska,0,0,0,0,0,0,0,0,0,0,...,377,378,379,379,383,383,383,388,388,388
American Samoa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,0,0,0,0,1,1,1,1,1,1,...,10526,10960,11119,11383,11736,12216,12674,13169,13666,13945
Arkansas,0,0,0,0,0,0,0,0,0,0,...,3747,3747,4012,4043,4164,4236,4366,4463,4578,4759


In [4]:
# Preview the first five states of the aggregated deaths table
state_deaths.head(n=5)

Unnamed: 0_level_0,Population,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,5/8/20,5/9/20,5/10/20,5/11/20,5/12/20,5/13/20,5/14/20,5/15/20,5/16/20,5/17/20
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,4903185,0,0,0,0,0,0,0,0,0,...,383,390,393,403,435,450,473,483,485,488
Alaska,731545,0,0,0,0,0,0,0,0,0,...,10,10,10,10,10,10,10,10,10,10
American Samoa,55641,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,7278717,0,0,0,0,0,0,0,0,0,...,517,532,536,542,562,595,624,651,679,680
Arkansas,3017804,0,0,0,0,0,0,0,0,0,...,88,88,91,94,95,97,98,98,98,98


In [5]:
# Specify url
url = 'https://www.finra.org/rules-guidance/key-topics/covid-19/shelter-in-place'

# Parse HTML object
# NOTE(review): `response` and `soup` are not used again in this cell; they
# are kept in case later cells rely on them.
response = requests.get(url)
soup = bs(response.text, 'lxml')

# Read tables; the first table on the page is the one used here
tables = pd.read_html(url)
table = tables[0]


def parse_order_date(text):
    """Return the first slash-formatted date in ``text`` as 'YYYY-MM-DD'.

    ``text`` is split on whitespace and the first token containing a '/' is
    read as month/day/year.

    Parameters
    ----------
    text : object
        Cell value from one of the order-date columns.

    Returns
    -------
    str or float
        ISO formatted date string, or ``np.nan`` when ``text`` is not a
        string, contains no token with a '/', or that token is not a valid
        month/day/year date.
    """
    # Missing cells are floats (NaN) and have no .split()
    if not isinstance(text, str):
        return np.nan
    slash_tokens = [token for token in text.split() if '/' in token]
    try:
        parts = slash_tokens[0].split('/')
        parsed = datetime.date(int(parts[2]), int(parts[0]), int(parts[1]))
    except (IndexError, ValueError, OverflowError):
        # No candidate token, too few parts, non-numeric parts or an
        # impossible calendar date
        return np.nan
    return parsed.isoformat()


# Remove excess columns; copy so the assignments below modify an independent
# frame rather than a slice of `table`
orders = table[['State', 'Order Date', 'Order Expiration Date']].copy()

# Replace special (non-word) characters in state names with spaces
orders['State'] = orders['State'].str.replace(r'[^\w]', ' ', regex=True)

# Normalise the start and expiration dates to 'YYYY-MM-DD' (NaN when absent)
for date_column in ('Order Date', 'Order Expiration Date'):
    orders[date_column] = orders[date_column].map(parse_order_date)

orders.head()

Unnamed: 0,State,Order Date,Order Expiration Date
0,Alabama,2020-04-03,2020-04-30
1,Alaska,2020-03-27,
2,Arizona,2020-03-30,2020-05-15
3,California,2020-03-19,
4,Colorado,2020-03-25,2020-04-26
