### Imports

In [77]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

### 2020 Census Population data

In [78]:
url = 'https://www.census.gov/quickfacts/quickfacts/geo/dashboard/US/POP010220'
res = requests.get(url)

In [79]:
soup = BeautifulSoup(res.content)

In [80]:
table = soup.find(attrs={'class':'qf-graph-scroll'})
# soup.find_all(attrs={'class':'qf-positive'})
tbody = table.find_all(attrs={'class':"qf-graph-geo"})
tr = tbody[1]
state = tr.find('a').attrs['data-title']
tr.find(attrs={'class':'qf-positive'}).attrs['data-value']

'29145505'

In [81]:
pop_2020 = []
for tr in tbody:
    state = tr.find('a').attrs['data-title']
    pop = tr.find(attrs={'class':'qf-positive'}).attrs['data-value']
    
    state_pop = {'State': state, '2020 Population': pop}
    pop_2020.append(state_pop)

In [82]:
pop_2020 = pd.DataFrame(pop_2020)

### 2010 Census Population Data

In [83]:
url2 = 'https://www.census.gov/quickfacts/quickfacts/geo/dashboard/US/POP010210'
res2 = requests.get(url2)

In [84]:
soup2 = BeautifulSoup(res2.content)
table2 = soup2.find(attrs={'class':'qf-graph-scroll'})
tbody2 = table2.find_all(attrs={'class':"qf-graph-geo"})

In [85]:
pop_2010 = []
for tr in tbody2:
    state = tr.find('a').attrs['data-title']
    pop = tr.find(attrs={'class':'qf-positive'}).attrs['data-value']
    
    state_pop = {'State': state, '2010 Population': pop}
    pop_2010.append(state_pop)

pop_2010 = pd.DataFrame(pop_2010)

In [86]:
population = pd.merge(pop_2010, pop_2020)
population.head()

Unnamed: 0,State,2010 Population,2020 Population
0,California,37253956,39538223
1,Texas,25145561,29145505
2,New York,19378102,20201249
3,Florida,18801310,21538187
4,Illinois,12830632,12812508


In [87]:
population.to_csv('Data/Population_data_2010_&_2020.csv')

### Excess Deaths Data

In [88]:
deaths = pd.read_csv('Ignore/Excess_Deaths_Associated_with_COVID-19.csv')
deaths.head()

Unnamed: 0,Week Ending Date,State,Observed Number,Upper Bound Threshold,Exceeds Threshold,Average Expected Count,Excess Estimate,Total Excess Estimate,Percent Excess Estimate,Year,Type,Outcome,Suppress,Note
0,2017-01-07,Alabama,1121.0,1136,False,1059,62,29601,5.8527,2017,Predicted (weighted),All causes,,
1,2017-01-14,Alabama,1130.0,1140,False,1067,63,29601,5.906102,2017,Predicted (weighted),All causes,,
2,2017-01-21,Alabama,1048.0,1142,False,1071,0,29601,0.0,2017,Predicted (weighted),All causes,,
3,2017-01-28,Alabama,1026.0,1142,False,1070,0,29601,0.0,2017,Predicted (weighted),All causes,,
4,2017-02-04,Alabama,1036.0,1142,False,1068,0,29601,0.0,2017,Predicted (weighted),All causes,,


In [89]:
# getting sum of each states excess deaths 
def excess_deaths(deaths, start_year=2017, end_year=2022):
    state_totals = {state: [0] * (end_year - start_year + 1) for state in 
                    deaths['State'].unique()}
    
    for index, row in deaths.iterrows():
        state = row['State']
        year = row['Year']
        excess_estimate = row['Excess Estimate']
        
        if start_year <= year <= end_year:
            state_totals[state][year - start_year] += excess_estimate

    result = pd.DataFrame(state_totals).T.reset_index()
    result.columns = ['State'] + [f'Exc_deaths_{year}' for year in 
                                  range(start_year, end_year + 1)]
    
    return result


excess_deaths = excess_deaths(deaths)
excess_deaths.head()


Unnamed: 0,State,Exc_deaths_2017,Exc_deaths_2018,Exc_deaths_2019,Exc_deaths_2020,Exc_deaths_2021,Exc_deaths_2022
0,Alabama,2649,4062,945,20726,29283,14805
1,Alaska,417,492,639,1422,3483,1783
2,Arizona,3522,4281,1785,30955,40347,20653
3,Arkansas,3054,2199,1434,11285,15391,8969
4,California,17241,13434,1029,93875,133206,89195


In [90]:
# Getting count for each year and each state where they exceed their threshold
def count_exceeds_threshold(deaths, start_year=2017, end_year=2022):
    state_counts = {state: [0] * (end_year - start_year + 1) for state in 
                    deaths['State'].unique()}
    
    for index, row in deaths.iterrows():
        state = row['State']
        year = row['Year']
        exceeds_threshold = row['Exceeds Threshold']
        
        if start_year <= year <= end_year and exceeds_threshold:
            state_counts[state][year - start_year] += 1

    result = pd.DataFrame(state_counts).T.reset_index()
    result.columns = ['State'] + [f'Exc_count_{year}' for year in 
                                  range(start_year, end_year + 1)]
    
    return result
exceeds_threshold = count_exceeds_threshold(deaths)
exceeds_threshold.head()

Unnamed: 0,State,Exc_count_2017,Exc_count_2018,Exc_count_2019,Exc_count_2020,Exc_count_2021,Exc_count_2022
0,Alabama,3,15,0,99,106,87
1,Alaska,6,6,3,34,75,42
2,Arizona,9,15,0,89,114,77
3,Arkansas,12,12,0,66,93,64
4,California,18,12,0,94,104,106


In [91]:
# Merging the data
finaldeaths = pd.merge(excess_deaths, exceeds_threshold, how='left')
finaldeaths.head()

Unnamed: 0,State,Exc_deaths_2017,Exc_deaths_2018,Exc_deaths_2019,Exc_deaths_2020,Exc_deaths_2021,Exc_deaths_2022,Exc_count_2017,Exc_count_2018,Exc_count_2019,Exc_count_2020,Exc_count_2021,Exc_count_2022
0,Alabama,2649,4062,945,20726,29283,14805,3,15,0,99,106,87
1,Alaska,417,492,639,1422,3483,1783,6,6,3,34,75,42
2,Arizona,3522,4281,1785,30955,40347,20653,9,15,0,89,114,77
3,Arkansas,3054,2199,1434,11285,15391,8969,12,12,0,66,93,64
4,California,17241,13434,1029,93875,133206,89195,18,12,0,94,104,106


In [92]:
finaldeaths.to_csv('Data/Excess_deaths&Exceeds_Threshold_data.csv')

### Population Density Data

In [93]:
url3 = 'https://wisevoter.com/state-rankings/population-density-by-state/'
res3 = requests.get(url3)
soup3 = BeautifulSoup(res3.content)

In [94]:
table3 = soup3.find('table', attrs={'id': 'shdb-on-page-table'})
tbody = table3.find('tbody')
trs = tbody.find_all('tr')

AttributeError: 'NoneType' object has no attribute 'find'

In [None]:
pop_density = []
for tr in trs:
    state = tr.find(attrs={'class':'shdb-on-page-table-body-Geo'}).text
    density = tr.find(attrs={'class':'shdb-on-page-table-body-Data'}).text.split()[0]
    
    state_density = {'State':state, 'Population Density per mi²': density}
    pop_density.append(state_density)
Pop_density = pd.DataFrame(pop_density)
Pop_density.head()

In [None]:
Pop_density.to_csv('Data/Population_Density_data.csv')

### Mask county data

In [95]:
county_mask = pd.read_csv('Ignore/U.S._State_and_Territorial_Public_Mask_Mandates_From_April_10__2020_through_August_15__2021_by_County_by_Day.csv')
county_mask

Unnamed: 0,State_Tribe_Territory,County_Name,FIPS_State,FIPS_County,date,order_code,Face_Masks_Required_in_Public,Source_of_Action,URL,Citation
0,AL,Autauga County,1,1,4/10/2020,2,,,,
1,AL,Autauga County,1,1,4/11/2020,2,,,,
2,AL,Autauga County,1,1,4/12/2020,2,,,,
3,AL,Autauga County,1,1,4/13/2020,2,,,,
4,AL,Autauga County,1,1,4/14/2020,2,,,,
...,...,...,...,...,...,...,...,...,...,...
1593864,VI,St. Thomas Island,78,30,8/11/2021,1,Yes,Official,,"V.I. Twenty-Seventh Supp. Exec. Order (Aug. 6,..."
1593865,VI,St. Thomas Island,78,30,8/12/2021,1,Yes,Official,,"V.I. Twenty-Seventh Supp. Exec. Order (Aug. 6,..."
1593866,VI,St. Thomas Island,78,30,8/13/2021,1,Yes,Official,,"V.I. Twenty-Seventh Supp. Exec. Order (Aug. 6,..."
1593867,VI,St. Thomas Island,78,30,8/14/2021,1,Yes,Official,,"V.I. Twenty-Seventh Supp. Exec. Order (Aug. 6,..."


In [101]:
def mandate_length(data):
    county_counts = {}
    for index, row in data.iterrows():
        county_name = row['County_Name']
        mask_required = row['Face_Masks_Required_in_Public']

        if mask_required == 'Yes':
            state = row['State_Tribe_Territory']
            if county_name in county_counts:
                county_counts[county_name] += 1
            else:
                county_counts[county_name] = 1

    return county_counts

county_counts = mandate_length(county_mask)

In [106]:
# Put my code into Chatgpt who helped simplify to speed up run time
def mandate_length(data):
    county_counts = {}
    for index, row in data.iterrows():
        county_name = row['County_Name']
        mask_required = row['Face_Masks_Required_in_Public']
        state = row['State_Tribe_Territory']
        
        if mask_required == 'Yes':
            if county_name in county_counts:
                county_counts[county_name]['Count'] += 1
            else:
                county_counts[county_name] = {'State': state, 'Count': 1}

    county_counts_list = [{'State': v['State'], 'County': k, 'Count': v['Count']} for k, v in county_counts.items()]

    return pd.DataFrame(county_counts_list)

county_counts_df = mandate_length(county_mask)

In [108]:
county_counts_df.to_csv('Data/county_mask_mandata.csv', index=False)

### Merging the data

In [None]:
employ = pd.read_csv('Data/cleaned_employment.csv')
income = pd.read_csv('Data/cleaned_income.csv')
life = pd.read_csv('Data/cleaned_Life_Expentency.csv')
insur = pd.read_csv('Data/cleaned_insur.csv')
pop_dense = pd.read_csv('Data/cleaned_pop_dense.csv')
pop_size = pd.read_csv('Data/cleaned_pop_size.csv')
pre_cond = pd.read_csv('Data/cleaned_pre_condtions.csv')
total_phys = pd.read_csv('Data/cleaned_total_physician.csv')

In [None]:
mask

### Modeling 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression