### Imports

In [2]:
import pandas as pd
import numpy as np

# State Level Data

## Pre-Existing Health Conditions Datasets:

#### For pre-existing conditions & immunizations:
1. choose crude or adj
2. drop old index
3. rename data value to question
4. drop year and question columns
5. For total number of incident data -> after concatenating with population data, convert to rate

*Crude values are the raw numbers. Adj values are those numbers adjusted (standardized) for the population's age distribution. We could retain the crude numbers; however, the age-adjusted numbers will probably be better for cross-comparison.*

In [None]:
def _load_condition(csv_name, value_name):
    """Load one indicator file from ``Data/Raw/`` and normalise its columns.

    Parameters
    ----------
    csv_name : str
        File name inside ``Data/Raw/``.
    value_name : str
        New name for the file's ``DataValue`` column.

    Returns
    -------
    pandas.DataFrame
        The file's rows with ``DataValue`` renamed to ``value_name``,
        ``LocationDesc`` renamed to ``Location``, and the ``YearStart`` and
        ``Question`` columns removed.
    """
    frame = pd.read_csv(f'Data/Raw/{csv_name}')
    frame = frame.rename(columns={'DataValue': value_name, 'LocationDesc': 'Location'})
    return frame.drop(columns=['YearStart', 'Question'])


# One frame per indicator; every file gets the same rename/drop treatment.
asthma = _load_condition('asthma_adj.csv', 'asthma_prevalence')
high_bp = _load_condition('high_blood_pressure_adj.csv', 'high_bp_prevalence')
cardiac_mortality = _load_condition('cardiac_mortality_adj.csv', 'cardiac_mortality_rate')
diabetes = _load_condition('diabetes_adj.csv', 'diabetes_prevalence')
kidney = _load_condition('kidney_adj.csv', 'kidney_disease_prevalence')
copd = _load_condition('copd_adj.csv', 'copd_prevalence')
immun = _load_condition('immun_adj.csv', 'flu_vaccination_rate_2019')

# Inner-join everything on Location, in the same order as before:
# immun, asthma, cardiac_mortality, high_bp, copd, kidney, diabetes.
pre_con = immun
for condition in (asthma, cardiac_mortality, high_bp, copd, kidney, diabetes):
    pre_con = pd.merge(pre_con, condition, on='Location', how='inner')
pre_con.head()

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from. index=True is kept on purpose: the merge step drops
# the resulting 'Unnamed: 0' column after loading.
pre_con.to_csv('Data/Cleaned/cleaned_pre_condtions.csv', index=True)

## Insurance rates by state

In [None]:
# Load the 2019 and 2021 state insurance tables.
insur_2019 = pd.read_csv('Data/Raw/2019_insurance.csv')
insur_2019.head()
insur_2021 = pd.read_csv('Data/Raw/2021_insurance.csv')

# Join the two years on Location; columns present in both files get the
# year appended as a suffix so they stay distinguishable.
insur = pd.merge(insur_2019, insur_2021, how='inner', on='Location',
                 suffixes=('_2019', '_2021'))
insur.head()

# The suffixed Year columns carry no information once the year is in the
# column names.
insur = insur.drop(columns=['Year_2019', 'Year_2021'])

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from.
insur.to_csv('Data/Cleaned/cleaned_insur.csv', index=False)

## Executive Approval and Health Behavior Surveys by State

In [None]:
# Raw survey tables: executive approval, health behavior, public health measures.
ea = pd.read_csv('Data/Raw/Executive Approval.csv')
hb = pd.read_csv('Data/Raw/Health Behavior.csv')
phb = pd.read_csv('Data/Raw/Public Health Measures.csv')
phb.head()

# Executive approval: rows whose Start_Date is 2020-04-16.
ea_2020 = ea.loc[ea['Start_Date'] == '2020-04-16'].copy()
ea_2020.head()
ea_2020.shape
ea_2020 = ea_2020.drop(
    columns=['Wave_time', 'StateFIPS', 'Start_Date', 'End_Date', 'N_State'])

# Health behavior: same Start_Date.
# NOTE(review): this file's sample-size column is spelled 'N_state' (lower-case
# "s") here, unlike the other two tables — confirm against the CSV header.
hb_2020 = hb.loc[hb['Start_Date'] == '2020-04-16'].copy()
hb_2020 = hb_2020.drop(
    columns=['Wave_time', 'StateFIPS', 'Start_Date', 'End_Date', 'N_state'])
hb_2020.head()

# Public health measures.
# NOTE(review): this filter uses 2020-12-16 while the other two use
# 2020-04-16, and phb_2020 is not merged below — confirm both are intended.
phb_2020 = phb.loc[phb['Start_Date'] == '2020-12-16'].copy()
phb_2020 = phb_2020.drop(
    columns=['Wave_time', 'StateFIPS', 'Start_Date', 'End_Date', 'N_State'])
phb_2020.head()

# Row/column counts of the three filtered tables.
for wave in (ea_2020, phb_2020, hb_2020):
    print(wave.shape)

# Left-join health behavior onto executive approval by state.
health_behavior = ea_2020.merge(hb_2020, how='left', on='State')
health_behavior.head()
health_behavior.shape

# Use the same key name ('Location') as the other cleaned state tables.
health_behavior = health_behavior.rename(columns={'State': 'Location'})
health_behavior.to_csv('Data/Cleaned/cleaned_health_behavior.csv', index=False)
health_behavior.head()

### Total Physicians 

In [None]:
# Load the raw physician table. The directory is spelled 'Data' (capital D)
# like every other read in this notebook; the lower-case spelling fails on
# case-sensitive filesystems.
df = pd.read_csv('Data/Raw/total physician.csv')

# Remove the two unnamed columns that are not renamed below.
df = df.drop(columns=['Unnamed: 9', 'Unnamed: 4'])

# Drop the last row and the first three rows, then renumber from 0.
# NOTE(review): presumably header/footer lines of the export — confirm.
df = df.iloc[3:-1].reset_index(drop=True)

# Drop the row at position 39 and renumber again.
# NOTE(review): presumably a non-state row — confirm against the raw file.
df = df.drop(index=39).reset_index(drop=True)

# Give the remaining positional columns readable names.
df = df.rename(columns={
    'Unnamed: 0': 'Location',
    'Unnamed: 1': 'Population',
    'Unnamed: 2': 'Physicians',
    'Unnamed: 3': 'Physicians Rate',
    'Unnamed: 5': 'Active MO',
    'Unnamed: 6': 'Active MO Rate',
    'Unnamed: 7': 'Active DO',
    'Unnamed: 8': 'Active DO Rate',
})

df = df.drop(columns=['Population'])

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from.
df.to_csv('Data/Cleaned/cleaned_total_physician.csv', index=False)

### Cleaning Income per Capita

In [None]:
income = pd.read_csv('Data/Raw/Income per capita.csv')

# Drop the first data row and the 'State or DC' column.
income = income.drop(income.index[0])
income = income.drop(columns=['State or DC'])

# Name the remaining positional columns. 'Inc_Per_CAp_2022' keeps its
# existing spelling because the plotting code refers to it by that exact name.
income = income.rename(columns={
    'Unnamed: 1': 'Location',
    'Unnamed: 2': 'Inc_Per_Cap_2020',
    'Unnamed: 3': 'Inc_Per_Cap_2021',
    'Unnamed: 4': 'Inc_Per_CAp_2022',
})

income = income.reset_index(drop=True)

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from.
income.to_csv('Data/Cleaned/cleaned_income.csv', index=False)

### Cleaning Total Employment

In [None]:
employment = pd.read_csv('Data/Raw/total employment.csv')

# Drop the first data row and the 'State or DC' column.
employment = employment.drop(employment.index[0])
employment = employment.drop(columns=['State or DC'])

# Name the remaining positional columns.
employment = employment.rename(columns={
    'Unnamed: 1': 'Location',
    'Unnamed: 2': 'Employment_2020',
    'Unnamed: 3': 'Employment_2021',
    'Unnamed: 4': 'Employment_2022',
})

employment = employment.reset_index(drop=True)

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from.
employment.to_csv('Data/Cleaned/cleaned_employment.csv', index=False)

### Cleaning Life Expectancy

In [None]:
# Load the life-expectancy table and discard its URL column.
life = pd.read_csv('Data/Raw/life_expectancy.csv')
life = life.drop(columns=['URL'])
life['STATE'].unique()

# Two-letter postal code -> full name (50 states plus DC).
state_name = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii',
    'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa',
    'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine',
    'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota',
    'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska',
    'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico',
    'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio',
    'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas',
    'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington',
    'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming',
}

# Add the full state name as 'States' and remove the abbreviation column.
# Codes missing from the mapping become NaN.
life = life.assign(States=life['STATE'].map(state_name)).drop(columns=['STATE'])

In [None]:
def _year_block(frame, start, stop, rate_name):
    """Return rows ``[start, stop)`` of *frame* as a two-column table.

    Keeps only ``States`` and ``RATE`` (renamed to *rate_name*) and renumbers
    the rows from 0 so the yearly blocks line up when placed side by side.
    Selecting the columns builds a new frame, so nothing is modified through a
    slice of the original.
    """
    block = frame.iloc[start:stop][['States', 'RATE']].reset_index(drop=True)
    return block.rename(columns={'RATE': rate_name})


# The earlier cells build the table as `life`; `df4` was never defined and
# raised a NameError.
# NOTE(review): assumes the rows are three consecutive 50-state blocks
# (2020, 2019, 2018) listed in the same state order — confirm.
life_2020 = _year_block(life, 0, 50, 'Life_Exp_2020')
life_2019 = _year_block(life, 50, 100, 'Life_Exp_2019')
life_2018 = _year_block(life, 100, 150, 'Life_Exp_2018')
life_2018.head()

# Put the three years side by side (aligned by row position) and keep only
# the first of the repeated 'States' columns.
life_exp = pd.concat([life_2020, life_2019, life_2018], axis=1)
life_exp = life_exp.loc[:, ~life_exp.columns.duplicated()]

# Add a new row for the "District of Columbia" at position 8; its rate
# columns are left empty (NaN).
new_row = pd.DataFrame({'States': 'District of Columbia'}, index=[len(life_exp)])
life_exp = pd.concat([life_exp.iloc[:8], new_row, life_exp.iloc[8:]])

# reset_index returns a new frame, so the result has to be assigned.
life_exp = life_exp.reset_index(drop=True)
life_exp = life_exp.rename(columns={'States': 'Location'})

# Written to Data/Cleaned/ because that is where the state-level merge step
# reads this file from.
life_exp.to_csv('Data/Cleaned/cleaned_Life_Expentency.csv', index=False)

### Covid Deaths by State

In [None]:
# NOTE(review): the file path in this call is cut off (unterminated string literal) — restore the CSV path before running.
deaths_covid = pd.read_csv('

In [None]:
# Reshape to one row per State with one column per (measure, Location value).
# NOTE(review): the renames below expect names like 'All causes_2020', so the
# 'Location' column presumably holds the year at this point — confirm.
deaths_covid = deaths_covid.pivot(
    index='State',
    columns='Location',
    values=['All causes', 'covid'],
)

# Flatten the two-level column index into '<measure>_<value>' strings.
deaths_covid.columns = ['_'.join(str(part) for part in col) for col in deaths_covid.columns]

# Turn State back into a regular column and call it Location, matching the
# key used by the other state tables.
deaths_covid = deaths_covid.reset_index().rename(columns={'State': 'Location'})

# Lower-case the 'All causes' columns; the covid entries map to themselves.
deaths_covid = deaths_covid.rename(columns={
    'All causes_2020': 'all_causes_2020',
    'All causes_2021': 'all_causes_2021',
    'All causes_2022': 'all_causes_2022',
    'covid_2020': 'covid_2020',
    'covid_2021': 'covid_2021',
    'covid_2022': 'covid_2022',
})
deaths_covid.head()

## Merging the Cleaned State Data into 1 df

In [None]:
def merge_dataframes():
    """Load the cleaned state tables and join them on ``Location``.

    Reads eight CSV files from ``Data/Cleaned/`` and merges them one after
    another on the ``Location`` column using ``DataFrame.merge`` with its
    default join type, then merges ``excess_deaths`` last.

    NOTE(review): ``excess_deaths`` is read from the enclosing scope and is
    not defined in the visible cells — confirm where it comes from.

    Returns
    -------
    pandas.DataFrame
        The merged table.
    """
    cleaned_files = (
        'Data/Cleaned/cleaned_employment.csv',
        'Data/Cleaned/cleaned_income.csv',
        'Data/Cleaned/cleaned_Life_Expentency.csv',
        'Data/Cleaned/cleaned_insur.csv',
        'Data/Cleaned/cleaned_pop_dense.csv',
        'Data/Cleaned/cleaned_pop_size.csv',
        'Data/Cleaned/cleaned_pre_condtions.csv',
        'Data/Cleaned/cleaned_total_physician.csv',
    )
    tables = [pd.read_csv(path) for path in cleaned_files]

    # Fold the tables together left to right, starting from employment.
    merged_df = tables[0]
    for table in tables[1:]:
        merged_df = merged_df.merge(table, on='Location')

    merged_df = merged_df.merge(excess_deaths, on='Location')
    return merged_df
# Build the combined state table, then discard the 'Unnamed: 0' column
# (a saved index column picked up from one of the cleaned CSVs).
merged_data = merge_dataframes()
merged_data = merged_data.drop(columns=['Unnamed: 0'])
merged_data.head()

In [None]:
vax_state = pd.read_csv('Data/Cleaned/vax_state.csv')
vax_state.head()

# Two-letter postal code -> full name (50 states plus DC).
state_name = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'DC': 'District of Columbia', 'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii',
    'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa',
    'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine',
    'MD': 'Maryland', 'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota',
    'MS': 'Mississippi', 'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska',
    'NV': 'Nevada', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico',
    'NY': 'New York', 'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio',
    'OK': 'Oklahoma', 'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas',
    'UT': 'Utah', 'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington',
    'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming',
}

# Replace the postal codes in Location with full names so the key matches
# merged_data; codes missing from the mapping become NaN.
vax_state['Location'] = vax_state['Location'].map(state_name)

# Attach the vaccination table and then the mask table, both keyed on Location.
state_mask = pd.read_csv('Data/Cleaned/cleaned_mask.csv')
for extra_table in (vax_state, state_mask):
    merged_data = merged_data.merge(extra_table, on='Location')
merged_data.head()

In [None]:
# These columns are read as text with thousands separators; strip the commas
# and convert them to float.
for text_column in ('Population Density per mi²', 'Physicians', 'Active DO', 'Active MO'):
    merged_data[text_column] = (
        merged_data[text_column].str.replace(',', '', regex=True).astype(float)
    )

# Encode the Yes/No 'Mandatory' flag as 1/0.
merged_data['Mandatory'] = merged_data['Mandatory'].map({'Yes': 1, 'No': 0})

In [None]:
merged_data.dtypes

In [None]:
merged_data.isnull().sum()

In [None]:
# Fill missing indicator values column by column.
# Calling fillna(inplace=True) on merged_data['col'] operates on a temporary
# Series; under pandas Copy-on-Write that never reaches merged_data, so the
# fill is done on the DataFrame itself with a per-column mapping.
# NOTE(review): the fill values are hard-coded — presumably national figures;
# confirm their source.
merged_data = merged_data.fillna({
    'asthma_prevalence': 8.4,
    'kidney_disease_prevalence': 52.1,
    'flu_vaccination_rate_2019': 38,
    'high_bp_prevalence': 33,
    'copd_prevalence': 5.2,
    'diabetes_prevalence': 9.5,
})

In [None]:
# Save the merged state table and load it back.
merged_data.to_csv('Data/Merged_state_data.csv', index=False)
merged = pd.read_csv('Data/Merged_state_data.csv')
merged.head()

# Add the survey-based health behavior columns; the left join keeps every
# state even when it has no survey row.
merged = pd.merge(merged, health_behavior, how='left', on='Location')

# Persist the result. Previously the merge above was discarded and the next
# step read 'Data/merged_state_with_health.csv' without it ever being written
# in this notebook.
merged.to_csv('Data/merged_state_with_health.csv', index=False)

df = pd.read_csv('Data/merged_state_with_health.csv')
df.head()

# Attach the yearly all-cause / COVID death counts.
df = df.merge(deaths_covid, on='Location')
df.head()

# Strip thousands separators only from columns that are still text. When the
# file was saved after the earlier numeric clean-up these columns are already
# numeric, and the .str accessor would raise on them.
for text_column in ('Population Density per mi²', 'Physicians', 'Active DO', 'Active MO'):
    if not pd.api.types.is_numeric_dtype(df[text_column]):
        df[text_column] = df[text_column].str.replace(',', '', regex=False).astype(float)

# Likewise, encode 'Mandatory' only if it is still Yes/No text; mapping an
# already-encoded 1/0 column would turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df['Mandatory']):
    df['Mandatory'] = df['Mandatory'].map({'Yes': 1, 'No': 0})

df.to_csv('Data/merged_state_final.csv', index=False)

# County level Data

### Further Cleaning County Level Data

In [6]:
# Load the county-level table; the path is relative to the parent directory.
df = pd.read_csv('../Data/Cleaned/county_df2.csv')
df.head()

Unnamed: 0,FIPS_x,County,Years of Potential Life Lost Rate (premature death),YPLL Rate (Black),YPLL Rate (Hispanic),YPLL Rate (White),% Fair/Poor Health,percent_smokers,percent_obese,Food Environment Index,...,cases_2020,cases_2021,cases_2022,deaths_2020,deaths_2021,deaths_2022,Masks,FIPS_y,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_65PlusPop_Pct
0,1001,Autauga,8824.0,10471.0,,8707.0,18,19,38,7.2,...,4190.0,11018.0,18961.0,48.0,160.0,230.0,267.0,1001,42.2,73.8
1,1003,Baldwin,7225.0,10042.0,3087.0,7278.0,18,17,31,8.0,...,13601.0,39911.0,67496.0,161.0,593.0,719.0,267.0,1003,53.2,89.9
2,1005,Barbour,9586.0,11333.0,,7310.0,26,22,44,5.6,...,1514.0,3860.0,7027.0,32.0,81.0,103.0,267.0,1005,44.5,75.3
3,1007,Bibb,11784.0,14813.0,,11328.0,20,20,38,7.6,...,1834.0,4533.0,7692.0,46.0,95.0,108.0,267.0,1007,36.6,64.2
4,1009,Blount,10908.0,,5620.0,11336.0,21,20,34,8.5,...,4641.0,11256.0,17731.0,63.0,198.0,260.0,267.0,1009,31.9,56.6


In [7]:
# Drop columns that will not be used ('Segregation index black/white' is not in this list, so it is kept)
df.drop(columns = ['County', 'YPLL Rate (Black)', 'YPLL Rate (Hispanic)', 'YPLL Rate (White)', 'Number Uninsured', 'Number Primary Care Physicians', 'FIPS_y', 'Number pre-mature Deaths',
                        'Preventable Hosp. Rate (Black)', 'Preventable Hosp. Rate (Hispanic)', 'Preventable Hosp. Rate (White)',  'Percent Vaccinated Flu (Black)', 'Percent Uninsured',
                        'Percent  Vaccinated (Hispanic) Flu', 'Percent Vaccinated (White) Flu', 'Number Some College', 'Number Unemployed', 'Labor Force', 'PCP Ratio', 
                        '80th Percentile Income', '20th Percentile Income', '95% CI - Low', '95% CI - High', 'Life Expectancy (Black)', 'Life Expectancy (Hispanic)', 
                        'Life Expectancy (White)', 'Number HIV Cases', 'Household income (Black)', 'Household income (Hispanic)', 'Household income (White)'], inplace = True)

In [8]:
# Use the county FIPS code as the row index.
df = df.set_index('FIPS_x')

# Encode the water-violation flag as 0/1 in a new 'water' column, then drop
# the original text column together with 'State'.
df['water'] = df['Presence of water violation'].map({'No': 0, 'Yes': 1})
df = df.drop(columns=['Presence of water violation', 'State'])

# Keep the 2022 figures as the single cases / deaths columns and remove the
# per-year columns.
df['cases'] = df['cases_2022']
df['deaths'] = df['deaths_2022']
df = df.drop(columns=[
    'cases_2020', 'cases_2021', 'cases_2022',
    'deaths_2020', 'deaths_2021', 'deaths_2022',
])

# Discard every row that still has a missing value.
df = df.dropna()
df.shape

(1828, 47)

In [9]:
# Save the cleaned county table; the FIPS_x index is written as the first column.
df.to_csv('../Data/Cleaned/county_df3.csv')

# Exploratory Data Analysis

## State Level

In [None]:
state_df = pd.read_csv('Data/merged_state_final.csv')

# Add up the three yearly COVID death columns, then express the total per
# 10,000 residents using the 2020 population.
covid_by_pop = state_df['covid_2020'] + state_df['covid_2021'] + state_df['covid_2022']
state_df['covid_deaths_by_population'] = (
    covid_by_pop.div(state_df['2020 Population']).mul(10000)
)
state_df.head()

# "Top" = the ten lowest death rates; "bottom" = the ten highest.
top_10_states = state_df.sort_values(by='covid_deaths_by_population').head(10)
bottom_10_states = state_df.sort_values(
    by='covid_deaths_by_population', ascending=False
).head(10)
bottom_10_states

In [None]:
# NOTE(review): `plt` is not imported in the notebook's import cell —
# presumably matplotlib.pyplot; confirm and import it there.
plt.figure(figsize=(10, 6))

# One scatter series per income year, all against the same death rate.
income_series = (
    ('Inc_Per_Cap_2020', '2020'),
    ('Inc_Per_Cap_2021', '2021'),
    ('Inc_Per_CAp_2022', '2022'),
)
for income_column, year_label in income_series:
    plt.scatter(state_df[income_column], state_df.covid_deaths_by_population,
                label=year_label, alpha=0.6)

plt.title('Scatter Plot of Income Per Capita by Covid Deaths (2020-2022)')
plt.xlabel('Income Per Capita')
plt.ylabel('covid_deaths_by_population')
plt.legend()

## County Level

In [None]:
county_df = pd.read_csv('Data/Cleaned/county_df2.csv')
county_df

# County death counts appear to be cumulative year-end totals: the sample rows
# rise monotonically (e.g. 48 -> 160 -> 230) and the county cleaning step uses
# deaths_2022 as the overall count. Adding the three columns would therefore
# count the 2020 deaths three times and the 2021 deaths twice, so the 2022
# column alone is used as the 2020-2022 total.
# NOTE(review): confirm the columns are cumulative in the source data.
covid_by_pop = county_df['deaths_2022']

# Deaths per 10,000 residents.
county_df['covid_deaths_by_population'] = (covid_by_pop / county_df['Population']) * 10000
county_df.head()

In [None]:
def plot_county_feature(column):
    """Scatter one county-level column against the COVID death rate.

    Draws ``county_df[column]`` on the x-axis and
    ``county_df.covid_deaths_by_population`` on the y-axis in a new figure.
    The column name is used for the series label, the title and the x-axis
    label, so all plots are labelled consistently.

    NOTE(review): `plt` is not imported in the notebook's import cell —
    presumably matplotlib.pyplot; confirm and import it there.
    """
    plt.figure()
    plt.scatter(county_df[column], county_df.covid_deaths_by_population,
                label=column, alpha=0.25)
    plt.title(f'Scatter Plot of {column} by Covid Deaths (2020-2022)')
    plt.xlabel(column)
    plt.ylabel('covid_deaths_by_population')
    plt.legend()
    plt.show()


# Same features, in the same order, as the individual plotting cells.
county_features = (
    '% Physically Inactive',
    'Percent Unemployed',
    'Average Daily PM2.5',
    'Percent Insufficient Sleep',
    'Percent Uninsured Adults',
    'Population',
    'percent Asian',
    'Masks',
    'Administered_Dose1_Pop_Pct',
)
for feature in county_features:
    plot_county_feature(feature)