### Gather dependencies

In [109]:
# Dependencies
import pandas as pd
import numpy as np
import json
import matplotlib
# from sodapy import Socrata
from config import census_key
import pprint
import requests
import csv

### Function for quickly building a DataFrame from a JSON API response

In [None]:
# Function for quickly assembling a DataFrame
def json_to_dataframe(response):
    """Build a DataFrame from a JSON response laid out as a list of rows.

    The decoded payload is expected to be a list of lists: the first row
    holds the column names and every following row is one record.

    Parameters
    ----------
    response : object
        Response exposing ``raise_for_status()`` and ``json()``, such as the
        object returned by ``requests.request``.

    Returns
    -------
    pandas.DataFrame
        One row per record, with columns named after the header row.

    Raises
    ------
    ValueError
        If the payload is empty, so there is no header row to use.
    """
    # Fail with the HTTP error itself rather than an indexing error later on.
    response.raise_for_status()

    # Decode the body once; the original decoded it separately for the
    # header and for the records.
    payload = response.json()
    if not payload:
        raise ValueError("JSON payload is empty; expected a header row followed by data rows")

    return pd.DataFrame(payload[1:], columns=payload[0])

### Create DataFrame for CDC Data

In [164]:
# CDC DataFrame by county in Minnesota
cdc_data = pd.read_csv("../Project_1/cdc_data.csv")

# Keep only the columns used in this analysis and give them readable names.
cdc_df = cdc_data[['case_month', 'res_state', 'res_county', 'age_group', 'sex', 'race', 'ethnicity', 'current_status']]
cdc_df = cdc_df.rename(columns={
    'case_month': 'Year-Month',
    'res_state': 'State',
    'res_county': 'County',
    'age_group': 'Age Group',
    'race': 'Race',
    'sex': 'Sex',
    'ethnicity': 'Ethnicity',
    'current_status': 'Infection Status'
})

# Drop "Unknown" race from dataset. Rows whose race is missing (NaN) are kept
# by this comparison. The explicit .copy() makes the filtered frame
# independent, so the column assignments below cannot trigger
# SettingWithCopyWarning.
cdc_df = cdc_df.loc[cdc_df['Race'] != 'Unknown', :].copy()

# Constant counter column; summing it per group gives a case count
# (presumably each row is one reported case -- TODO confirm against the CDC file).
cdc_df['Infections'] = 1

# Relabel race categories in one pass. None of the new labels is itself a key,
# so this is equivalent to applying the replacements one after another.
cdc_df['Race'] = cdc_df['Race'].replace({
    'White': 'Caucasian',
    'Black': 'African American',
    'American Indian/Alaska Native': 'Native American',
    'Multiple/Other': 'Other',
})

cdc_df


Unnamed: 0,Year-Month,State,County,Age Group,Sex,Race,Ethnicity,Infection Status,Infections
0,2021-11,MN,STEELE,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
2,2021-11,MN,STEARNS,50 to 64 years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
3,2020-05,MN,KANDIYOHI,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
4,2021-05,MN,ANOKA,18 to 49 years,Male,,,Laboratory-confirmed case,1
5,2020-04,MN,CLAY,65+ years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
...,...,...,...,...,...,...,...,...,...
539745,2022-01,MN,SCOTT,50 to 64 years,Male,Asian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539746,2020-11,MN,GOODHUE,18 to 49 years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539747,2020-11,MN,DAKOTA,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539748,2022-01,MN,DAKOTA,50 to 64 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1


### Update race column: replace Caucasian with Hispanic where ethnicity is Hispanic/Latino

In [165]:
# Rows recorded as Caucasian with a Hispanic/Latino ethnicity are relabelled
# 'Hispanic'; every other row keeps its existing Race value.
cdc_df['Race2'] = np.where(
    (cdc_df['Race'] == 'Caucasian') & (cdc_df['Ethnicity'] == 'Hispanic/Latino'),
    'Hispanic',
    cdc_df['Race'],
)

# Keep the analysis columns, with Race2 taking the place of Race.
cdc_df = cdc_df[[
    'Year-Month',
    'State',
    'County',
    'Age Group',
    'Sex',
    'Race2',
    'Ethnicity',
    'Infection Status',
    'Infections',
]]
cdc_df

Unnamed: 0,Year-Month,State,County,Age Group,Sex,Race2,Ethnicity,Infection Status,Infections
0,2021-11,MN,STEELE,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
2,2021-11,MN,STEARNS,50 to 64 years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
3,2020-05,MN,KANDIYOHI,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
4,2021-05,MN,ANOKA,18 to 49 years,Male,,,Laboratory-confirmed case,1
5,2020-04,MN,CLAY,65+ years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
...,...,...,...,...,...,...,...,...,...
539745,2022-01,MN,SCOTT,50 to 64 years,Male,Asian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539746,2020-11,MN,GOODHUE,18 to 49 years,Female,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539747,2020-11,MN,DAKOTA,18 to 49 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1
539748,2022-01,MN,DAKOTA,50 to 64 years,Male,Caucasian,Non-Hispanic/Latino,Laboratory-confirmed case,1


### Drop NaN and compile totals by county and race

In [169]:
# Drop NaN: any row missing one of these fields is excluded from the totals.
cdc_group_df = cdc_df[['State', 'County', 'Age Group', 'Sex', 'Race2', 'Ethnicity', 'Infection Status', 'Infections']]
cdc_group_df = cdc_group_df.dropna()

# County totals by race
cdc_total_group = cdc_group_df.groupby(["County", "Race2"])

# Sum only the Infections counter. Summing every column relies on pandas
# silently discarding the text columns, which newer pandas versions no longer
# do (they concatenate the strings instead), so the column is selected
# explicitly to get the same single-column result on any version.
county_totals_df = cdc_total_group[["Infections"]].sum()
county_totals_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Infections
County,Race2,Unnamed: 2_level_1
ANOKA,African American,2864
ANOKA,Asian,1732
ANOKA,Caucasian,24501
ANOKA,Hispanic,746
ANOKA,Native American,73
...,...,...
WRIGHT,African American,107
WRIGHT,Asian,49
WRIGHT,Caucasian,10656
WRIGHT,Hispanic,109


### Create poverty DataFrame based on US Census

In [187]:
# Poverty DataFrame By Race - 2020

url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B17001_002E,B17001A_002E,B17001B_002E,B17001C_002E,B17001D_002E,B17001E_002E,B17001F_002E,B17001G_002E,B17001H_002E,B17001I_002E&for=county:*&in=state:27&key={0}".format(census_key)
response = requests.request("GET", url)
poverty_df = json_to_dataframe(response)

# Convert every B17001 estimate column to a numeric dtype up front. Previously
# only the columns used in the arithmetic below were cast, leaving the other
# counts as text (the values appear to arrive as strings -- hence the casts).
estimate_cols = [col for col in poverty_df.columns if col.startswith('B17001')]
poverty_df[estimate_cols] = poverty_df[estimate_cols].apply(pd.to_numeric)

# Split NAME into county & state (split on the first comma only).
# COUNTY / STATE are kept as module-level names in case other cells read them.
name = poverty_df['NAME'].str.split(",", n=1, expand=True)
COUNTY = poverty_df['County'] = name[0]
STATE = poverty_df['State'] = name[1]
poverty_df = poverty_df.drop(columns=["NAME"])

# Derived groups: the A column minus the I (Hispanic) column, and F + G
# combined into a single "Other" bucket.
poverty_df['Caucasian2'] = poverty_df['B17001A_002E'] - poverty_df['B17001I_002E']
poverty_df['Other'] = poverty_df['B17001F_002E'] + poverty_df['B17001G_002E']

# Keep the columns of interest, then rename them to something intelligible
poverty_df = poverty_df[['County', 'State','B17001_002E','Caucasian2', 'B17001B_002E','B17001C_002E','B17001D_002E','B17001E_002E','Other','B17001I_002E']]

poverty_df = poverty_df.rename(columns={
    'B17001_002E':'Poverty Total',
    'Caucasian2':'Caucasian',
    'B17001B_002E':'African American',
    'B17001C_002E':'Native American',
    'B17001D_002E':'Asian',
    'B17001E_002E':'Native Hawaiian',
    'B17001I_002E':'Hispanic'
    })

# Remove the word "County" from the county name.
# NOTE(review): removing only the word leaves the space that preceded it
# (e.g. "Anoka County" -> "Anoka "), assuming NAME looks like
# "Anoka County, Minnesota" -- TODO confirm. Likewise 'State' would keep the
# space that follows the comma. The key is left as-is here because the
# population frame builds its 'County' the same way and the two frames are
# merged on it; strip both together if this is changed.
poverty_df['County'] = poverty_df['County'].replace('County', '', regex=True)

# Upper case for County
poverty_df['County'] = poverty_df['County'].str.upper()

poverty_df['Year'] = 2020
poverty_df.to_csv("../Project_1/population_in_poverty.csv")


### Create population DataFrame by county and race

In [189]:
# Population by county, by race
url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B03002_001E,B03002_003E,B03002_004E,B03002_005E,B03002_006E,B03002_007E,B03002_008E,B03002_009E,B03002_012E&for=county:*&in=state:27&key={0}".format(census_key)

response = requests.request("GET", url)
response_df = json_to_dataframe(response)

# Convert every B03002 estimate column to a numeric dtype up front. Previously
# only the two columns summed into 'Other' were cast, leaving the remaining
# counts as text (the values appear to arrive as strings -- hence the casts).
estimate_cols = [col for col in response_df.columns if col.startswith('B03002')]
response_df[estimate_cols] = response_df[estimate_cols].apply(pd.to_numeric)

response_df = response_df.rename(columns={
    'B03002_001E': 'Population Total',
    'B03002_003E': 'Caucasian',
    'B03002_004E': 'African American',
    'B03002_005E': 'Native American',
    'B03002_006E': 'Asian',
    'B03002_007E': 'Hawaiian',
    'B03002_012E': 'Hispanic'
})

# Columns 008E and 009E are not renamed; they are combined into "Other".
response_df['Other'] = response_df['B03002_008E'] + response_df['B03002_009E']

# mn_pop_df is the same object as response_df until the column selection
# near the end of this cell, so the edits below also apply to response_df.
mn_pop_df = response_df

# Split NAME into county & state (split on the first comma only).
# COUNTY / STATE are kept as module-level names in case other cells read them.
name = mn_pop_df['NAME'].str.split(",", n=1, expand=True)
COUNTY = mn_pop_df['County'] = name[0]
STATE = mn_pop_df['State'] = name[1]
mn_pop_df.drop(columns=["NAME"], inplace=True)

# Remove the word "County" from the county name.
# NOTE(review): removing only the word leaves the space that preceded it
# (e.g. "Anoka County" -> "Anoka "), assuming NAME looks like
# "Anoka County, Minnesota" -- TODO confirm. Likewise 'State' would keep the
# space that follows the comma. The key is left as-is here because the
# poverty frame builds its 'County' the same way and the two frames are
# merged on it; strip both together if this is changed.
mn_pop_df['County'] = mn_pop_df['County'].replace('County', '', regex=True)

# Upper case for County
mn_pop_df['County'] = mn_pop_df['County'].str.upper()

# Final column order for the saved file.
mn_pop_df = mn_pop_df[['County', 'Population Total', 'Caucasian', 'African American', 'Native American', 'Asian', 'Hawaiian', 'Hispanic', 'Other', 'State' ]]
mn_pop_df.to_csv("../Project_1/mn_population by race.csv")
mn_pop_df


Unnamed: 0,County,Population Total,Caucasian,African American,Native American,Asian,Hawaiian,Hispanic,Other,State
0,ANOKA,353775,283436,23534,1788,16082,86,16769,12080,Minnesota
1,BECKER,34227,29290,146,1938,209,6,738,1900,Minnesota
2,BELTRAMI,46784,33506,399,8920,295,36,1114,2514,Minnesota
3,BENTON,40476,35979,1876,49,378,0,1086,1108,Minnesota
4,BIG STONE,4974,4787,3,3,0,0,102,79,Minnesota
...,...,...,...,...,...,...,...,...,...,...
82,RENVILLE,14572,12741,67,186,108,0,1268,202,Minnesota
83,ROSEAU,15259,13876,124,254,399,0,223,383,Minnesota
84,SHERBURNE,96015,86342,2658,403,1206,0,2577,2829,Minnesota
85,STEELE,36710,31640,1225,119,150,6,2913,657,Minnesota


### Coordinates for heat map -- if time permits

In [55]:
# Get the coordinates of counties in MN
url = "https://en.wikipedia.org/wiki/User:Michael_J/County_table"

# read_html returns a list of the tables found on the page; the first one is used.
table = pd.read_html(url)
df = table[0]

# Minnesota rows only, then just the county name and its coordinates.
Counties = df[df['State'].eq("MN")]
counties_df = Counties.loc[:, ['County [2]', 'Latitude', 'Longitude']]
counties_df  # .to_csv("../Project_1/county_geo.csv")


Unnamed: 0,County [2],Latitude,Longitude
1314,Aitkin,+46.602446°,–93.419760°
1315,Anoka,+45.274110°,–93.242723°
1316,Becker,+46.937629°,–95.741757°
1317,Beltrami,+47.878825°,–94.986698°
1318,Benton,+45.701227°,–94.001440°
...,...,...,...
1396,Watonwan,+43.978366°,–94.614128°
1397,Wilkin,+46.362335°,–96.476657°
1398,Winona,+43.982268°,–91.776708°
1399,Wright,+45.175091°,–93.966397°


In [191]:
# Display the infection totals grouped by county and race.
county_totals_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Infections
County,Race2,Unnamed: 2_level_1
ANOKA,African American,2864
ANOKA,Asian,1732
ANOKA,Caucasian,24501
ANOKA,Hispanic,746
ANOKA,Native American,73
...,...,...
WRIGHT,African American,107
WRIGHT,Asian,49
WRIGHT,Caucasian,10656
WRIGHT,Hispanic,109


In [196]:
# Combine population and poverty counts into one row per county.
#
# 'State' is dropped from both inputs first since it is not wanted in the
# result. The 'County' key is stripped of surrounding whitespace on both sides:
# it is built by deleting the word "County" from the Census name, which can
# leave a stray trailing space, and a clean key is needed to match county
# names from other sources (presumably the CDC data -- TODO confirm).
pop_for_merge = (
    mn_pop_df
    .drop(columns=['State'])
    .assign(County=lambda frame: frame['County'].str.strip())
)
pov_for_merge = (
    poverty_df
    .drop(columns=['State'])
    .assign(County=lambda frame: frame['County'].str.strip())
)

# Race columns present in both frames get " Pop" / " Pov" appended by the
# merge itself. validate= makes a duplicated county raise instead of silently
# multiplying rows.
merged_df = pd.merge(
    pop_for_merge,
    pov_for_merge,
    how="left",
    on="County",
    suffixes=(" Pop", " Pov"),
    validate="one_to_one",
)

# The Hawaiian columns are named differently in the two inputs, so they do
# not collide and must be renamed by hand to follow the same pattern.
merged_df = merged_df.rename(columns={
    'Hawaiian': 'Hawaiian Pop',
    'Native Hawaiian': 'Hawaiian Pov'
})
merged_df

Unnamed: 0,County,Population Total,Caucasian Pop,African American Pop,Native American Pop,Asian Pop,Hawaiian Pop,Hispanic Pop,Other Pop,Poverty Total,Caucasian Pov,African American Pov,Native American Pov,Asian Pov,Hawaiian Pov,Other Pov,Hispanic Pov,Year
0,ANOKA,353775,283436,23534,1788,16082,86,16769,12080,20270,11778,3697,327,953,0,1964,1551,2020
1,BECKER,34227,29290,146,1938,209,6,738,1900,3588,2296,17,709,45,0,381,140,2020
2,BELTRAMI,46784,33506,399,8920,295,36,1114,2514,8167,3657,310,3056,39,0,734,371,2020
3,BENTON,40476,35979,1876,49,378,0,1086,1108,3357,2605,333,19,73,0,205,122,2020
4,BIG STONE,4974,4787,3,3,0,0,102,79,596,596,0,0,0,0,0,0,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,RENVILLE,14572,12741,67,186,108,0,1268,202,1373,717,29,97,8,0,245,277,2020
83,ROSEAU,15259,13876,124,254,399,0,223,383,1133,971,6,67,11,0,28,50,2020
84,SHERBURNE,96015,86342,2658,403,1206,0,2577,2829,4953,2857,578,171,251,0,737,359,2020
85,STEELE,36710,31640,1225,119,150,6,2913,657,2887,1835,426,113,3,0,149,361,2020
