### Gather dependencies

In [1]:
# Dependencies
import pandas as pd
import numpy as np
import json
import matplotlib
# from sodapy import Socrata
from config import census_key
import pprint
import requests
import csv

### Function for quickly building a DataFrame from a JSON response

In [2]:
# Function for quickly assembling a DataFrame
def json_to_dataframe(response):
    """Build a DataFrame from a JSON response whose first row is the header.

    Parameters
    ----------
    response : object with a ``json()`` method
        ``response.json()`` must return a non-empty list of rows: element 0
        holds the column names, every following element is one data row.

    Returns
    -------
    pandas.DataFrame
        One row per data row, with columns named after the header row.
        A header-only payload yields an empty frame with those columns.

    Raises
    ------
    IndexError
        If the decoded payload is an empty list (there is no header row).
    """
    # Decode the body once and reuse it instead of calling json() twice
    rows = response.json()
    return pd.DataFrame(rows[1:], columns=rows[0])

### Create DataFrame for CDC Data

In [43]:
# CDC DataFrame by county in Minnesota
cdc_data = pd.read_csv("../Project_1/cdc_data.csv")

# Friendlier labels for the CDC race categories (applied in a single pass below)
race_labels = {
    'White': 'Caucasian',
    'Black': 'African American',
    'American Indian/Alaska Native': 'Native American',
    'Multiple/Other': 'Other',
}

# read_csv already returns a DataFrame, so select the needed columns directly
cdc_df = cdc_data[['case_month', 'res_state', 'res_county', 'age_group', 'sex', 'race', 'ethnicity', 'current_status']]
cdc_df = cdc_df.rename(columns={
    'res_state': 'State',
    'res_county': 'County',
    'ethnicity': 'Ethnicity',
    'current_status': 'Infection Status',
    'race': 'Race'
})

# Drop "Unknown" race from dataset. The explicit .copy() makes the filtered
# frame independent, so the column assignments below cannot trigger
# SettingWithCopyWarning or write to a temporary slice.
cdc_df = cdc_df.loc[cdc_df['Race'] != 'Unknown', :].copy()

# Every remaining row is counted as one infection
cdc_df['Infections'] = 1
cdc_df['Race'] = cdc_df['Race'].replace(race_labels)

# Race2 relabels rows that are both Caucasian and Hispanic/Latino as 'Hispanic';
# every other row keeps its Race value (missing Race stays missing).
cdc_df['Race2'] = np.where(
    (cdc_df['Race'] == 'Caucasian') & (cdc_df['Ethnicity'] == 'Hispanic/Latino'),
    'Hispanic',
    cdc_df['Race'],
)
cdc_df = cdc_df[['County', 'Race2', 'Infections']]
cdc_df

Unnamed: 0,County,Race2,Infections
0,STEELE,Caucasian,1
2,STEARNS,Caucasian,1
3,KANDIYOHI,Caucasian,1
4,ANOKA,,1
5,CLAY,Caucasian,1
...,...,...,...
539745,SCOTT,Asian,1
539746,GOODHUE,Caucasian,1
539747,DAKOTA,Caucasian,1
539748,DAKOTA,Caucasian,1


### Drop NaN and compile totals by county and race

In [None]:
# Drop rows with a missing county or race
cdc_group_df = cdc_df[['County', 'Race2', 'Infections']].dropna()

# Grand total of infections.
# DataFrame.append was removed in pandas 2.0, and it always returned a new frame
# rather than modifying the caller, so a bare `cdc_group_df.append(...)` never
# produced a visible total. Build the row with pd.concat and keep the result in
# its own frame so cdc_group_df still contains only per-case rows.
total_row = pd.DataFrame(
    {'Infections': [cdc_group_df['Infections'].sum()]},
    index=["Total"],
)
cdc_group_total_df = pd.concat([cdc_group_df, total_row])

# The "Total" row is the last one; County and Race2 are NaN for it
cdc_group_total_df.tail()
# County totals by race
# cdc_county_totals_df = cdc_group_df.groupby(["County", "Race2"], as_index=False)['Infections'].sum()
# cdc_county_totals_df

# cdc_pvt = pd.pivot_table(cdc_county_totals_df,index=['County'],columns='Race2',values='Infections',aggfunc=sum,fill_value=0)
# cdc_pvt


### Create poverty DataFrame based on US Census

In [18]:
# Poverty DataFrame By Race - 2020

url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B17001_002E,B17001A_002E,B17001B_002E,B17001C_002E,B17001D_002E,B17001E_002E,B17001F_002E,B17001G_002E,B17001H_002E,B17001I_002E&for=county:*&in=state:27&key={0}".format(census_key)
response = requests.request("GET", url)
# Fail fast with the HTTP status (e.g. a rejected key) instead of a confusing
# JSON decoding error further down
response.raise_for_status()
poverty_df = json_to_dataframe(response)

# Cast every requested count column to int up front so the whole table has one
# numeric dtype (previously only the two derived columns were integers).
poverty_codes = [
    'B17001_002E', 'B17001A_002E', 'B17001B_002E', 'B17001C_002E', 'B17001D_002E',
    'B17001E_002E', 'B17001F_002E', 'B17001G_002E', 'B17001H_002E', 'B17001I_002E',
]
poverty_df = poverty_df.astype({code: int for code in poverty_codes})

# Split NAME into county & state
name = poverty_df['NAME'].str.split(",", n=1, expand=True)
poverty_df['County'] = name[0]
# The split keeps the space that follows the comma, so strip it from the state
poverty_df['State'] = name[1].str.strip()
# Module-level names kept for backward compatibility; unused in the visible cells
COUNTY = name[0]
STATE = name[1]
poverty_df = poverty_df.drop(columns=["NAME"])

# Derived groups: 'Caucasian2' is the A-table count minus the I-table count,
# 'Other' is the sum of the F and G tables.
# NOTE(review): B17001H_002E is requested but never used; if it is the
# "White alone, not Hispanic" count it may be a more direct source for
# 'Caucasian' than A minus I -- confirm against the ACS table definitions.
poverty_df['Caucasian2'] = poverty_df['B17001A_002E'] - poverty_df['B17001I_002E']
poverty_df['Other'] = poverty_df['B17001F_002E'] + poverty_df['B17001G_002E']

# Keep only the columns needed downstream
poverty_df = poverty_df[['County', 'State','B17001_002E','Caucasian2', 'B17001B_002E','B17001C_002E','B17001D_002E','B17001E_002E','Other','B17001I_002E']]

# Rename columns to something intelligible
poverty_df = poverty_df.rename(columns={
    'B17001_002E':'Poverty Total',
    'Caucasian2':'Caucasian',
    'B17001B_002E':'African American',
    'B17001C_002E':'Native American',
    'B17001D_002E':'Asian',
    'B17001E_002E':'Native Hawaiian',
    'B17001I_002E':'Hispanic'
    })

# Remove the word "County" from the county name.
# NOTE: this leaves the separating space behind (e.g. 'ANOKA '). The values are
# left unchanged here so they stay consistent with CSVs already saved from this
# notebook -- strip the key before joining against other sources.
poverty_df['County'] = poverty_df['County'].replace('County', '', regex=True)

# Upper case for County
poverty_df['County'] = poverty_df['County'].str.upper()

poverty_df['Year'] = 2020
#poverty_df.to_csv("../Project_1/population_in_poverty.csv")
poverty_df


Unnamed: 0,County,State,Poverty Total,Caucasian,African American,Native American,Asian,Native Hawaiian,Other,Hispanic,Year
0,ANOKA,Minnesota,20270,11778,3697,327,953,0,1964,1551,2020
1,BECKER,Minnesota,3588,2296,17,709,45,0,381,140,2020
2,BELTRAMI,Minnesota,8167,3657,310,3056,39,0,734,371,2020
3,BENTON,Minnesota,3357,2605,333,19,73,0,205,122,2020
4,BIG STONE,Minnesota,596,596,0,0,0,0,0,0,2020
...,...,...,...,...,...,...,...,...,...,...,...
82,RENVILLE,Minnesota,1373,717,29,97,8,0,245,277,2020
83,ROSEAU,Minnesota,1133,971,6,67,11,0,28,50,2020
84,SHERBURNE,Minnesota,4953,2857,578,171,251,0,737,359,2020
85,STEELE,Minnesota,2887,1835,426,113,3,0,149,361,2020


### Create population DataFrame by county and race

In [19]:
# Population by county, by race
url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B03002_001E,B03002_003E,B03002_004E,B03002_005E,B03002_006E,B03002_007E,B03002_008E,B03002_009E,B03002_012E&for=county:*&in=state:27&key={0}".format(census_key)

response = requests.request("GET", url)
# Fail fast with the HTTP status (e.g. a rejected key) instead of a confusing
# JSON decoding error further down
response.raise_for_status()
response_df = json_to_dataframe(response)

# Cast every requested count column to int up front so the whole table has one
# numeric dtype (previously only the derived 'Other' column was an integer).
# Integers are written to CSV exactly like the original digit strings.
population_codes = [
    'B03002_001E', 'B03002_003E', 'B03002_004E', 'B03002_005E', 'B03002_006E',
    'B03002_007E', 'B03002_008E', 'B03002_009E', 'B03002_012E',
]
response_df = response_df.astype({code: int for code in population_codes})
response_df = response_df.rename(columns={
    'B03002_001E': 'Population Total',
    'B03002_003E': 'Caucasian',
    'B03002_004E': 'African American',
    'B03002_005E': 'Native American',
    'B03002_006E': 'Asian',
    'B03002_007E': 'Hawaiian',
    'B03002_012E': 'Hispanic'
})
# 'Other' combines the two remaining requested groups (008 and 009)
response_df['Other'] = response_df['B03002_008E'] + response_df['B03002_009E']
mn_pop_df = response_df

# Split NAME into county & state
name = mn_pop_df['NAME'].str.split(",", n=1, expand=True)
mn_pop_df['County'] = name[0]
mn_pop_df['State'] = name[1]
# Module-level names kept for backward compatibility; unused in the visible cells
COUNTY = name[0]
STATE = name[1]
mn_pop_df = mn_pop_df.drop(columns=["NAME"])

# Remove the word "County" from the county name.
# NOTE: this leaves the separating space behind (e.g. 'ANOKA '). The values are
# left unchanged here so the CSV written below stays consistent with CSVs
# already saved from this notebook -- strip the key before joining against
# other sources.
mn_pop_df['County'] = mn_pop_df['County'].replace('County', '', regex=True)

# Upper case for County
mn_pop_df['County'] = mn_pop_df['County'].str.upper()
mn_pop_df = mn_pop_df[['County', 'Population Total', 'Caucasian', 'African American', 'Native American', 'Asian', 'Hawaiian', 'Hispanic', 'Other']]
# The index is written too; it comes back as an 'Unnamed: 0' column on read_csv
mn_pop_df.to_csv("../Project_1/mn_population by race.csv")
mn_pop_df


Unnamed: 0,County,Population Total,Caucasian,African American,Native American,Asian,Hawaiian,Hispanic,Other
0,ANOKA,353775,283436,23534,1788,16082,86,16769,12080
1,BECKER,34227,29290,146,1938,209,6,738,1900
2,BELTRAMI,46784,33506,399,8920,295,36,1114,2514
3,BENTON,40476,35979,1876,49,378,0,1086,1108
4,BIG STONE,4974,4787,3,3,0,0,102,79
...,...,...,...,...,...,...,...,...,...
82,RENVILLE,14572,12741,67,186,108,0,1268,202
83,ROSEAU,15259,13876,124,254,399,0,223,383
84,SHERBURNE,96015,86342,2658,403,1206,0,2577,2829
85,STEELE,36710,31640,1225,119,150,6,2913,657


### Coordinates for heat map -- if time permits

In [None]:
# Look up the latitude/longitude of each Minnesota county from a Wikipedia table
url = "https://en.wikipedia.org/wiki/User:Michael_J/County_table"

# read_html returns a list of tables; the first one is used
table = pd.read_html(url)
df = table[0]

# Keep only the rows whose State column is "MN"
Counties = df[df['State'] == "MN"]

# Narrow the result to the county name and its coordinates
coordinate_columns = ['County [2]', 'Latitude', 'Longitude']
counties_df = Counties.loc[:, coordinate_columns]
counties_df#.to_csv("../Project_1/county_geo.csv")

In [35]:
# Reload the per-county tables previously saved to CSV
population_df = pd.read_csv("../Project_1/mn_population by race.csv")
poverty_df = pd.read_csv("../Project_1/population_in_poverty.csv")
infections_df = pd.read_csv("../Project_1/pivot.csv")

# Normalise the join key. The Census-derived county names are produced by
# removing the word "County", which leaves a trailing space ('ANOKA '), so they
# would not match keys without that space. Stripping all three tables makes the
# merges below independent of how each CSV was written.
for frame in (population_df, poverty_df, infections_df):
    frame['County'] = frame['County'].str.strip()

# Population + poverty: columns sharing a name get _x (population) / _y (poverty)
all_df = pd.merge(population_df, poverty_df, how="inner", on="County")
# Drop the saved index columns and the constant State / Year columns
all_df = all_df.drop(columns=['Unnamed: 0_x', 'Unnamed: 0_y', 'State', 'Year'])

# Save the combined table, then read it back into `df`.
# NOTE(review): the round trip adds an 'Unnamed: 0' index column to `df`; kept
# because later steps may rely on `df` and on all_df.csv -- confirm.
all_df.to_csv("../Project_1/all_df.csv")
df = pd.read_csv("../Project_1/all_df.csv")

# Left join keeps every county even when it has no infection counts
all2_df = pd.merge(df, infections_df, how="left", on="County")

# Infection counts may be missing after the left join; the nullable Int64 dtype
# keeps them as integers while still allowing missing values.
infection_columns = ['African American', 'Asian', 'Caucasian', 'Hispanic', 'Native American', 'Other']
all2_df = all2_df.astype({column: 'Int64' for column in infection_columns})
all2_df.dtypes
#  result['D_x']=result['D_x'].astype('Int64')
#  result['R1_x']=result['R1_x'].astype('Int64')
#  result['R2_x']=result['R2_x'].astype('Int64')
#  result['D_y']=result['D_y'].astype('Int64')
#  result['R1_y']=result['R1_y'].astype('Int64')
#  result['R2_y']=result['R2_y'].astype('Int64')

Unnamed: 0             int64
County                object
Population Total       int64
Caucasian_x            int64
African American_x     int64
Native American_x      int64
Asian_x                int64
Hawaiian               int64
Hispanic_x             int64
Other_x                int64
Poverty Total          int64
Caucasian_y            int64
African American_y     int64
Native American_y      int64
Asian_y                int64
Native Hawaiian        int64
Other_y                int64
Hispanic_y             int64
African American       Int64
Asian                  Int64
Caucasian              Int64
Hispanic               Int64
Native American        Int64
Other                  Int64
dtype: object