### Gather dependencies

In [None]:
# Dependencies
import pandas as pd
import numpy as np
import json
import matplotlib
# from sodapy import Socrata
from config import census_key
import pprint
import requests
import csv

### Function for quickly building a DataFrame from an API response

In [None]:
# Function for quickly assembling a DataFrame
def json_to_dataframe(response):
    """Build a DataFrame from a response whose JSON body is a list of rows.

    The first row of the decoded body supplies the column names; every
    following row becomes one record.

    Args:
        response: Object exposing a ``json()`` method that returns a list of
            rows, header row first.

    Returns:
        pandas.DataFrame with one row per data row. An empty body yields an
        empty DataFrame; a header-only body yields a frame with columns and
        no rows.
    """
    # Decode the body a single time instead of once per use.
    rows = response.json()
    if not rows:
        return pd.DataFrame()
    header, *records = rows
    return pd.DataFrame(records, columns=header)

### Create DataFrame for CDC Data

In [None]:
# CDC DataFrame by county in Minnesota
cdc_data = pd.read_csv("../Project_1/cdc_data.csv")

# Keep only the demographic columns used below. The explicit .copy() makes
# cdc_df an independent frame rather than a view-like slice of cdc_data.
cdc_df = cdc_data[['case_month', 'res_state', 'res_county', 'age_group', 'sex', 'race', 'ethnicity', 'current_status']].copy()
cdc_df = cdc_df.rename(columns={
    'case_month': 'Year-Month',
    'res_state': 'State',
    'res_county': 'County',
    'age_group': 'Age Group',
    'race': 'Race',
    'sex': 'Sex',
    'ethnicity': 'Ethnicity',
    'current_status': 'Infection Status'
})

# Drop "Unknown" race from dataset. Copy the filtered rows so the column
# assignments that follow do not raise SettingWithCopyWarning.
cdc_df = cdc_df.loc[cdc_df['Race'] != 'Unknown', :].copy()

# Constant 1 per row, so that summing 'Infections' later counts rows.
cdc_df['Infections'] = 1

# Relabel race categories in a single pass.
cdc_df['Race'] = cdc_df['Race'].replace({
    'White': 'Caucasian',
    'Black': 'African American',
    'American Indian/Alaska Native': 'Native American',
    'Multiple/Other': 'Other',
})

cdc_df


### Update race column: replace Caucasian with Hispanic where ethnicity is Hispanic/Latino

In [None]:
# Rows recorded as Caucasian with Hispanic/Latino ethnicity are relabelled
# 'Hispanic'; every other row keeps its existing Race value.
is_hispanic_caucasian = (cdc_df['Race'] == 'Caucasian') & (cdc_df['Ethnicity'] == 'Hispanic/Latino')
cdc_df = cdc_df.assign(
    Race2=np.where(is_hispanic_caucasian, 'Hispanic', cdc_df['Race'])
)

# Race2 takes the place of Race in the column layout.
column_order = [
    'Year-Month', 'State', 'County', 'Age Group', 'Sex',
    'Race2', 'Ethnicity', 'Infection Status', 'Infections',
]
cdc_df = cdc_df[column_order]
cdc_df

### Drop NaN and compile totals by county and race

In [None]:
# Drop NaN: keep only rows in which every selected field is present
cdc_group_df = cdc_df[['State', 'County', 'Age Group', 'Sex', 'Race2', 'Ethnicity', 'Infection Status', 'Infections']]
cdc_group_df = cdc_group_df.dropna()

# County totals by race.
# Only the numeric 'Infections' counter is summed. A bare GroupBy.sum() also
# aggregates the text columns (State, Age Group, Sex, ...), which pandas 2.x
# concatenates into long strings instead of leaving them out.
cdc_total_group = cdc_group_df.groupby(["County", "Race2"])
county_totals_df = cdc_total_group[["Infections"]].sum()
county_totals_df


### Create poverty DataFrame based on US Census

In [None]:
# Poverty DataFrame By Race - 2020

url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B17001_002E,B17001A_002E,B17001B_002E,B17001C_002E,B17001D_002E,B17001E_002E,B17001F_002E,B17001G_002E,B17001H_002E,B17001I_002E&for=county:*&in=state:27&key={0}".format(census_key)

# A timeout plus raise_for_status() stops the cell on a hung or failed request
# instead of handing an error body to json_to_dataframe.
response = requests.request("GET", url, timeout=30)
response.raise_for_status()
poverty_df = json_to_dataframe(response)

# Split NAME into county & state; strip the blank left after the comma
name = poverty_df['NAME'].str.split(",", n=1, expand=True)
poverty_df['County'] = name[0]
poverty_df['State'] = name[1].str.strip()
poverty_df = poverty_df.drop(columns=["NAME"])

# Cast every estimate that is kept or used to int in one place. Previously
# only the operands of the two derived columns were cast, so the final frame
# mixed int columns with columns left exactly as the API returned them.
estimate_cols = [
    'B17001_002E', 'B17001A_002E', 'B17001B_002E', 'B17001C_002E',
    'B17001D_002E', 'B17001E_002E', 'B17001F_002E', 'B17001G_002E',
    'B17001I_002E',
]
poverty_df[estimate_cols] = poverty_df[estimate_cols].astype(int)

# NOTE(review): B17001H_002E is requested in the URL but never used - confirm
# whether it was meant to feed the Caucasian figure instead of A minus I.
poverty_df['Caucasian2'] = poverty_df['B17001A_002E'] - poverty_df['B17001I_002E']
poverty_df['Other'] = poverty_df['B17001F_002E'] + poverty_df['B17001G_002E']

# Keep the reporting columns and rename the Census codes to something intelligible
poverty_df = poverty_df[['County', 'State','B17001_002E','Caucasian2', 'B17001B_002E','B17001C_002E','B17001D_002E','B17001E_002E','Other','B17001I_002E']]
poverty_df = poverty_df.rename(columns={
    'B17001_002E':'Poverty Total',
    'Caucasian2':'Caucasian',
    'B17001B_002E':'African American',
    'B17001C_002E':'Native American',
    'B17001D_002E':'Asian',
    'B17001E_002E':'Native Hawaiian',
    'B17001I_002E':'Hispanic'
    })

# Remove "County" from the county name, then upper-case it. Only the literal
# word is removed, so any blank that preceded it stays; mn_pop_df builds its
# County column the same way and the later merge joins on this exact value.
poverty_df['County'] = poverty_df['County'].replace('County', '', regex=True)
poverty_df['County'] = poverty_df['County'].str.upper()

poverty_df['Year'] = 2020
poverty_df.to_csv("../Project_1/population_in_poverty.csv")


### Create population DataFrame by county and race

In [None]:
# Population by county, by race
url = "https://api.census.gov/data/2020/acs/acs5?get=NAME,B03002_001E,B03002_003E,B03002_004E,B03002_005E,B03002_006E,B03002_007E,B03002_008E,B03002_009E,B03002_012E&for=county:*&in=state:27&key={0}".format(census_key)

# A timeout plus raise_for_status() stops the cell on a hung or failed request
# instead of handing an error body to json_to_dataframe.
response = requests.request("GET", url, timeout=30)
response.raise_for_status()
response_df = json_to_dataframe(response)

# Cast every count to int in one place. Previously only the two operands of
# 'Other' were cast, so the final frame mixed an int column with columns left
# exactly as the API returned them.
count_cols = [
    'B03002_001E', 'B03002_003E', 'B03002_004E', 'B03002_005E',
    'B03002_006E', 'B03002_007E', 'B03002_008E', 'B03002_009E',
    'B03002_012E',
]
response_df[count_cols] = response_df[count_cols].astype(int)

response_df = response_df.rename(columns={
    'B03002_001E': 'Population Total',
    'B03002_003E': 'Caucasian',
    'B03002_004E': 'African American',
    'B03002_005E': 'Native American',
    'B03002_006E': 'Asian',
    'B03002_007E': 'Hawaiian',
    'B03002_012E': 'Hispanic'
})
response_df['Other'] = response_df['B03002_008E'] + response_df['B03002_009E']

# Split NAME into county & state; strip the blank left after the comma
name = response_df['NAME'].str.split(",", n=1, expand=True)
response_df['County'] = name[0]
response_df['State'] = name[1].str.strip()
response_df = response_df.drop(columns=["NAME"])

# Remove "County" from the county name, then upper-case it. Only the literal
# word is removed, so any blank that preceded it stays; poverty_df builds its
# County column the same way and the later merge joins on this exact value.
response_df['County'] = response_df['County'].replace('County', '', regex=True)
response_df['County'] = response_df['County'].str.upper()

# Final column layout for the population table
mn_pop_df = response_df[['County', 'Population Total', 'Caucasian', 'African American', 'Native American', 'Asian', 'Hawaiian', 'Hispanic', 'Other', 'State' ]]
mn_pop_df.to_csv("../Project_1/mn_population by race.csv")
mn_pop_df


### Coordinates for heat map -- if time permits

In [None]:
# Get the coordinates of counties in MN
url = "https://en.wikipedia.org/wiki/User:Michael_J/County_table"

# read_html returns a list of tables; the first one is used here.
table = pd.read_html(url)
df = table[0]

# Keep the rows whose State is "MN", then only the name and coordinate columns.
Counties = df[df['State'] == "MN"]
counties_df = Counties.loc[:, ['County [2]', 'Latitude', 'Longitude']]
counties_df  # optional export: .to_csv("../Project_1/county_geo.csv")


### County totals

In [None]:
# Display the CDC totals grouped by County and Race2 (computed in an earlier cell)
county_totals_df

### Merge CDC, population count, and poverty count

In [None]:
# Join the population and poverty tables on County.
# Both tables derive County by deleting the word "County" from the Census
# NAME, which can leave stray whitespace around the key. Trim it on both sides
# so the join (and the County column of the result) uses clean names.
pop_keyed = mn_pop_df.assign(County=mn_pop_df['County'].str.strip())
pov_keyed = poverty_df.assign(County=poverty_df['County'].str.strip())

# validate="one_to_one" raises if either table repeats a county, rather than
# silently multiplying rows in the merged result.
merged_df = pd.merge(pop_keyed, pov_keyed, how="left", on="County", validate="one_to_one")

# State is present on both sides and is not carried into the merged table.
merged_df = merged_df.drop(columns=['State_x', 'State_y'])

# _x columns come from the population table, _y from the poverty table.
# The Hawaiian columns already had distinct names, so they carry no suffix.
merged_df = merged_df.rename(columns={
    'Caucasian_x': 'Caucasian Pop',
    'African American_x': 'African American Pop',
    'Native American_x': 'Native American Pop',
    'Hawaiian': 'Hawaiian Pop',
    'Asian_x': 'Asian Pop',
    'Hispanic_x': 'Hispanic Pop',
    'Other_x': 'Other Pop',
    'Caucasian_y': 'Caucasian Pov',
    'African American_y': 'African American Pov',
    'Native American_y': 'Native American Pov',
    'Asian_y': 'Asian Pov',
    'Native Hawaiian': 'Hawaiian Pov',
    'Other_y': 'Other Pov',
    'Hispanic_y': 'Hispanic Pov'
})
merged_df