## Feature Exploration & Cleaning: ICU Capacity, by County

Data source: Kaiser Foundation (county-level ICU bed counts, total population, and population aged 60+).

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import sqlite3
from sklearn.impute import SimpleImputer, MissingIndicator
from sklearn_pandas import DataFrameMapper

In [2]:
# Open the project's SQLite database file (sqlite3 creates it if it does not
# exist yet) and keep a cursor on the same connection.
conn = sqlite3.connect('COVID19_county_data.db')
cursor = conn.cursor()

In [3]:
# Helper for running SQL queries against the project database
def query_data(sql_statement, params=None, connection=None):
    """Run a SQL query and return the result set as a list of row dicts.

    Parameters
    ----------
    sql_statement : str
        SQL text to execute. Prefer placeholders (``?`` for sqlite3) combined
        with ``params`` over formatting values into the string.
    params : sequence or dict, optional
        Values bound to the placeholders in ``sql_statement``; forwarded to
        ``pandas.read_sql``.
    connection : optional
        Database connection to query. When omitted, the notebook-level
        ``conn`` is used, which matches the original behaviour.

    Returns
    -------
    list of dict
        One ``{column_name: value}`` dict per row, or an empty list when the
        query returns no rows.
    """
    if connection is None:
        # Fall back to the notebook-wide connection so existing one-argument
        # calls keep working unchanged.
        connection = conn
    result = pd.read_sql(sql_statement, connection, params=params)
    return result.to_dict('records')

In [5]:
# Load the county-level ICU bed / population table from the local CSV file
df = pd.read_csv('ICU_Population_County_Data.csv')

In [6]:
# Preview the first rows of the raw table to check columns and value formats
df.head()

Unnamed: 0,State,County,ICU Beds,Total Population,Population Aged 60+,Percent of Population Aged 60+,Residents Aged 60+ Per Each ICU Bed
0,Alabama,Autauga,6,55036,10523,19.1,1754.0
1,Alabama,Baldwin,51,203360,53519,26.3,1049.0
2,Alabama,Barbour,5,26201,6150,23.5,1230.0
3,Alabama,Bibb,0,22580,4773,21.1,
4,Alabama,Blount,6,57667,13600,23.6,2267.0


In [7]:
# Count missing values per column.
# NOTE(review): the missing 'Residents Aged 60+ Per Each ICU Bed' values presumably
# belong to counties reporting 0 ICU beds (the ratio is undefined there) — verify.
df.isna().sum()

State                                     0
County                                    0
ICU Beds                                  0
Total Population                          0
Population Aged 60+                       0
Percent of Population Aged 60+            0
Residents Aged 60+ Per Each ICU Bed    1666
dtype: int64

In [8]:
# Column transformer for the only column with missing values:
#   * impute_step fills NaN with SimpleImputer's default strategy (column mean)
#   * flag_step adds a boolean column marking which rows were originally missing
# default=None passes every other column through untouched and df_out=True makes
# the mapper return a DataFrame instead of a bare array.
impute_step = (
    ['Residents Aged 60+ Per Each ICU Bed'],
    [SimpleImputer()],
)
flag_step = (
    ['Residents Aged 60+ Per Each ICU Bed'],
    [MissingIndicator()],
    {'alias': 'Residents Aged 60+ Per Each ICU Bed_isna'},
)
mapper = DataFrameMapper([impute_step, flag_step], default=None, df_out=True)

In [9]:
# Fit the imputer/indicator on the raw table and build the cleaned frame
clean_df = mapper.fit_transform(df)

In [10]:
# Inspect the transformed frame: the imputed ratio column and its *_isna flag come first
clean_df.head()

Unnamed: 0,Residents Aged 60+ Per Each ICU Bed,Residents Aged 60+ Per Each ICU Bed_isna,State,County,ICU Beds,Total Population,Population Aged 60+,Percent of Population Aged 60+
0,1754.0,False,Alabama,Autauga,6,55036,10523,19.1
1,1049.0,False,Alabama,Baldwin,51,203360,53519,26.3
2,1230.0,False,Alabama,Barbour,5,26201,6150,23.5
3,1299.560298,True,Alabama,Bibb,0,22580,4773,21.1
4,2267.0,False,Alabama,Blount,6,57667,13600,23.6


In [11]:
# Confirm that no missing values remain after imputation
clean_df.isna().sum()

Residents Aged 60+ Per Each ICU Bed         0
Residents Aged 60+ Per Each ICU Bed_isna    0
State                                       0
County                                      0
ICU Beds                                    0
Total Population                            0
Population Aged 60+                         0
Percent of Population Aged 60+              0
dtype: int64

In [12]:
# Convert the cleaned frame into a list of per-row dicts ({column: value})
clean_records = clean_df.to_dict(orient='records')

In [14]:
# Build one key per row as the string form of the (County, State) tuple,
# e.g. "('Autauga', 'Alabama')". The exact text matters: it is compared
# against fips_codes.County_State when the tables are joined.
county_state = [
    str((row['County'], row['State']))
    for row in clean_records
]

In [15]:
# Spot-check the format of the first generated key
county_state[0]

"('Autauga', 'Alabama')"

In [16]:
# Attach the key as a new column; it is used as the join key against
# fips_codes.County_State further down. This modifies clean_df in place.
clean_df['County_State']=county_state

In [17]:
# Verify that the County_State column was added as the last column
clean_df.head()

Unnamed: 0,Residents Aged 60+ Per Each ICU Bed,Residents Aged 60+ Per Each ICU Bed_isna,State,County,ICU Beds,Total Population,Population Aged 60+,Percent of Population Aged 60+,County_State
0,1754.0,False,Alabama,Autauga,6,55036,10523,19.1,"('Autauga', 'Alabama')"
1,1049.0,False,Alabama,Baldwin,51,203360,53519,26.3,"('Baldwin', 'Alabama')"
2,1230.0,False,Alabama,Barbour,5,26201,6150,23.5,"('Barbour', 'Alabama')"
3,1299.560298,True,Alabama,Bibb,0,22580,4773,21.1,"('Bibb', 'Alabama')"
4,2267.0,False,Alabama,Blount,6,57667,13600,23.6,"('Blount', 'Alabama')"


In [18]:
# Persist the cleaned frame as table `icu_capacity`, replacing any existing
# table of that name; the DataFrame index is stored in a column called `id`.
clean_df.to_sql(
    name='icu_capacity',
    con=conn,
    if_exists='replace',
    index_label='id',
)

  dtype=dtype)


In [19]:
# Read the table back from the database to confirm what was stored
icu_rows = query_data('SELECT * FROM icu_capacity')
new_df = pd.DataFrame(icu_rows)

In [20]:
# Preview the round-tripped table (the *_isna flag comes back from SQLite as 0/1)
new_df.head()

Unnamed: 0,County,County_State,ICU Beds,Percent of Population Aged 60+,Population Aged 60+,Residents Aged 60+ Per Each ICU Bed,Residents Aged 60+ Per Each ICU Bed_isna,State,Total Population,id
0,Autauga,"('Autauga', 'Alabama')",6,19.1,10523,1754.0,0,Alabama,55036,0
1,Baldwin,"('Baldwin', 'Alabama')",51,26.3,53519,1049.0,0,Alabama,203360,1
2,Barbour,"('Barbour', 'Alabama')",5,23.5,6150,1230.0,0,Alabama,26201,2
3,Bibb,"('Bibb', 'Alabama')",0,21.1,4773,1299.560298,1,Alabama,22580,3
4,Blount,"('Blount', 'Alabama')",6,23.6,13600,2267.0,0,Alabama,57667,4


In [21]:
# Load the FIPS lookup table that already exists in the database and preview it
fips_rows = query_data('SELECT * FROM fips_codes')
FIPS_df = pd.DataFrame(fips_rows)
FIPS_df.head()

Unnamed: 0,County,County_State,FIPS,State,id
0,Autauga,"('Autauga', 'Alabama')",1001,Alabama,0
1,Baldwin,"('Baldwin', 'Alabama')",1003,Alabama,111
2,Barbour,"('Barbour', 'Alabama')",1005,Alabama,222
3,Bibb,"('Bibb', 'Alabama')",1007,Alabama,333
4,Blount,"('Blount', 'Alabama')",1009,Alabama,444


In [23]:
# Attach each county's FIPS code by joining the two tables on the shared
# County_State key.
icu_fips_df = pd.DataFrame(query_data('''SELECT fips_codes.FIPS, icu_capacity.*  FROM icu_capacity 
                JOIN fips_codes ON fips_codes.County_State=icu_capacity.County_State
               '''))

# An inner join silently drops counties that have no matching key in
# fips_codes and repeats counties that match more than once. Because this
# result is later written back over `icu_capacity`, report any row-count
# mismatch instead of letting rows disappear unnoticed.
icu_row_count = query_data('SELECT COUNT(*) AS n FROM icu_capacity')[0]['n']
if len(icu_fips_df) != icu_row_count:
    print(
        f'WARNING: FIPS join returned {len(icu_fips_df)} rows for '
        f'{icu_row_count} icu_capacity rows; check County_State keys.'
    )

In [24]:
# Preview the joined table; FIPS is still numeric here (e.g. 1001)
icu_fips_df.head()

Unnamed: 0,County,County_State,FIPS,ICU Beds,Percent of Population Aged 60+,Population Aged 60+,Residents Aged 60+ Per Each ICU Bed,Residents Aged 60+ Per Each ICU Bed_isna,State,Total Population,id
0,Autauga,"('Autauga', 'Alabama')",1001,6,19.1,10523,1754.0,0,Alabama,55036,0
1,Baldwin,"('Baldwin', 'Alabama')",1003,51,26.3,53519,1049.0,0,Alabama,203360,1
2,Barbour,"('Barbour', 'Alabama')",1005,5,23.5,6150,1230.0,0,Alabama,26201,2
3,Bibb,"('Bibb', 'Alabama')",1007,0,21.1,4773,1299.560298,1,Alabama,22580,3
4,Blount,"('Blount', 'Alabama')",1009,6,23.6,13600,2267.0,0,Alabama,57667,4


In [27]:
# Normalise FIPS to a zero-padded, 5-character string (e.g. 1001 -> '01001')
icu_fips_df['FIPS'] = icu_fips_df['FIPS'].map(
    lambda code: format(int(code), '05d')
)

In [28]:
# Drop the old `id` column (a fresh one is written from the DataFrame index)
# and overwrite `icu_capacity` with the FIPS-enriched table.
icu_with_fips = icu_fips_df.drop(columns='id')
icu_with_fips.to_sql('icu_capacity', conn, index_label='id', if_exists='replace')