# Chicago Dataset

In [4]:
# Import dependencies
import os
import csv
import pandas as pd
import openpyxl

## Demographic Metric

**Measuring**: ACS Population Density, Median Age, Education Level

Data was pulled from the 2020 Chicago Census Database and the American Community Survey (ACS) by the U.S. Census Bureau using various coded tables to ensure unified data for each metropolitan city. Two tables used the same code and were seperated to ensure proper measurement. 
* ACS Population Density: Table DP05
* Median Age: Table DP05
* Education Level: S1501

#### ACS Population Density & Median Age (Table DP05)

In [None]:
# Import csv file Chicago ACS Population Density 
chicago_poulation_data = '../chicago_data/chi_ACS_pop_density.csv'

# Read csv file into DataFrame
chicago_population_df = pd.read_csv(chicago_poulation_data)
chicago_population_df.head()

Unnamed: 0,Label (Grouping),Illinois!!Estimate,Illinois!!Margin of Error,Illinois!!Percent,Illinois!!Percent Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error"
0,SEX AND AGE,,,,,,,,
1,Total population,12716164.0,*****,12716164,(X),9478801.0,*****,9478801,(X)
2,Male,6247083.0,"±1,174",49.1%,±0.1,4640471.0,±544,49.0%,±0.1
3,Female,6469081.0,"±1,174",50.9%,±0.1,4838330.0,±544,51.0%,±0.1
4,Sex ratio (males per 100 females),96.6,±0.1,(X),(X),95.9,±0.1,(X),(X)


In [None]:
# Display columns and data types
chicago_population_df.dtypes

Label (Grouping)                                                          object
Illinois!!Estimate                                                        object
Illinois!!Margin of Error                                                 object
Illinois!!Percent                                                         object
Illinois!!Percent Margin of Error                                         object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate                   object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error            object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent                    object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error    object
dtype: object

In [10]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_population_df = chicago_population_df.drop(columns=['Illinois!!Margin of Error', 
                                                            'Illinois!!Percent Margin of Error', 
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error'])


In [11]:
# Rename columns for clarity
chicago_population_df = chicago_population_df.rename(columns={
    'Illinois!!Estimate': 'Illinois Total Population',
    'Illinois!!Percent': 'Illinois %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent': 'Chicago Metro %'
})

chicago_population_df.head()

Unnamed: 0,Label (Grouping),Illinois Total Population,Illinois %,Chicago Metro Total Population,Chicago Metro %
0,SEX AND AGE,,,,
1,Total population,12716164.0,12716164,9478801.0,9478801
2,Male,6247083.0,49.1%,4640471.0,49.0%
3,Female,6469081.0,50.9%,4838330.0,51.0%
4,Sex ratio (males per 100 females),96.6,(X),95.9,(X)


In [64]:
# Save the cleaned DataFrame to a new CSV file
output_csv_path = '../chicago_data/cleaned_chicago_population_density.csv'

#### Education Level (Table S1501)

In [16]:
# Import csv file for education data 
chicago_edu_data = '../chicago_data/chi_edu_level.csv'

# Read the CSV file into a DataFrame
chicago_edu_df = pd.read_csv(chicago_edu_data)
chicago_edu_df.head()


Unnamed: 0,Label (Grouping),Illinois!!Total!!Estimate,Illinois!!Total!!Margin of Error,Illinois!!Percent!!Estimate,Illinois!!Percent!!Margin of Error,Illinois!!Male!!Estimate,Illinois!!Male!!Margin of Error,Illinois!!Percent Male!!Estimate,Illinois!!Percent Male!!Margin of Error,Illinois!!Female!!Estimate,...,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Margin of Error"
0,AGE BY EDUCATIONAL ATTAINMENT,,,,,,,,,,...,,,,,,,,,,
1,Population 18 to 24 years,1174031.0,±964,(X),(X),597348.0,±710,(X),(X),576683.0,...,(X),(X),433876.0,±276,(X),(X),422068.0,±448,(X),(X)
2,Less than high school graduate,138521.0,"±2,960",11.8%,±0.3,78436.0,"±2,146",13.1%,±0.4,60085.0,...,11.8%,±0.3,56432.0,"±1,730",13.0%,±0.4,44349.0,"±2,071",10.5%,±0.5
3,High school graduate (includes equival...,352546.0,"±3,753",30.0%,±0.3,197183.0,"±3,209",33.0%,±0.5,155363.0,...,30.7%,±0.4,144304.0,"±2,915",33.3%,±0.7,118237.0,"±2,477",28.0%,±0.6
4,Some college or associate's degree,513656.0,"±4,458",43.8%,±0.4,248837.0,"±3,286",41.7%,±0.5,264819.0,...,41.6%,±0.5,173489.0,"±2,826",40.0%,±0.6,182196.0,"±2,849",43.2%,±0.7


In [17]:
# Display columns and data types
chicago_edu_df.dtypes

Label (Grouping)                                                                  object
Illinois!!Total!!Estimate                                                         object
Illinois!!Total!!Margin of Error                                                  object
Illinois!!Percent!!Estimate                                                       object
Illinois!!Percent!!Margin of Error                                                object
Illinois!!Male!!Estimate                                                          object
Illinois!!Male!!Margin of Error                                                   object
Illinois!!Percent Male!!Estimate                                                  object
Illinois!!Percent Male!!Margin of Error                                           object
Illinois!!Female!!Estimate                                                        object
Illinois!!Female!!Margin of Error                                                 object
Illinois!!Percent Fem

In [18]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_edu_df = chicago_edu_df.drop(columns=['Illinois!!Total!!Margin of Error',
                                              'Illinois!!Percent!!Margin of Error',
                                              'Illinois!!Male!!Margin of Error',
                                              'Illinois!!Percent Male!!Margin of Error',
                                              'Illinois!!Female!!Margin of Error',
                                              'Illinois!!Percent Female!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error',
                                              'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Margin of Error'
                                                ])

In [59]:
# Drop NaN values
chicago_edu_df = chicago_edu_df.dropna()

In [60]:
# Rename columns for clarity
chicago_edu_df = chicago_edu_df.rename(columns={
    'Label (Grouping)': 'Age by Educational Attainment',
    'Illinois!!Total!!Estimate': 'Illinois Total Population',
    'Illinois!!Percent!!Estimate': 'Illinois Pop %',
    'Illinois!!Male!!Estimate': 'Illinois Male Population',
    'Illinois!!Percent Male!!Estimate': 'Illinois Male %',
    'Illinois!!Female!!Estimate': 'Illinois Female Population',
    'Illinois!!Percent Female!!Estimate': 'Illinois Female %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Estimate': 'Chicago Metro %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate': 'Chicago Male Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Estimate': 'Chicago Male %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate': 'Chicago Female Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Estimate': 'Chicago Female %'
})

# Display the cleaned DataFrame
chicago_edu_df.head()

Unnamed: 0,Age by Educational Attainment,Illinois Total Population,Illinois %,Illinois Male Population,Illinois Male %,Illinois Female Population,Illinois Female %,Chicago Metro Total Population,Chicago Metro %,Chicago Male Population,Chicago Male %,Chicago Female Population,Chicago Female %
1,Population 18 to 24 years,1174031,(X),597348,(X),576683,(X),855944,(X),433876,(X),422068,(X)
2,Less than high school graduate,138521,11.8%,78436,13.1%,60085,10.4%,100781,11.8%,56432,13.0%,44349,10.5%
3,High school graduate (includes equival...,352546,30.0%,197183,33.0%,155363,26.9%,262541,30.7%,144304,33.3%,118237,28.0%
4,Some college or associate's degree,513656,43.8%,248837,41.7%,264819,45.9%,355685,41.6%,173489,40.0%,182196,43.2%
5,Bachelor's degree or higher,169308,14.4%,72892,12.2%,96416,16.7%,136937,16.0%,59651,13.7%,77286,18.3%


In [None]:
# save the cleaned DataFrame to a new CSV file


#### Found: Population Count by Age and Zipcode

In [19]:
# Import new csv file
chicago_pop_counts = 'data_files/chicago_data/Chicago_Population_Counts.csv'

# Read the csv file
chicago_pop_counts_df = pd.read_csv(chicago_pop_counts)
chicago_pop_counts_df.head()

Unnamed: 0,Geography Type,Year,Geography,Population - Total,Population - Age 0-17,Population - Age 18-29,Population - Age 30-39,Population - Age 40-49,Population - Age 50-59,Population - Age 60-69,...,Population - Age 18+,Population - Age 65+,Population - Female,Population - Male,Population - Latinx,Population - Asian Non-Latinx,Population - Black Non-Latinx,Population - White Non-Latinx,Population - Other Race Non-Latinx,Record ID
0,Citywide,2018,Chicago,2705988,548999,552935,456321,336457,312965,262991,...,2156989,349712,1386113,1319875,776661,179841.0,784266.0,899980,119467.0,Citywide-Chicago-2018
1,ZIP Code,2018,60601,14675,820,4606,2792,2190,1333,1340,...,13855,2075,7484,7191,1274,,,9677,,ZIP_Code-60601-2018
2,ZIP Code,2018,60602,1244,149,435,462,135,53,10,...,1095,5,551,693,81,,,788,,ZIP_Code-60602-2018
3,ZIP Code,2018,60603,1174,56,561,101,97,197,97,...,1118,112,601,573,115,,,707,,ZIP_Code-60603-2018
4,ZIP Code,2018,60604,782,38,303,104,51,101,130,...,744,93,413,369,34,,,479,,ZIP_Code-60604-2018


In [20]:
# Clean data by droping any rows that do not have 2020 in the 'Year' column
chicago_pop_counts_2020_df = chicago_pop_counts_df[chicago_pop_counts_df['Year'] == 2020]
chicago_pop_counts_2020_df.head()

Unnamed: 0,Geography Type,Year,Geography,Population - Total,Population - Age 0-17,Population - Age 18-29,Population - Age 30-39,Population - Age 40-49,Population - Age 50-59,Population - Age 60-69,...,Population - Age 18+,Population - Age 65+,Population - Female,Population - Male,Population - Latinx,Population - Asian Non-Latinx,Population - Black Non-Latinx,Population - White Non-Latinx,Population - Other Race Non-Latinx,Record ID
120,Citywide,2020,Chicago,2699347,552668,548747,463143,336591,313865,255435,...,2146679,342174,1388469,1310878,772791,182251.0,776470.0,900055,67780.0,Citywide-Chicago-2020
121,ZIP Code,2020,60601,14513,825,4696,3048,1815,809,1974,...,13688,2605,7894,6619,1242,3528.0,679.0,8614,450.0,ZIP_CODE-60601-2020
122,ZIP Code,2020,60602,1596,115,332,860,191,81,17,...,1481,4,744,852,120,435.0,37.0,794,210.0,ZIP_CODE-60602-2020
123,ZIP Code,2020,60603,1186,15,423,248,68,176,146,...,1171,198,560,626,62,397.0,29.0,692,6.0,ZIP_CODE-60603-2020
124,ZIP Code,2020,60604,729,5,313,171,30,77,89,...,724,65,441,288,23,127.0,72.0,507,0.0,ZIP_CODE-60604-2020


In [21]:
# list all columns in the dataframe
chicago_pop_counts_2020_df.columns

Index(['Geography Type', 'Year', 'Geography', 'Population - Total',
       'Population - Age 0-17', 'Population - Age 18-29',
       'Population - Age 30-39', 'Population - Age 40-49',
       'Population - Age 50-59', 'Population - Age 60-69',
       'Population - Age 70-79', 'Population - Age 80+',
       'Population - Age 0-4', 'Population - Age 5-11',
       'Population - Age 12-17', 'Population - Age 5+', 'Population - Age 18+',
       'Population - Age 65+', 'Population - Female', 'Population - Male',
       'Population - Latinx', 'Population - Asian Non-Latinx',
       'Population - Black Non-Latinx', 'Population - White Non-Latinx',
       'Population - Other Race Non-Latinx', 'Record ID'],
      dtype='object')

In [22]:
# Rename columns to get rid of population in the title. 
chicago_pop_counts_2020_df = chicago_pop_counts_2020_df.rename(columns={'Population - Total': 'Total', 
                                                                        'Population - Age 0-17': 'Age 0-17', 
                                                                        'Population - Age 18-29': 'Age 18-29',
                                                                        'Population - Age 30-39': 'Age 30-39',
                                                                        'Population - Age 40-49': 'Age 40-49',
                                                                        'Population - Age 50-59': 'Age 50-59',
                                                                        'Population - Age 60-69': 'Age 60-69',
                                                                        'Population - Age 70-79': 'Age 70-79',
                                                                        'Population - Age 80+': 'Age 80+',
                                                                        'Population - Female': 'Female',
                                                                        'Population - Male': 'Male',
                                                                        'Population - Latinx': 'Latinx',
                                                                        'Population - Asian Non-Latinx': 'Asian Non-Latinx',
                                                                        'Population - Black Non-Latinx': 'Black Non-Latinx',
                                                                        'Population - White Non-Latinx': 'White Non-Latinx',
                                                                        'Population - Other Non-Latinx': 'Other Non-Latinx',
                                                                })
chicago_pop_counts_2020_df.head()

Unnamed: 0,Geography Type,Year,Geography,Total,Age 0-17,Age 18-29,Age 30-39,Age 40-49,Age 50-59,Age 60-69,...,Population - Age 18+,Population - Age 65+,Female,Male,Latinx,Asian Non-Latinx,Black Non-Latinx,White Non-Latinx,Population - Other Race Non-Latinx,Record ID
120,Citywide,2020,Chicago,2699347,552668,548747,463143,336591,313865,255435,...,2146679,342174,1388469,1310878,772791,182251.0,776470.0,900055,67780.0,Citywide-Chicago-2020
121,ZIP Code,2020,60601,14513,825,4696,3048,1815,809,1974,...,13688,2605,7894,6619,1242,3528.0,679.0,8614,450.0,ZIP_CODE-60601-2020
122,ZIP Code,2020,60602,1596,115,332,860,191,81,17,...,1481,4,744,852,120,435.0,37.0,794,210.0,ZIP_CODE-60602-2020
123,ZIP Code,2020,60603,1186,15,423,248,68,176,146,...,1171,198,560,626,62,397.0,29.0,692,6.0,ZIP_CODE-60603-2020
124,ZIP Code,2020,60604,729,5,313,171,30,77,89,...,724,65,441,288,23,127.0,72.0,507,0.0,ZIP_CODE-60604-2020


In [23]:
# Drop columns that are not needed ('Population - Age 0-4', 'Population - Age 5-11', 'Population - Age 12-17', 'Population - Age 5+', 'Population - Age 18+', 'Population - Age 65+', "Record ID")

chicago_pop_counts_2020_df = chicago_pop_counts_2020_df.drop(columns=['Population - Age 0-4', 
                                                                      'Population - Age 5-11', 
                                                                      'Population - Age 12-17', 
                                                                      'Population - Age 5+', 
                                                                      'Population - Age 18+', 
                                                                      'Population - Age 65+', 
                                                                      'Record ID'])
chicago_pop_counts_2020_df.head()

Unnamed: 0,Geography Type,Year,Geography,Total,Age 0-17,Age 18-29,Age 30-39,Age 40-49,Age 50-59,Age 60-69,Age 70-79,Age 80+,Female,Male,Latinx,Asian Non-Latinx,Black Non-Latinx,White Non-Latinx,Population - Other Race Non-Latinx
120,Citywide,2020,Chicago,2699347,552668,548747,463143,336591,313865,255435,145426,83472,1388469,1310878,772791,182251.0,776470.0,900055,67780.0
121,ZIP Code,2020,60601,14513,825,4696,3048,1815,809,1974,1070,276,7894,6619,1242,3528.0,679.0,8614,450.0
122,ZIP Code,2020,60602,1596,115,332,860,191,81,17,0,0,744,852,120,435.0,37.0,794,210.0
123,ZIP Code,2020,60603,1186,15,423,248,68,176,146,91,19,560,626,62,397.0,29.0,692,6.0
124,ZIP Code,2020,60604,729,5,313,171,30,77,89,36,8,441,288,23,127.0,72.0,507,0.0


In [24]:
# Save new cleaned data to a new csv file
chicago_pop_counts_2020_df.to_csv('cleaned_data/chicago_pop_counts_2020.csv', index=False)

## Economic Metric

**Measuring**: Median Household Income, Unemployment Rate, Poverty Rate.

Data for this section was pulled from the Chicago 2020 Census Database using various coded tables:
* Median Household Income: Table S1901
* Unemployment Rate: Table S2301
* Poverty Rate: S1501

##### Median Household Income (Table S1901)

In [26]:
# Import new csv file for Median Household Income
chicago_income_data = '../chicago_data/chi_median_income.csv'

# Read the csv file
chicago_income_df = pd.read_csv(chicago_income_data)
chicago_income_df.head()

Unnamed: 0,Label (Grouping),Illinois!!Households!!Estimate,Illinois!!Households!!Margin of Error,Illinois!!Families!!Estimate,Illinois!!Families!!Margin of Error,Illinois!!Married-couple families!!Estimate,Illinois!!Married-couple families!!Margin of Error,Illinois!!Nonfamily households!!Estimate,Illinois!!Nonfamily households!!Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Margin of Error"
0,Total,4884061,"±8,585",3116415,"±10,228",2301114,"±11,693",1767646,"±7,877",3548735,"±5,885",2288602,"±9,235",1671999,"±10,169",1260133,"±7,479"
1,"Less than $10,000",6.1%,±0.1,3.3%,±0.1,1.2%,±0.1,11.9%,±0.2,5.8%,±0.1,3.3%,±0.1,1.1%,±0.1,11.2%,±0.2
2,"$10,000 to $14,999",3.7%,±0.1,2.0%,±0.1,0.9%,±0.1,7.0%,±0.1,3.3%,±0.1,1.8%,±0.1,0.8%,±0.1,6.2%,±0.2
3,"$15,000 to $24,999",8.1%,±0.1,5.3%,±0.1,2.8%,±0.1,13.8%,±0.2,7.3%,±0.1,4.9%,±0.1,2.7%,±0.1,12.3%,±0.2
4,"$25,000 to $34,999",8.1%,±0.1,6.3%,±0.1,4.3%,±0.1,11.7%,±0.2,7.4%,±0.1,6.0%,±0.1,4.0%,±0.1,10.6%,±0.2


In [None]:
# Display columns and data types
chicago_income_df.dtypes

Label (Grouping)                                                                           object
Illinois!!Households!!Estimate                                                             object
Illinois!!Households!!Margin of Error                                                      object
Illinois!!Families!!Estimate                                                               object
Illinois!!Families!!Margin of Error                                                        object
Illinois!!Married-couple families!!Estimate                                                object
Illinois!!Married-couple families!!Margin of Error                                         object
Illinois!!Nonfamily households!!Estimate                                                   object
Illinois!!Nonfamily households!!Margin of Error                                            object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Estimate                        object
Chicago-Naperville-E

In [28]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_income_df = chicago_income_df.drop(columns=['Illinois!!Households!!Margin of Error', 
                                                    'Illinois!!Families!!Margin of Error', 
                                                    'Illinois!!Married-couple families!!Margin of Error',
                                                    'Illinois!!Nonfamily households!!Margin of Error',
                                                    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Margin of Error',
                                                    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Margin of Error',
                                                    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Margin of Error',
                                                    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Margin of Error'
                                                    ])

In [58]:
# Rename columns for clarity
chicago_income_df = chicago_income_df.rename(columns={
    'Label (Grouping)': 'Income Grouping',
    'Illinois!!Households!!Estimate': 'Illinois Households',
    'Illinois!!Families!!Estimate': 'Illinois Families',
    'Illinois!!Married-couple families!!Estimate': 'Illinois Married-couple Households',
    'Illinois!!Nonfamily households!!Estimate': 'Illinois Nonfamily Households',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Estimate': 'Chicago Metro Households',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Estimate': 'Chicago Metro Families',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Estimate': 'Chicago Metro Married-couple Households',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Estimate': 'Chicago Metro Nonfamily Households'
})

# Display the cleaned DataFrame
chicago_income_df.head()

Unnamed: 0,Income Grouping,Illinois Households,Illinois Families,Illinois Married-couple Households,Illinois Nonfamily Households,Chicago Metro Households,Chicago Metro Families,Chicago Metro Married-couple Households,Chicago Metro Nonfamily Households
0,Total,4884061,3116415,2301114,1767646,3548735,2288602,1671999,1260133
1,"Less than $10,000",6.1%,3.3%,1.2%,11.9%,5.8%,3.3%,1.1%,11.2%
2,"$10,000 to $14,999",3.7%,2.0%,0.9%,7.0%,3.3%,1.8%,0.8%,6.2%
3,"$15,000 to $24,999",8.1%,5.3%,2.8%,13.8%,7.3%,4.9%,2.7%,12.3%
4,"$25,000 to $34,999",8.1%,6.3%,4.3%,11.7%,7.4%,6.0%,4.0%,10.6%


In [None]:
# Save the cleaned DataFrame to a new CSV file

##### Unemployment Rate (Table S2301)

In [30]:
# Import new csv file for Unemployment Rate
chicago_employment_data = '../chicago_data/chi_employment_status.csv'

# Read the csv file
chicago_employment_df = pd.read_csv(chicago_employment_data)
chicago_employment_df.head()

Unnamed: 0,Label (Grouping),Illinois!!Total!!Estimate,Illinois!!Total!!Margin of Error,Illinois!!Labor Force Participation Rate!!Estimate,Illinois!!Labor Force Participation Rate!!Margin of Error,Illinois!!Employment/Population Ratio!!Estimate,Illinois!!Employment/Population Ratio!!Margin of Error,Illinois!!Unemployment rate!!Estimate,Illinois!!Unemployment rate!!Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Margin of Error"
0,Population 16 years and over,10193604.0,"±2,843",65.2%,±0.1,61.2%,±0.1,6.0%,±0.1,7572022.0,"±2,220",66.9%,±0.1,62.6%,±0.1,6.2%,±0.1
1,AGE,,,,,,,,,,,,,,,,
2,16 to 19 years,662629.0,"±2,868",40.1%,±0.4,32.4%,±0.4,18.1%,±0.6,491955.0,"±2,371",38.2%,±0.5,30.5%,±0.5,19.0%,±0.7
3,20 to 24 years,844275.0,"±1,252",75.6%,±0.4,66.8%,±0.4,11.0%,±0.3,617600.0,±624,76.5%,±0.4,66.7%,±0.5,12.0%,±0.4
4,25 to 29 years,896223.0,±929,85.1%,±0.3,79.1%,±0.4,6.7%,±0.3,691955.0,±452,85.8%,±0.3,79.6%,±0.4,6.8%,±0.3


In [32]:
# Display columns and data types
chicago_employment_df.dtypes

Label (Grouping)                                                                                  object
Illinois!!Total!!Estimate                                                                         object
Illinois!!Total!!Margin of Error                                                                  object
Illinois!!Labor Force Participation Rate!!Estimate                                                object
Illinois!!Labor Force Participation Rate!!Margin of Error                                         object
Illinois!!Employment/Population Ratio!!Estimate                                                   object
Illinois!!Employment/Population Ratio!!Margin of Error                                            object
Illinois!!Unemployment rate!!Estimate                                                             object
Illinois!!Unemployment rate!!Margin of Error                                                      object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!E

In [33]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_employment_df = chicago_employment_df.drop(columns=['Illinois!!Total!!Margin of Error', 
                                                            'Illinois!!Labor Force Participation Rate!!Margin of Error', 
                                                            'Illinois!!Employment/Population Ratio!!Margin of Error',
                                                            'Illinois!!Unemployment rate!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Margin of Error'
                                                            ])

In [56]:
# Drop Nan Values
chicago_employment_df = chicago_employment_df.dropna()

In [57]:
# Rename columns for clarity
chicago_employment_df = chicago_employment_df.rename(columns={
    'Label (Grouping)': 'Age',
    'Illinois!!Total!!Estimate': 'Illinois Total',
    'Illinois!!Labor Force Participation Rate!!Estimate': 'Illinois Labor Force Participation Rate',
    'Illinois!!Employment/Population Ratio!!Estimate': 'Illinois Employment/Population Ratio',
    'Illinois!!Unemployment rate!!Estimate': 'Illinois Unemployment Rate',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Estimate': 'Chicago Metro Labor Force Participation Rate',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Estimate': 'Chicago Metro Employment/Population Ratio',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Estimate': 'Chicago Metro Unemployment Rate'
})

# Display the cleaned DataFrame
chicago_employment_df.head()

Unnamed: 0,Age,Illinois Total,Illinois Labor Force Participation Rate,Illinois Employment/Population Ratio,Illinois Unemployment Rate,Chicago Metro Total Population,Chicago Metro Labor Force Participation Rate,Chicago Metro Employment/Population Ratio,Chicago Metro Unemployment Rate
0,Population 16 years and over,10193604,65.2%,61.2%,6.0%,7572022,66.9%,62.6%,6.2%
2,16 to 19 years,662629,40.1%,32.4%,18.1%,491955,38.2%,30.5%,19.0%
3,20 to 24 years,844275,75.6%,66.8%,11.0%,617600,76.5%,66.7%,12.0%
4,25 to 29 years,896223,85.1%,79.1%,6.7%,691955,85.8%,79.6%,6.8%
5,30 to 34 years,867606,85.6%,80.9%,5.2%,673000,86.2%,81.4%,5.2%


In [None]:
# Save the cleaned DataFrame to a new CSV file

#### Poverty Rate (S1501)

In [35]:
# Import new csv file for Poverty Rate
chicago_poverty_data = '../chicago_data/chi_poverty_status.csv'

# Read the csv file
chicago_poverty_df = pd.read_csv(chicago_poverty_data)
chicago_poverty_df.head()

Unnamed: 0,Label (Grouping),Illinois!!Total!!Estimate,Illinois!!Total!!Margin of Error,Illinois!!Below poverty level!!Estimate,Illinois!!Below poverty level!!Margin of Error,Illinois!!Percent below poverty level!!Estimate,Illinois!!Percent below poverty level!!Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Margin of Error"
0,Population for whom poverty status is determined,12418504.0,"±2,377",1488670.0,"±18,242",12.0%,±0.1,9319224.0,"±3,122",1052416.0,"±15,120",11.3%,±0.2
1,AGE,,,,,,,,,,,,
2,Under 18 years,2813715.0,"±2,419",454654.0,"±9,623",16.2%,±0.3,2133785.0,"±1,927",331541.0,"±8,005",15.5%,±0.4
3,Under 5 years,742571.0,"±1,463",130683.0,"±3,603",17.6%,±0.5,559864.0,"±1,254",90114.0,"±2,880",16.1%,±0.5
4,5 to 17 years,2071144.0,"±1,695",323971.0,"±7,648",15.6%,±0.4,1573921.0,"±1,315",241427.0,"±6,823",15.3%,±0.4


In [36]:
# Display columns and data types
chicago_poverty_df.dtypes

Label (Grouping)                                                                               object
Illinois!!Total!!Estimate                                                                      object
Illinois!!Total!!Margin of Error                                                               object
Illinois!!Below poverty level!!Estimate                                                        object
Illinois!!Below poverty level!!Margin of Error                                                 object
Illinois!!Percent below poverty level!!Estimate                                                object
Illinois!!Percent below poverty level!!Margin of Error                                         object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate                                 object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error                          object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Estimate      

In [37]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_poverty_df = chicago_poverty_df.drop(columns=['Illinois!!Total!!Margin of Error', 
                                                       'Illinois!!Below poverty level!!Margin of Error', 
                                                       'Illinois!!Percent below poverty level!!Margin of Error',
                                                       'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
                                                       'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Margin of Error',
                                                       'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Margin of Error'
                                                       ])

In [54]:
# Delete NaN values
chicago_poverty_df = chicago_poverty_df.dropna()

In [55]:
# Rename columns
chicago_poverty_df = chicago_poverty_df.rename(columns={
    'Label (Grouping)': 'Age',
    'Illinois!!Total!!Estimate': 'Illinois Total Population',
    'Illinois!!Below poverty level!!Estimate': 'Illinois Below Poverty Level',
    'Illinois!!Percent below poverty level!!Estimate': 'Illinois % Below Poverty Level',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Estimate': 'Chicago Metro Below Poverty Level',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Estimate': 'Chicago Metro % Below Poverty Level'
})

# Display the cleaned DataFrame
chicago_poverty_df.head()

Unnamed: 0,Age,Illinois Total Population,Illinois Below Poverty Level,Illinois % Below Poverty Level,Chicago Metro Total Population,Chicago Metro Below Poverty Level,Chicago Metro % Below Poverty Level
0,Population for whom poverty status is determined,12418504,1488670,12.0%,9319224,1052416,11.3%
2,Under 18 years,2813715,454654,16.2%,2133785,331541,15.5%
3,Under 5 years,742571,130683,17.6%,559864,90114,16.1%
4,5 to 17 years,2071144,323971,15.6%,1573921,241427,15.3%
5,Related children of householder un...,2802857,444689,15.9%,2126805,325159,15.3%


In [None]:
# Save the cleaned DataFrame to a new CSV file

## Housing  Metric 

**Measuring**: Median Home Price, Median Rent, Homeownership Rate

Data for this section was pulled from the Chicago 2020 Census Database using the same table code from the American Community Survey by the U.S. Census Bureau (ACS). 
* Median Home Price: Table DP04
* Median Rent: Table DP04
* Homeownership Rate: Table DP04

In [39]:
# Import new csv file for Housing Metric
chicago_housing_data = '../chicago_data/chi_housing.csv'

# Read the csv file
chicago_housing_df = pd.read_csv(chicago_housing_data)
chicago_housing_df.head()


Unnamed: 0,Label (Grouping),Illinois!!Estimate,Illinois!!Margin of Error,Illinois!!Percent,Illinois!!Percent Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error"
0,HOUSING OCCUPANCY,,,,,,,,
1,Total housing units,5373385.0,±564,5373385,(X),3856019.0,±936,3856019,(X)
2,Occupied housing units,4884061.0,"±8,585",90.9%,±0.2,3548735.0,"±5,885",92.0%,±0.1
3,Vacant housing units,489324.0,"±8,662",9.1%,±0.2,307284.0,"±5,724",8.0%,±0.1
4,Homeowner vacancy rate,1.6,±0.1,(X),(X),1.5,±0.1,(X),(X)


In [40]:
# Display columns and data types
chicago_housing_df.dtypes

Label (Grouping)                                                          object
Illinois!!Estimate                                                        object
Illinois!!Margin of Error                                                 object
Illinois!!Percent                                                         object
Illinois!!Percent Margin of Error                                         object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate                   object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error            object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent                    object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error    object
dtype: object

In [42]:
# Drop unnecessary columns (i.e anything with "Margin of Error")
chicago_housing_df = chicago_housing_df.drop(columns=['Illinois!!Margin of Error',
                                                            'Illinois!!Percent Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error',
                                                            'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error'])


In [52]:
# Delete NaN values from the DataFrame
chicago_housing_df = chicago_housing_df.dropna()

In [53]:
# Rename columns 
chicago_housing_df = chicago_housing_df.rename(columns={
    'Label (Grouping)': 'Housing Occupancy Status',
    'Illinois!!Estimate': 'Illinois Total Housing Units',
    'Illinois!!Percent': 'Illinois % Housing Units',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate': 'Chicago Metro Total Housing Units',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent': 'Chicago Metro % Housing Units'
})

# Display the cleaned DataFrame
chicago_housing_df.head()

Unnamed: 0,Housing Occupancy Status,Illinois Total Housing Units,Illinois % Housing Units,Chicago Metro Total Housing Units,Chicago Metro % Housing Units
1,Total housing units,5373385.0,5373385,3856019.0,3856019
2,Occupied housing units,4884061.0,90.9%,3548735.0,92.0%
3,Vacant housing units,489324.0,9.1%,307284.0,8.0%
4,Homeowner vacancy rate,1.6,(X),1.5,(X)
5,Rental vacancy rate,5.9,(X),5.3,(X)


In [None]:
# Save the cleaned DataFrame to a new CSV file

## Quality of Life Metric

**Measuring**: Commute Time 

Data was pulled from the Census Database using the following survey by ASC in 2020. 
* Commute Time: Table S0801 

In [45]:
# Import new csv file for Commute Time
chicago_commute_data = '../chicago_data/chi_commute.csv'

# Read the csv file
chicago_commute_df = pd.read_csv(chicago_commute_data)
chicago_commute_df.head()

Unnamed: 0,Label (Grouping),Illinois!!Total!!Estimate,Illinois!!Total!!Margin of Error,Illinois!!Male!!Estimate,Illinois!!Male!!Margin of Error,Illinois!!Female!!Estimate,Illinois!!Female!!Margin of Error,"Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate","Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error"
0,Workers 16 years and over,6129176,"±12,308",3211679,"±7,869",2917497,"±8,018",4654790,"±10,685",2453220,"±7,291",2201570,"±7,397"
1,MEANS OF TRANSPORTATION TO WORK,,,,,,,,,,,,
2,"Car, truck, or van",79.3%,±0.2,79.7%,±0.2,78.9%,±0.2,76.1%,±0.2,76.7%,±0.2,75.4%,±0.3
3,Drove alone,71.5%,±0.2,71.9%,±0.2,71.1%,±0.2,68.3%,±0.2,69.1%,±0.3,67.5%,±0.3
4,Carpooled,7.8%,±0.1,7.8%,±0.1,7.8%,±0.1,7.7%,±0.1,7.5%,±0.2,7.9%,±0.2


In [46]:
# Display columns and data types
chicago_commute_df.dtypes

Label (Grouping)                                                          object
Illinois!!Total!!Estimate                                                 object
Illinois!!Total!!Margin of Error                                          object
Illinois!!Male!!Estimate                                                  object
Illinois!!Male!!Margin of Error                                           object
Illinois!!Female!!Estimate                                                object
Illinois!!Female!!Margin of Error                                         object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate            object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error     object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate             object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error      object
Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate           object
Chicago-Naperville-Elgin, IL

In [47]:
# Delete unnecessary columns (i.e anything with "Margin of Error")
chicago_commute_df = chicago_commute_df.drop(columns=['Illinois!!Total!!Margin of Error',
                                                        'Illinois!!Male!!Margin of Error',
                                                        'Illinois!!Female!!Margin of Error',
                                                        'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
                                                        'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error',
                                                        'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error'
                                                        ])

In [49]:
# Delete NaN values
chicago_commute_df = chicago_commute_df.dropna()

In [51]:
# Rename columns for clarity
chicago_commute_df = chicago_commute_df.rename(columns={
    'Label (Grouping)': 'Means of Transportation',
    'Illinois!!Total!!Estimate': 'Illinois Total Commute Time',
    'Illinois!!Male!!Estimate': 'Male Total',
    'Illinois!!Female!!Estimate': 'Female Total',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate': 'Chicago Male Total',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate': 'Chicago Female Total'
})

# Display the cleaned DataFrame
chicago_commute_df.head()

Unnamed: 0,Means of Transportation,Illinois Total Commute Time,Illinois Male Total,Illinois Female Total,Chicago Metro Total,Chicago Male Total,Chicago Female Total
0,Workers 16 years and over,6129176,3211679,2917497,4654790,2453220,2201570
2,"Car, truck, or van",79.3%,79.7%,78.9%,76.1%,76.7%,75.4%
3,Drove alone,71.5%,71.9%,71.1%,68.3%,69.1%,67.5%
4,Carpooled,7.8%,7.8%,7.8%,7.7%,7.5%,7.9%
5,In 2-person carpool,5.9%,5.9%,5.9%,5.8%,5.7%,6.0%


## THIS IS A DATASET COMPARING CHICAGO FROM 2020 TO 2024. MIGHT WANT TO KEEP.

In [None]:
## THIS IS A DATASET COMPARING CHICAGO FROM 2020 TO 2024. MIGHT WANT TO KEEP.
# Import csv file for chicago_data
chicago_data = 'data_files/chicago_data/chicago_demographics.csv'

# Read the csv file for chicago_data
chicago_data_df = pd.read_csv(chicago_data)
chicago_data_df.head()

Unnamed: 0,Fact,Fact Note,"Chicago city, Illinois","Value Note for Chicago city, Illinois"
0,"Population estimates, July 1, 2024, (V2024)",,,
1,"Population estimates, July 1, 2023, (V2023)",,2664452.0,
2,"Population estimates base, April 1, 2020, (V2024)",,,
3,"Population estimates base, April 1, 2020, (V2023)",,2746352.0,
4,"Population, percent change - April 1, 2020 (es...",,,


In [14]:
# Filter the dataframe to only include rows with '2020' in the 'Fact' column
chicago_data_2020_df = chicago_data_df[chicago_data_df['Fact'].str.contains('2020')]
chicago_data_2020_df

Unnamed: 0,Fact,Fact Note,"Chicago city, Illinois","Value Note for Chicago city, Illinois"
2,"Population estimates base, April 1, 2020, (V2024)",,,
3,"Population estimates base, April 1, 2020, (V2023)",,2746352,
4,"Population, percent change - April 1, 2020 (es...",,,
5,"Population, percent change - April 1, 2020 (es...",,-3.0%,
6,"Population, Census, April 1, 2020",,2746388,
62,"Population per square mile, 2020",,12059.8,
64,"Land area in square miles, 2020",,227.73,


In [15]:
# Clean the data by dropping the columns that are not needed (want to drop "Fact Note", "Value Note for Chicago City, Illinois") 
chicago_data_2020_df = chicago_data_2020_df.drop(columns=['Fact Note', 'Value Note for Chicago city, Illinois'])
chicago_data_2020_df.head()

Unnamed: 0,Fact,"Chicago city, Illinois"
2,"Population estimates base, April 1, 2020, (V2024)",
3,"Population estimates base, April 1, 2020, (V2023)",2746352
4,"Population, percent change - April 1, 2020 (es...",
5,"Population, percent change - April 1, 2020 (es...",-3.0%
6,"Population, Census, April 1, 2020",2746388


In [16]:
# Drop the rows with NaN values
chicago_data_2020_df = chicago_data_2020_df.dropna()

In [17]:
chicago_data_2020_df.head()

Unnamed: 0,Fact,"Chicago city, Illinois"
3,"Population estimates base, April 1, 2020, (V2023)",2746352
5,"Population, percent change - April 1, 2020 (es...",-3.0%
6,"Population, Census, April 1, 2020",2746388
62,"Population per square mile, 2020",12059.8
64,"Land area in square miles, 2020",227.73


In [18]:
# save the cleaned data to a new csv file
chicago_data_2020_df.to_csv('cleaned_data/chicago_data_2020.csv', index=False)