# Chicago Dataset

In [1]:
# Import dependencies
import os
import csv
import pandas as pd

## Demographic Metric

**Measuring**: ACS Population Density, Median Age, Education Level

Data was pulled from the 2020 Chicago Census Database and the American Community Survey (ACS) by the U.S. Census Bureau using various coded tables to ensure unified data for each metropolitan city. Two tables used the same code and were separated to ensure proper measurement. 
* ACS Population Density: Table DP05
* Median Age: Table DP05
* Education Level: S1501

#### ACS Population Density & Median Age (Table DP05)

In [2]:
# Location of the raw ACS 2020 age extract
chicago_age_data = '../chicago_data/chi_raw_csv/chi_age_demo.csv'

# Load the extract into a DataFrame and preview the first rows
chicago_age_df = pd.read_csv(filepath_or_buffer=chicago_age_data)
chicago_age_df.head()

Unnamed: 0,Label (Grouping),SEX AND AGE,SEX AND AGE!!Total population,SEX AND AGE!!Total population!!Male,SEX AND AGE!!Total population!!Female,SEX AND AGE!!Total population!!Under 5 years,SEX AND AGE!!Total population!!5 to 9 years,SEX AND AGE!!Total population!!10 to 14 years,SEX AND AGE!!Total population!!15 to 19 years,SEX AND AGE!!Total population!!20 to 24 years,...,SEX AND AGE!!Total population!!75 to 84 years,SEX AND AGE!!Total population!!85 years and over,SEX AND AGE!!Total population!!Median age (years),SEX AND AGE!!Total population!!Under 18 years,SEX AND AGE!!Total population!!16 years and over,SEX AND AGE!!Total population!!18 years and over,SEX AND AGE!!Total population!!21 years and over,SEX AND AGE!!Total population!!62 years and over,SEX AND AGE!!Total population!!65 years and over,SEX AND AGE!!Total population!!18 years and over.1
0,Illinois,,,,,,,,,,...,,,,,,,,,,
1,Estimate,,12716164.0,6247083,6469081,755518,768804,829779,831088,844275,...,573017,270768,38.3,2855433,10193604,9860731,9363657,2443504,1990426,9860731.0
2,Percent,,12716164.0,49.1%,50.9%,5.9%,6.0%,6.5%,6.5%,6.6%,...,4.5%,2.1%,(X),22.5%,80.2%,77.5%,73.6%,19.2%,15.7%,9860731.0
3,"Cook County, Illinois",,,,,,,,,,...,,,,,,,,,,
4,Estimate,,5169517.0,2509686,2659831,315368,302232,322415,310116,334453,...,219634,104191,37.0,1128625,4167295,4040892,3859321,931306,757688,4040892.0


In [3]:
# Keep every row whose 'Label (Grouping)' value is anything other than 'Illinois'
chicago_age_df = chicago_age_df.loc[chicago_age_df['Label (Grouping)'].ne('Illinois')]
chicago_age_df.head()

Unnamed: 0,Label (Grouping),SEX AND AGE,SEX AND AGE!!Total population,SEX AND AGE!!Total population!!Male,SEX AND AGE!!Total population!!Female,SEX AND AGE!!Total population!!Under 5 years,SEX AND AGE!!Total population!!5 to 9 years,SEX AND AGE!!Total population!!10 to 14 years,SEX AND AGE!!Total population!!15 to 19 years,SEX AND AGE!!Total population!!20 to 24 years,...,SEX AND AGE!!Total population!!75 to 84 years,SEX AND AGE!!Total population!!85 years and over,SEX AND AGE!!Total population!!Median age (years),SEX AND AGE!!Total population!!Under 18 years,SEX AND AGE!!Total population!!16 years and over,SEX AND AGE!!Total population!!18 years and over,SEX AND AGE!!Total population!!21 years and over,SEX AND AGE!!Total population!!62 years and over,SEX AND AGE!!Total population!!65 years and over,SEX AND AGE!!Total population!!18 years and over.1
1,Estimate,,12716164.0,6247083,6469081,755518,768804,829779,831088,844275,...,573017,270768,38.3,2855433,10193604,9860731,9363657,2443504,1990426,9860731.0
2,Percent,,12716164.0,49.1%,50.9%,5.9%,6.0%,6.5%,6.5%,6.6%,...,4.5%,2.1%,(X),22.5%,80.2%,77.5%,73.6%,19.2%,15.7%,9860731.0
3,"Cook County, Illinois",,,,,,,,,,...,,,,,,,,,,
4,Estimate,,5169517.0,2509686,2659831,315368,302232,322415,310116,334453,...,219634,104191,37.0,1128625,4167295,4040892,3859321,931306,757688,4040892.0
5,Percent,,5169517.0,48.5%,51.5%,6.1%,5.8%,6.2%,6.0%,6.5%,...,4.2%,2.0%,(X),21.8%,80.6%,78.2%,74.7%,18.0%,14.7%,4040892.0


In [4]:
# Remove the rows with index labels 1 and 2: the Estimate/Percent rows that
# hold the unnecessary Illinois (state-level) figures
chicago_age_df = chicago_age_df.drop(labels=[1, 2], axis='index')
chicago_age_df.head()

Unnamed: 0,Label (Grouping),SEX AND AGE,SEX AND AGE!!Total population,SEX AND AGE!!Total population!!Male,SEX AND AGE!!Total population!!Female,SEX AND AGE!!Total population!!Under 5 years,SEX AND AGE!!Total population!!5 to 9 years,SEX AND AGE!!Total population!!10 to 14 years,SEX AND AGE!!Total population!!15 to 19 years,SEX AND AGE!!Total population!!20 to 24 years,...,SEX AND AGE!!Total population!!75 to 84 years,SEX AND AGE!!Total population!!85 years and over,SEX AND AGE!!Total population!!Median age (years),SEX AND AGE!!Total population!!Under 18 years,SEX AND AGE!!Total population!!16 years and over,SEX AND AGE!!Total population!!18 years and over,SEX AND AGE!!Total population!!21 years and over,SEX AND AGE!!Total population!!62 years and over,SEX AND AGE!!Total population!!65 years and over,SEX AND AGE!!Total population!!18 years and over.1
3,"Cook County, Illinois",,,,,,,,,,...,,,,,,,,,,
4,Estimate,,5169517.0,2509686,2659831,315368,302232,322415,310116,334453,...,219634,104191,37.0,1128625,4167295,4040892,3859321,931306,757688,4040892.0
5,Percent,,5169517.0,48.5%,51.5%,6.1%,5.8%,6.2%,6.0%,6.5%,...,4.2%,2.0%,(X),21.8%,80.6%,78.2%,74.7%,18.0%,14.7%,4040892.0


In [5]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_age_df.dtypes

Label (Grouping)                                       object
SEX AND AGE                                           float64
SEX AND AGE!!Total population                          object
SEX AND AGE!!Total population!!Male                    object
SEX AND AGE!!Total population!!Female                  object
SEX AND AGE!!Total population!!Under 5 years           object
SEX AND AGE!!Total population!!5 to 9 years            object
SEX AND AGE!!Total population!!10 to 14 years          object
SEX AND AGE!!Total population!!15 to 19 years          object
SEX AND AGE!!Total population!!20 to 24 years          object
SEX AND AGE!!Total population!!25 to 34 years          object
SEX AND AGE!!Total population!!35 to 44 years          object
SEX AND AGE!!Total population!!45 to 54 years          object
SEX AND AGE!!Total population!!55 to 59 years          object
SEX AND AGE!!Total population!!60 to 64 years          object
SEX AND AGE!!Total population!!65 to 74 years          object
SEX AND 

In [6]:
# Strip the 'SEX AND AGE!!' prefix from the column headers
chicago_age_df.columns = chicago_age_df.columns.str.replace('SEX AND AGE!!', '')

# Columns that are not needed for the age breakdown: the sex split, the
# cumulative age bands, and the 'SEX AND AGE' header column
age_columns_to_drop = [
    'SEX AND AGE',
    'Total population!!Male',
    'Total population!!Female',
    'Total population!!Under 18 years',
    'Total population!!16 years and over',
    'Total population!!18 years and over',
    'Total population!!18 years and over.1',
    'Total population!!21 years and over',
    'Total population!!62 years and over',
    'Total population!!65 years and over',
]
chicago_age_df = chicago_age_df.drop(columns=age_columns_to_drop)


In [7]:
# Re-list the columns and their dtypes to confirm what remains in the DataFrame
chicago_age_df.dtypes

Label (Grouping)                        object
Total population                        object
Total population!!Under 5 years         object
Total population!!5 to 9 years          object
Total population!!10 to 14 years        object
Total population!!15 to 19 years        object
Total population!!20 to 24 years        object
Total population!!25 to 34 years        object
Total population!!35 to 44 years        object
Total population!!45 to 54 years        object
Total population!!55 to 59 years        object
Total population!!60 to 64 years        object
Total population!!65 to 74 years        object
Total population!!75 to 84 years        object
Total population!!85 years and over     object
Total population!!Median age (years)    object
dtype: object

In [10]:
# Replace the column headers with short, readable labels.
# The labels are assigned by position, so their order must match the
# current column order of the DataFrame.
age_column_labels = [
    'Chicago', 'Total Population',
    'Under 5', '5 to 9', '10 to 14', '15 to 19', '20 to 24',
    '25 to 34', '35 to 44', '45 to 54', '55 to 59', '60 to 64',
    '65 to 74', '75 to 84', '85 and Over',
    'Median Age',
]
chicago_age_df.columns = age_column_labels

chicago_age_df.head()

Unnamed: 0,Chicago,Total Population,Under 5,5 to 9,10 to 14,15 to 19,20 to 24,25 to 34,35 to 44,45 to 54,55 to 59,60 to 64,65 to 74,75 to 84,85 and Over,Median Age
3,"Cook County, Illinois",,,,,,,,,,,,,,,
4,Estimate,5169517.0,315368,302232,322415,310116,334453,844827,699725,647738,330348,304607,433863,219634,104191,37.0
5,Percent,5169517.0,6.1%,5.8%,6.2%,6.0%,6.5%,16.3%,13.5%,12.5%,6.4%,5.9%,8.4%,4.2%,2.0%,(X)


In [11]:
# Discard every row that contains at least one NaN value
chicago_age_df = chicago_age_df.dropna(axis=0, how='any')

In [12]:
# Write the cleaned age data to a new CSV file (row index is not written)
chicago_age_df.to_csv(path_or_buf='../chicago_data/cleaned_chi_AGE.csv', index=False)

#### Race (Table DP05)

In [13]:
# Location of the raw race extract
chicago_race_data = '../chicago_data/chi_raw_csv/chi_race.csv'

# Load the extract into a DataFrame and preview the first rows
chicago_race_df = pd.read_csv(filepath_or_buffer=chicago_race_data)
chicago_race_df.head()

Unnamed: 0,Label (Grouping),RACE,Race alone or in combination with one or more other races!!Total population,Race alone or in combination with one or more other races!!Total population!!White,Race alone or in combination with one or more other races!!Total population!!Black or African American,Race alone or in combination with one or more other races!!Total population!!American Indian and Alaska Native,Race alone or in combination with one or more other races!!Total population!!Asian,Race alone or in combination with one or more other races!!Total population!!Native Hawaiian and Other Pacific Islander,Race alone or in combination with one or more other races!!Total population!!Some other race
0,Illinois,,,,,,,,
1,Estimate,,12716164.0,9367474,1957092,104386,828847,16535,1013633
2,Percent,,12716164.0,73.7%,15.4%,0.8%,6.5%,0.1%,8.0%
3,"Cook County, Illinois",,,,,,,,
4,Estimate,,5169517.0,3041776,1263902,43551,445203,6818,640553


In [14]:
# Keep every row whose 'Label (Grouping)' value is anything other than 'Illinois'
# (this removes the state-level header row at the top of the extract)
chicago_race_df = chicago_race_df.loc[chicago_race_df['Label (Grouping)'].ne('Illinois')]

In [15]:
# Remove the rows with index labels 1 and 2 (the first two remaining rows)
chicago_race_df = chicago_race_df.drop(labels=[1, 2], axis='index')
chicago_race_df.head()

Unnamed: 0,Label (Grouping),RACE,Race alone or in combination with one or more other races!!Total population,Race alone or in combination with one or more other races!!Total population!!White,Race alone or in combination with one or more other races!!Total population!!Black or African American,Race alone or in combination with one or more other races!!Total population!!American Indian and Alaska Native,Race alone or in combination with one or more other races!!Total population!!Asian,Race alone or in combination with one or more other races!!Total population!!Native Hawaiian and Other Pacific Islander,Race alone or in combination with one or more other races!!Total population!!Some other race
3,"Cook County, Illinois",,,,,,,,
4,Estimate,,5169517.0,3041776,1263902,43551,445203,6818,640553
5,Percent,,5169517.0,58.8%,24.4%,0.8%,8.6%,0.1%,12.4%


In [16]:
# Strip the long export prefix from the per-race column headers.
# The total-population column does not end in '!!' after 'Total population',
# so its header is left unchanged by this step.
race_header_prefix = 'Race alone or in combination with one or more other races!!Total population!!'
chicago_race_df.columns = chicago_race_df.columns.str.replace(race_header_prefix, '')

# Remove the 'RACE' column
chicago_race_df = chicago_race_df.drop(columns='RACE')

# Preview the result
chicago_race_df.head()

Unnamed: 0,Label (Grouping),Race alone or in combination with one or more other races!!Total population,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some other race
3,"Cook County, Illinois",,,,,,,
4,Estimate,5169517.0,3041776,1263902,43551,445203,6818,640553
5,Percent,5169517.0,58.8%,24.4%,0.8%,8.6%,0.1%,12.4%


In [None]:
# Discard every row that contains at least one NaN value
chicago_race_df = chicago_race_df.dropna(axis=0, how='any')

In [None]:
# Replace the column headers with short, readable labels.
# The labels are assigned by position, so their order must match the
# current column order of the DataFrame.
race_column_labels = [
    'Chicago, IL',
    'Total Population',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Other',
]
chicago_race_df.columns = race_column_labels

# Preview the result
chicago_race_df.head()

In [None]:
# Write the cleaned race data to a new CSV file (row index is not written)
chicago_race_df.to_csv(path_or_buf='../chicago_data/cleaned_chi_race.csv', index=False)

#### Education Level (Table S1501)

In [None]:
# Location of the raw education-level extract
chicago_edu_data = '../chicago_data/chi_edu_level.csv'

# Load the extract into a DataFrame and preview the first rows
chicago_edu_df = pd.read_csv(filepath_or_buffer=chicago_edu_data)
chicago_edu_df.head()


In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_edu_df.dtypes

In [None]:
# Drop unnecessary columns: every Illinois (state-level) column and every
# Chicago metro "Margin of Error" column, leaving the Chicago metro estimates.
# NOTE: each label must be its own comma-separated string. A comma typed inside
# the quotes makes Python concatenate two adjacent literals into one label that
# does not exist, and DataFrame.drop then raises a KeyError.
chicago_edu_df = chicago_edu_df.drop(columns=[
    # Illinois (state-level) estimates and margins of error
    'Illinois!!Total!!Estimate',
    'Illinois!!Total!!Margin of Error',
    'Illinois!!Percent!!Estimate',
    'Illinois!!Percent!!Margin of Error',
    'Illinois!!Male!!Estimate',
    'Illinois!!Male!!Margin of Error',
    'Illinois!!Percent Male!!Estimate',
    'Illinois!!Percent Male!!Margin of Error',
    'Illinois!!Female!!Estimate',
    'Illinois!!Female!!Margin of Error',
    'Illinois!!Percent Female!!Estimate',
    'Illinois!!Percent Female!!Margin of Error',
    # Chicago metro margins of error
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Margin of Error',
])

In [None]:
# Discard every row that contains at least one NaN value
chicago_edu_df = chicago_edu_df.dropna(axis=0, how='any')

In [None]:
# Map the raw export headers to short, readable column names.
# rename() only touches the headers that are present in the DataFrame;
# keys that do not match any column are ignored.
edu_column_names = {
    'Label (Grouping)': 'Age by Educational Attainment',
    # Illinois (state-level) columns
    'Illinois!!Total!!Estimate': 'Illinois Total Population',
    'Illinois!!Percent!!Estimate': 'Illinois Pop %',
    'Illinois!!Male!!Estimate': 'Illinois Male Population',
    'Illinois!!Percent Male!!Estimate': 'Illinois Male %',
    'Illinois!!Female!!Estimate': 'Illinois Female Population',
    'Illinois!!Percent Female!!Estimate': 'Illinois Female %',
    # Chicago metro columns
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent!!Estimate': 'Chicago Metro %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate': 'Chicago Male Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Male!!Estimate': 'Chicago Male %',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate': 'Chicago Female Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Female!!Estimate': 'Chicago Female %',
}
chicago_edu_df = chicago_edu_df.rename(columns=edu_column_names)

# Preview the result
chicago_edu_df.head()

In [None]:
# Write the cleaned education data to a new CSV file (row index is not written)
# NOTE(review): this path is relative to 'data_files/', unlike the '../chicago_data/'
# paths used by the other cells - confirm the intended output location.
chicago_edu_df.to_csv(path_or_buf='data_files/chicago_data/chi_cleaned_csv/cleaned_chi_edu.csv', index=False)

#### Found: Population Count by Age and Zipcode

In [None]:
# Location of the population-counts csv file
chicago_pop_counts = '../chicago_data/Chicago_Population_Counts.csv'

# Load the file into a DataFrame and preview the first rows
chicago_pop_counts_df = pd.read_csv(filepath_or_buffer=chicago_pop_counts)
chicago_pop_counts_df.head()

In [None]:
# Keep only the rows whose 'Year' column equals 2020
chicago_pop_counts_2020_df = chicago_pop_counts_df.loc[chicago_pop_counts_df['Year'].eq(2020)]
chicago_pop_counts_2020_df.head()

In [None]:
# List all column labels in the 2020 DataFrame
# (as the last expression of the cell, the Index is shown as the cell output)
chicago_pop_counts_2020_df.columns

In [None]:
# Shorten the headers by removing the 'Population - ' prefix from the
# columns that are kept for the analysis
pop_column_names = {
    'Population - Total': 'Total',
    # Age bands
    'Population - Age 0-17': 'Age 0-17',
    'Population - Age 18-29': 'Age 18-29',
    'Population - Age 30-39': 'Age 30-39',
    'Population - Age 40-49': 'Age 40-49',
    'Population - Age 50-59': 'Age 50-59',
    'Population - Age 60-69': 'Age 60-69',
    'Population - Age 70-79': 'Age 70-79',
    'Population - Age 80+': 'Age 80+',
    # Sex
    'Population - Female': 'Female',
    'Population - Male': 'Male',
    # Race / ethnicity
    'Population - Latinx': 'Latinx',
    'Population - Asian Non-Latinx': 'Asian Non-Latinx',
    'Population - Black Non-Latinx': 'Black Non-Latinx',
    'Population - White Non-Latinx': 'White Non-Latinx',
    'Population - Other Non-Latinx': 'Other Non-Latinx',
}
chicago_pop_counts_2020_df = chicago_pop_counts_2020_df.rename(columns=pop_column_names)
chicago_pop_counts_2020_df.head()

In [None]:
# Drop the columns that are not needed: the finer and cumulative age bands
# (which still carry their original 'Population - ' headers) and 'Record ID'
pop_columns_to_drop = [
    'Record ID',
    'Population - Age 0-4',
    'Population - Age 5-11',
    'Population - Age 12-17',
    'Population - Age 5+',
    'Population - Age 18+',
    'Population - Age 65+',
]
chicago_pop_counts_2020_df = chicago_pop_counts_2020_df.drop(columns=pop_columns_to_drop)
chicago_pop_counts_2020_df.head()

In [None]:
# Save the cleaned 2020 population counts to a new csv file (row index is not written).
# to_csv needs a file path: 'chi_cleaned_csv' is the output folder (the cleaned
# education csv is written inside it), so a file name is given here instead of
# the bare folder path.
chicago_pop_counts_2020_df.to_csv('data_files/chicago_data/chi_cleaned_csv/cleaned_chi_pop_counts_2020.csv', index=False)

## Economic Metric

**Measuring**: Median Household Income, Unemployment Rate, Poverty Rate.

Data for this section was pulled from the Chicago 2020 Census Database using various coded tables:
* Median Household Income: Table S1901
* Unemployment Rate: Table S2301
* Poverty Rate: Table S1701

##### Median Household Income (Table S1901)

In [None]:
# Location of the Median Household Income csv file
chicago_income_data = '../chicago_data/chi_median_income.csv'

# Load the file into a DataFrame and preview the first rows
chicago_income_df = pd.read_csv(filepath_or_buffer=chicago_income_data)
chicago_income_df.head()

In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_income_df.dtypes

In [None]:
# Remove the "Margin of Error" columns for both geographies
income_moe_columns = [
    # Illinois
    'Illinois!!Households!!Margin of Error',
    'Illinois!!Families!!Margin of Error',
    'Illinois!!Married-couple families!!Margin of Error',
    'Illinois!!Nonfamily households!!Margin of Error',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Margin of Error',
]
chicago_income_df = chicago_income_df.drop(columns=income_moe_columns)

In [None]:
# Map the raw export headers to short, readable column names
income_column_names = {
    'Label (Grouping)': 'Income Grouping',
    # Illinois
    'Illinois!!Households!!Estimate': 'Illinois Households',
    'Illinois!!Families!!Estimate': 'Illinois Families',
    'Illinois!!Married-couple families!!Estimate': 'Illinois Married-couple Households',
    'Illinois!!Nonfamily households!!Estimate': 'Illinois Nonfamily Households',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Households!!Estimate': 'Chicago Metro Households',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Families!!Estimate': 'Chicago Metro Families',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Married-couple families!!Estimate': 'Chicago Metro Married-couple Households',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Nonfamily households!!Estimate': 'Chicago Metro Nonfamily Households',
}
chicago_income_df = chicago_income_df.rename(columns=income_column_names)

# Preview the result
chicago_income_df.head()

In [None]:
# Write the cleaned income data to a new CSV file (row index is not written)
chicago_income_df.to_csv(path_or_buf='../chicago_data/cleaned_chi_income.csv', index=False)

##### Unemployment Rate (Table S2301)

In [None]:
# Location of the employment-status csv file (used for the Unemployment Rate)
chicago_employment_data = '../chicago_data/chi_employment_status.csv'

# Load the file into a DataFrame and preview the first rows
chicago_employment_df = pd.read_csv(filepath_or_buffer=chicago_employment_data)
chicago_employment_df.head()

In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_employment_df.dtypes

In [None]:
# Remove the "Margin of Error" columns for both geographies
employment_moe_columns = [
    # Illinois
    'Illinois!!Total!!Margin of Error',
    'Illinois!!Labor Force Participation Rate!!Margin of Error',
    'Illinois!!Employment/Population Ratio!!Margin of Error',
    'Illinois!!Unemployment rate!!Margin of Error',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Margin of Error',
]
chicago_employment_df = chicago_employment_df.drop(columns=employment_moe_columns)

In [None]:
# Discard every row that contains at least one NaN value
chicago_employment_df = chicago_employment_df.dropna(axis=0, how='any')

In [None]:
# Map the raw export headers to short, readable column names
employment_column_names = {
    'Label (Grouping)': 'Age',
    # Illinois
    'Illinois!!Total!!Estimate': 'Illinois Total',
    'Illinois!!Labor Force Participation Rate!!Estimate': 'Illinois Labor Force Participation Rate',
    'Illinois!!Employment/Population Ratio!!Estimate': 'Illinois Employment/Population Ratio',
    'Illinois!!Unemployment rate!!Estimate': 'Illinois Unemployment Rate',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Labor Force Participation Rate!!Estimate': 'Chicago Metro Labor Force Participation Rate',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Employment/Population Ratio!!Estimate': 'Chicago Metro Employment/Population Ratio',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Unemployment rate!!Estimate': 'Chicago Metro Unemployment Rate',
}
chicago_employment_df = chicago_employment_df.rename(columns=employment_column_names)

# Preview the result
chicago_employment_df.head()

In [None]:
# Save the cleaned employment DataFrame to a new CSV file (row index is not written).
# This must write chicago_employment_df; writing chicago_income_df here would
# store the income table under the employment file name.
chicago_employment_df.to_csv('../chicago_data/cleaned_chi_employment.csv', index=False)

#### Poverty Rate (Table S1701)

In [None]:
# Location of the poverty-status csv file (used for the Poverty Rate)
chicago_poverty_data = '../chicago_data/chi_poverty_status.csv'

# Load the file into a DataFrame and preview the first rows
chicago_poverty_df = pd.read_csv(filepath_or_buffer=chicago_poverty_data)
chicago_poverty_df.head()

In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_poverty_df.dtypes

In [None]:
# Remove the "Margin of Error" columns for both geographies
poverty_moe_columns = [
    # Illinois
    'Illinois!!Total!!Margin of Error',
    'Illinois!!Below poverty level!!Margin of Error',
    'Illinois!!Percent below poverty level!!Margin of Error',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Margin of Error',
]
chicago_poverty_df = chicago_poverty_df.drop(columns=poverty_moe_columns)

In [None]:
# Discard every row that contains at least one NaN value
chicago_poverty_df = chicago_poverty_df.dropna(axis=0, how='any')

In [None]:
# Map the raw export headers to short, readable column names
poverty_column_names = {
    'Label (Grouping)': 'Age',
    # Illinois
    'Illinois!!Total!!Estimate': 'Illinois Total Population',
    'Illinois!!Below poverty level!!Estimate': 'Illinois Below Poverty Level',
    'Illinois!!Percent below poverty level!!Estimate': 'Illinois % Below Poverty Level',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total Population',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Below poverty level!!Estimate': 'Chicago Metro Below Poverty Level',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent below poverty level!!Estimate': 'Chicago Metro % Below Poverty Level',
}
chicago_poverty_df = chicago_poverty_df.rename(columns=poverty_column_names)

# Preview the result
chicago_poverty_df.head()

In [None]:
# Save the cleaned poverty DataFrame to a new CSV file (row index is not written).
# This must write chicago_poverty_df; writing chicago_income_df here would
# store the income table under the poverty file name.
chicago_poverty_df.to_csv('../chicago_data/cleaned_chi_poverty.csv', index=False)

## Housing  Metric 

**Measuring**: Median Home Price, Median Rent, Homeownership Rate

Data for this section was pulled from the Chicago 2020 Census Database using the same table code from the American Community Survey by the U.S. Census Bureau (ACS). 
* Median Home Price: Table DP04
* Median Rent: Table DP04
* Homeownership Rate: Table DP04

In [None]:
# Location of the housing csv file
chicago_housing_data = '../chicago_data/chi_housing.csv'

# Load the file into a DataFrame and preview the first rows
chicago_housing_df = pd.read_csv(filepath_or_buffer=chicago_housing_data)
chicago_housing_df.head()


In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_housing_df.dtypes

In [None]:
# Remove the "Margin of Error" columns for both geographies
housing_moe_columns = [
    # Illinois
    'Illinois!!Margin of Error',
    'Illinois!!Percent Margin of Error',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent Margin of Error',
]
chicago_housing_df = chicago_housing_df.drop(columns=housing_moe_columns)


In [None]:
# Discard every row that contains at least one NaN value
chicago_housing_df = chicago_housing_df.dropna(axis=0, how='any')

In [None]:
# Map the raw export headers to short, readable column names
housing_column_names = {
    'Label (Grouping)': 'Housing Occupancy Status',
    # Illinois
    'Illinois!!Estimate': 'Illinois Total Housing Units',
    'Illinois!!Percent': 'Illinois % Housing Units',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Estimate': 'Chicago Metro Total Housing Units',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Percent': 'Chicago Metro % Housing Units',
}
chicago_housing_df = chicago_housing_df.rename(columns=housing_column_names)

# Preview the result
chicago_housing_df.head()

In [None]:
# Save the cleaned housing DataFrame to a new CSV file (row index is not written).
# This must write chicago_housing_df; writing chicago_income_df here would
# store the income table under the housing file name.
chicago_housing_df.to_csv('../chicago_data/cleaned_chi_housing.csv', index=False)

## Quality of Life Metric

**Measuring**: Commute Time 

Data was pulled from the Census Database using the following 2020 American Community Survey (ACS) table. 
* Commute Time: Table S0801 

In [None]:
# Location of the commute csv file (used for Commute Time)
chicago_commute_data = '../chicago_data/chi_commute.csv'

# Load the file into a DataFrame and preview the first rows
chicago_commute_df = pd.read_csv(filepath_or_buffer=chicago_commute_data)
chicago_commute_df.head()

In [None]:
# List every column together with the dtype pandas inferred for it
# (as the last expression of the cell, the result is shown as the cell output)
chicago_commute_df.dtypes

In [None]:
# Remove the "Margin of Error" columns for both geographies
commute_moe_columns = [
    # Illinois
    'Illinois!!Total!!Margin of Error',
    'Illinois!!Male!!Margin of Error',
    'Illinois!!Female!!Margin of Error',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Margin of Error',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Margin of Error',
]
chicago_commute_df = chicago_commute_df.drop(columns=commute_moe_columns)

In [None]:
# Discard every row that contains at least one NaN value
chicago_commute_df = chicago_commute_df.dropna(axis=0, how='any')

In [None]:
# Map the raw export headers to short, readable column names.
# NOTE(review): the Illinois male/female columns become 'Male Total' and
# 'Female Total' with no 'Illinois' prefix, unlike the Chicago ones - confirm
# this naming is intended.
commute_column_names = {
    'Label (Grouping)': 'Means of Transportation',
    # Illinois
    'Illinois!!Total!!Estimate': 'Illinois Total Commute Time',
    'Illinois!!Male!!Estimate': 'Male Total',
    'Illinois!!Female!!Estimate': 'Female Total',
    # Chicago metro
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Total!!Estimate': 'Chicago Metro Total',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Male!!Estimate': 'Chicago Male Total',
    'Chicago-Naperville-Elgin, IL-IN-WI Metro Area!!Female!!Estimate': 'Chicago Female Total',
}
chicago_commute_df = chicago_commute_df.rename(columns=commute_column_names)

# Preview the result
chicago_commute_df.head()

In [None]:
# Write the cleaned commute data to a new CSV file (row index is not written)
chicago_commute_df.to_csv(path_or_buf='../chicago_data/cleaned_chi_commute.csv', index=False)

## THIS IS A DATASET COMPARING CHICAGO FROM 2020 TO 2024. MIGHT WANT TO KEEP.

In [None]:
# NOTE: this is a dataset comparing Chicago from 2020 to 2024. Might want to keep.

# Location of the Chicago demographics csv file
chicago_data = '../chicago_data/chicago_demographics.csv'

# Load the file into a DataFrame and preview the first rows
chicago_data_df = pd.read_csv(filepath_or_buffer=chicago_data)
chicago_data_df.head()

In [None]:
# Filter the dataframe to only include rows with '2020' in the 'Fact' column.
# regex=False: '2020' is matched as a plain substring.
# na=False: a row with a missing 'Fact' counts as "no match"; without it the
# mask would contain NaN, which cannot be used for boolean indexing.
is_2020_fact = chicago_data_df['Fact'].str.contains('2020', regex=False, na=False)
chicago_data_2020_df = chicago_data_df[is_2020_fact]
chicago_data_2020_df

In [None]:
# Remove the two note columns, which are not needed
note_columns = ['Fact Note', 'Value Note for Chicago city, Illinois']
chicago_data_2020_df = chicago_data_2020_df.drop(columns=note_columns)
chicago_data_2020_df.head()

In [None]:
# Discard every row that contains at least one NaN value
chicago_data_2020_df = chicago_data_2020_df.dropna(axis=0, how='any')

In [None]:
# Preview the first rows of the 2020 DataFrame
chicago_data_2020_df.head()

In [None]:
# Write the cleaned 2020 demographics data to a new csv file (row index is not written)
chicago_data_2020_df.to_csv(path_or_buf='../chicago_data/cleaned_chicago_demographics_2020.csv', index=False)