# Phoenix Dataset

In [51]:
# Import dependencies
import os
import csv
import pandas as pd
import json
from pathlib import Path

## Demographic Metric

**Measuring**: ACS Population Density, Median Age, Education Level

Data was pulled from the 2020 Phoenix Census Database and the American Community Survey (ACS) by the U.S. Census Bureau using various coded tables to ensure unified data for each metropolitan city. Two of the metrics share the same table code (DP05) and were separated to ensure proper measurement.
* ACS Population Density: Table DP05
* Median Age: Table DP05
* Education Level: S1501

##### ACS Population Density & Median Age (Table DP05)

In [52]:
# Load the raw age/population export (Table DP05) into a DataFrame
phx_age_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_age.csv")

# Preview the first rows of the raw table
phx_age_df.head(n=5)

Unnamed: 0,Label (Grouping),SEX AND AGE,SEX AND AGE!!Total population,SEX AND AGE!!Total population!!Under 5 years,SEX AND AGE!!Total population!!5 to 9 years,SEX AND AGE!!Total population!!10 to 14 years,SEX AND AGE!!Total population!!15 to 19 years,SEX AND AGE!!Total population!!20 to 24 years,SEX AND AGE!!Total population!!25 to 34 years,SEX AND AGE!!Total population!!35 to 44 years,SEX AND AGE!!Total population!!45 to 54 years,SEX AND AGE!!Total population!!55 to 59 years,SEX AND AGE!!Total population!!60 to 64 years,SEX AND AGE!!Total population!!65 to 74 years,SEX AND AGE!!Total population!!75 to 84 years,SEX AND AGE!!Total population!!85 years and over,SEX AND AGE!!Total population!!Median age (years)
0,"Maricopa County, Arizona",,,,,,,,,,,,,,,,
1,Estimate,,4412779.0,277315,285526,308049,296440,293079,645693,574796,552339,265441,243005,389119,202237,79740,36.6
2,Percent,,4412779.0,6.3%,6.5%,7.0%,6.7%,6.6%,14.6%,13.0%,12.5%,6.0%,5.5%,8.8%,4.6%,1.8%,(X)


In [53]:
# 'SEX AND AGE' is only a section-header column (empty in the preview above), so remove it
phx_age_df = phx_age_df.drop('SEX AND AGE', axis=1)

# Relabel the remaining columns positionally with short, readable names.
# NOTE(review): the first label is an empty string; pandas will read it back
# from the cleaned CSV as an auto-generated 'Unnamed: N' column - confirm that
# downstream consumers expect this.
phx_age_df.columns = [
    '', 'Total Population',
    'Under 5', '5 to 9', '10 to 14', '15 to 19', '20 to 24',
    '25 to 34', '35 to 44', '45 to 54',
    '55 to 59', '60 to 64',
    '65 to 74', '75 to 84', '85 and Over',
    'Median Age',
]

# Discard every row that still contains a missing value
# (e.g. the county-name row, which carries no figures)
phx_age_df = phx_age_df.dropna(axis=0, how='any')

In [54]:
# Persist the cleaned age table (row index omitted) for the JSON step later on
phx_age_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_age.csv",
    index=False,
)

In [55]:
# Load the raw race export into a DataFrame
phx_race_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_race.csv")

# Relabel the columns positionally with short, readable names
phx_race_df.columns = [
    '',
    'Total Population',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Other',
]

# Discard every row that contains a missing value
phx_race_df = phx_race_df.dropna(axis=0, how='any')

# Persist the cleaned race table (row index omitted)
phx_race_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_race.csv",
    index=False,
)

Note: the population density and median age will eventually have to be separated.

#### Education Level (Table S1501)

In [56]:
# Load the raw educational-attainment export (Table S1501) into a DataFrame
phx_edu_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_edu_level.csv")

# Show the raw table
phx_edu_df

Unnamed: 0,Label (Grouping),"Maricopa County, Arizona!!Total!!Estimate","Maricopa County, Arizona!!Male!!Estimate","Maricopa County, Arizona!!Female!!Estimate"
0,AGE BY EDUCATIONAL ATTAINMENT,,,
1,Population 18 to 24 years,409391,209981,199410
2,Less than high school graduate,56172,31902,24270
3,High school graduate (includes equival...,141276,76600,64676
4,Some college or associate's degree,167314,82048,85266
...,...,...,...,...
63,Less than high school graduate,25640,29024,20741
64,High school graduate (includes equival...,32143,36288,27372
65,Some college or associate's degree,40064,45402,34627
66,Bachelor's degree,56667,70031,48773


In [57]:
# Keep only the first 28 rows of the raw table (filter only what is needed).
# NOTE(review): presumably these rows are the 'AGE BY EDUCATIONAL ATTAINMENT'
# section - confirm the cut-off against the raw export.
phx_edu_df = phx_edu_df.iloc[:28]

# Rename columns for clarity
phx_edu_df.columns = ['Age by Educational Attainment',
                      'Total',
                      'Male',
                      'Female']

# Drop rows with missing values, such as the section-header row that carries no figures
phx_edu_df = phx_edu_df.dropna()

# Preview the cleaned result. This call used to sit in the middle of the cell,
# where it displayed nothing because only a cell's last expression is rendered.
phx_edu_df.head()

    Question to ask: will we want to separate this further?

In [58]:
# Persist the cleaned education table (row index omitted) for the JSON step later on
phx_edu_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_edu_level.csv",
    index=False,
)

## Economic Metric

**Measuring**: Median Household Income, Unemployment Rate, Poverty Rate.

Data for this section was pulled from the Phoenix 2020 Census Database using various coded tables:
* Median Household Income: Table S1901
* Unemployment Rate: Table S2301
* Poverty Rate: S1501

##### Median Household Income (Table S1901)

In [59]:
# Load the raw household-income export (Table S1901) into a DataFrame
phx_income_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_median_income.csv")

# Preview the first rows of the raw table
phx_income_df.head(n=5)

Unnamed: 0,Label (Grouping),"Maricopa County, Arizona!!Households!!Estimate","Maricopa County, Arizona!!Families!!Estimate","Maricopa County, Arizona!!Married-couple families!!Estimate","Maricopa County, Arizona!!Nonfamily households!!Estimate"
0,Total,1596784,1047899,763885,548885
1,"Less than $10,000",5.0%,3.4%,1.6%,9.5%
2,"$10,000 to $14,999",3.1%,1.9%,1.0%,5.7%
3,"$15,000 to $24,999",7.3%,5.4%,3.4%,11.9%
4,"$25,000 to $34,999",8.2%,6.6%,4.7%,12.1%


In [60]:
# Keep only the first 13 rows of the raw income table
phx_income_df = phx_income_df.head(13)

# Relabel the columns positionally; the first (row-label) column gets a blank name
phx_income_df.columns = [
    '',
    'Household',
    'Families',
    'Married-couple Households',
    'Nonfamily Households',
]

# Preview the relabelled table
phx_income_df.head(n=5)

Unnamed: 0,Unnamed: 1,Household,Families,Married-couple Households,Nonfamily Households
0,Total,1596784,1047899,763885,548885
1,"Less than $10,000",5.0%,3.4%,1.6%,9.5%
2,"$10,000 to $14,999",3.1%,1.9%,1.0%,5.7%
3,"$15,000 to $24,999",7.3%,5.4%,3.4%,11.9%
4,"$25,000 to $34,999",8.2%,6.6%,4.7%,12.1%


In [61]:
# Persist the cleaned income table (row index omitted) for the JSON step later on
phx_income_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_income.csv",
    index=False,
)

##### Unemployment Rate (Table S2301)

In [62]:
# Load the raw employment-status export (Table S2301) into a DataFrame
phx_unemployment_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_unemployment_rate.csv")

# Preview the first rows of the raw table
phx_unemployment_df.head(n=5)

Unnamed: 0,Label (Grouping),"Maricopa County, Arizona!!Total!!Estimate","Maricopa County, Arizona!!Labor Force Participation Rate!!Estimate","Maricopa County, Arizona!!Employment/Population Ratio!!Estimate","Maricopa County, Arizona!!Unemployment rate!!Estimate"
0,Population 16 years and over,3482330.0,64.6%,61.1%,5.1%
1,AGE,,,,
2,16 to 19 years,236881.0,41.1%,33.4%,18.4%
3,20 to 24 years,293079.0,80.1%,73.1%,8.1%
4,25 to 29 years,334251.0,83.7%,78.7%,5.6%


In [63]:
# Filter the DataFrame to only include rows 0-11.
# Previously this slice was stored in phx_unemployment_age_df but never used:
# the renaming, the dropna() and the CSV export all ran on the *unfiltered*
# table, so every row of the raw export ended up under the 'Age Group' label.
# NOTE(review): rows 0-11 are presumably the overall total plus the AGE
# section - confirm the cut-off against the raw export.
phx_unemployment_age_df = phx_unemployment_df.iloc[0:12].copy()

# Rename columns for clarity
phx_unemployment_age_df.columns = ['Age Group',
                                   'Total',
                                   'Labor Force Participation',
                                   'Employment Population Ratio',
                                   'Unemployment Rate']

# Drop rows with missing values (e.g. the 'AGE' section-header row) and keep
# the result under the name that the save/JSON cells below read from.
phx_unemployment_df = phx_unemployment_age_df.dropna()

# Preview the cleaned result as the cell's last expression so it is displayed
phx_unemployment_df.head()

In [64]:
# Persist the cleaned unemployment table (row index omitted) for the JSON step later on
phx_unemployment_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_unemployment.csv",
    index=False,
)

## Housing Metric

**Measuring**: Median Home Price, Median Rent, Homeownership Rate

Data for this section was pulled from the Phoenix 2020 Census Database using the same table code from the American Community Survey (ACS) by the U.S. Census Bureau.
* Median Home Price: Table DP04
* Median Rent: Table DP04
* Homeownership Rate: Table DP04

In [65]:
# Load the raw housing/rent export (Table DP04) into a DataFrame
phx_rent_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_rent.csv")

    NOTE: will need to separate all of these out.

In [66]:
# Relabel the columns positionally; the first (row-label) column gets a blank name
phx_rent_df.columns = [
    '',
    'Total Housing Units',
    'Occupied Housing Units Paying Rent',
    # Gross-rent brackets
    'Less than $500', '$500 to $999',
    '$1,000 to $1,499', '$1,500 to $1,999',
    '$2,000 to $2,499', '$2,500 to $2,999',
    '$3,000 or more',
    'Median Rent Price',
    'No rent paid',
]

# Discard every row that contains a missing value
phx_rent_df = phx_rent_df.dropna(axis=0, how='any')

In [67]:
# Persist the cleaned rent table (row index omitted) for the JSON step later on
phx_rent_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_rent.csv",
    index=False,
)

## Quality of Life Metric

**Measuring**: Commute Time 

Data was pulled from the Census Database using the following 2020 American Community Survey (ACS) table.
* Commute Time: Table S0801 

In [68]:
# Load the raw commuting export (Table S0801) into a DataFrame
phx_commute_df = pd.read_csv(filepath_or_buffer="../phoenix_data/phx_new/phx_commute.csv")

In [69]:
# Keep only the first 14 rows of the raw commute table.
# NOTE(review): the cut-off is hard-coded - confirm it against the raw export.
phx_commute_df = phx_commute_df.iloc[:14]

# Relabel the columns positionally; the first (row-label) column gets a blank name
phx_commute_df.columns = ['',
                          'Total Estimate',
                          'Male',
                          'Female'
                          ]

# Drop rows with missing values
phx_commute_df = phx_commute_df.dropna()

# Preview the cleaned result. This call used to run before dropna() and in the
# middle of the cell, where it displayed nothing because only a cell's last
# expression is rendered.
phx_commute_df.head()

In [70]:
# Persist the cleaned commute table (row index omitted) for the JSON step later on
phx_commute_df.to_csv(
    path_or_buf="../phoenix_data/phx_cleaned_csv/cleaned_phx_commute.csv",
    index=False,
)

### Create JSON


In [71]:
# Re-load every cleaned Phoenix table from disk, one dataset at a time:
# first the path to the cleaned CSV, then the DataFrame read from it.

# Commute time
phx_commute_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_commute.csv')
phx_commute_df = pd.read_csv(phx_commute_csv)

# Unemployment rate
phx_unemployment_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_unemployment.csv')
phx_unemployment_df = pd.read_csv(phx_unemployment_csv)

# Rent
phx_rent_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_rent.csv')
phx_rent_df = pd.read_csv(phx_rent_csv)

# Household income
phx_income_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_income.csv')
phx_income_df = pd.read_csv(phx_income_csv)

# Education level
phx_edu_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_edu_level.csv')
phx_edu_df = pd.read_csv(phx_edu_csv)

# Age
phx_age_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_age.csv')
phx_age_df = pd.read_csv(phx_age_csv)

# Race
phx_race_csv = Path('../phoenix_data/phx_cleaned_csv/cleaned_phx_race.csv')
phx_race_df = pd.read_csv(phx_race_csv)

In [72]:
# Build one dictionary keyed by metric name; each value is that table's rows
# as a list of {column: value} records.
phx_data = {
    metric: frame.to_dict(orient='records')
    for metric, frame in (
        ("commute", phx_commute_df),
        ("unemployment", phx_unemployment_df),
        ("rent", phx_rent_df),
        ("income", phx_income_df),
        ("education", phx_edu_df),
        ("age", phx_age_df),
        ("race", phx_race_df),
    )
}

In [73]:
# Serialise the combined dictionary to a single JSON file
with open(file='../phoenix_data/phx_data.json', mode='w') as json_file:
    json.dump(obj=phx_data, fp=json_file)