# Load dependencies and libraries

In [None]:
# Import dependencies and libraries
from IPython.display import clear_output
import pandas as pd
from census import Census
from us import states
from dotenv import load_dotenv
import matplotlib.pyplot as plt
# from prophet import Prophet # Uncomment this line if you want to use Facebook Prophet for time series forecasting
import requests
import sys
import csv
import os
import json
import time

# Load environment variables first, so values defined in a .env file
# (DEV_MODE, CENSUS_API_KEY) are visible to the os.getenv() calls below.
load_dotenv()

# Load development mode.
# os.getenv() returns a string (or None) and never raises for a missing
# variable, so the raw value is parsed explicitly: an unset variable or an
# explicit "off" spelling disables dev mode, any other value enables it.
dev = (os.getenv("DEV_MODE") or "").strip().lower() not in ("", "0", "false", "no", "off")
if dev: print("Development mode loaded successfully!")
if dev: print("Environment variables loaded successfully!")

# Load the U.S. Census API key.
# A missing key shows up as None (not as an exception), so the value itself
# is checked instead of wrapping the lookup in try/except.
if dev: print("Loading U.S. Census API key...")
api_key = os.getenv("CENSUS_API_KEY")
if api_key:
  if dev: print("API key loaded successfully!")
else:
  print("Error loading U.S. Census API key")
  print("Please check your environment variables")
  

# Data Gathering

## Census Data

### Loading Census data

In [None]:
# Load the final census data from the CSV file.
# Only the file read sits inside the try block, so a failure in the optional
# dev preview below is not misreported as a missing file.
if dev: print("Loading final census data from the CSV file...")
try:
  census_df = pd.read_csv("data/census/clean_census_data.csv")
except Exception as error:
  print("Error loading final census data from file, please make sure the file exists.")
  print(f"Details: {error}")
  # Re-raise: the cells below all read census_df, so continuing without it
  # would only fail later with a less helpful NameError.
  raise
if dev: display(census_df.head())
print("Final census data loaded from file successfully!")


### Cleaning Census data

In [None]:
# County labels as they are matched against census_df["County"] in the cells below
# ("<Name> County, Colorado" form).
urban_county_names = [
  "Denver County, Colorado",
  "Jefferson County, Colorado",
  "El Paso County, Colorado",
  "Adams County, Colorado",
  "Arapahoe County, Colorado",
]
rural_county_names = [
  "Hinsdale County, Colorado",
  "San Juan County, Colorado",
  "Mineral County, Colorado",
  "Jackson County, Colorado",
  "Kiowa County, Colorado",
]


#### Cleaning Rural County data

In [None]:
# Yearly mean of the poverty and employment measures over the rural counties.
# Rows are selected by county name, then only the columns needed for the
# per-year average are kept.
in_rural_county = census_df["County"].isin(rural_county_names)
rural_employment_poverty_df = (
  census_df.loc[
    in_rural_county,
    ["Year", "Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  ]
  .groupby("Year")
  .mean()
)
display(rural_employment_poverty_df.head())


#### Cleaning Urban County data

In [None]:
# Yearly mean of the poverty and employment measures over the urban counties.
# Rows are selected by county name, then only the columns needed for the
# per-year average are kept.
in_urban_county = census_df["County"].isin(urban_county_names)
urban_employment_poverty_df = (
  census_df.loc[
    in_urban_county,
    ["Year", "Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  ]
  .groupby("Year")
  .mean()
)
display(urban_employment_poverty_df.head())


#### Cleaning Overall Census data

In [None]:
# Yearly mean of the poverty and employment measures over every row of
# census_df (no county filter is applied here).
employment_poverty_df = (
  census_df
  .groupby("Year")[["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"]]
  .mean()
)
employment_poverty_df


In [None]:
# Per-year summary statistics (describe) for the poverty and employment measures
(
  census_df
  .groupby("Year")[["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"]]
  .describe()
)


## PIT Data

### Import PIT data

In [None]:
# Import PIT data into a DataFrame.
# Only the file read sits inside the try block, so a failure in the optional
# dev preview below is not misreported as a missing file.
if dev: print("Loading PIT data...")
try:
  raw_pit_data_df = pd.read_excel("data/pit/PIT_CO_2007-2023.xlsx")
except Exception as error:
  print("Error loading PIT data from file, please make sure the file exists.")
  print(f"Details: {error}")
  # Re-raise: the cleaning cell below reads raw_pit_data_df, so continuing
  # without it would only fail later with a less helpful NameError.
  raise
if dev: display(raw_pit_data_df.head())
print("PIT data loaded successfully!")


### Cleaning PIT data

In [None]:
# Start from a copy so the raw PIT frame stays untouched
if dev: print("Copying PIT data to a new DataFrame...")
co_pit_data_df = raw_pit_data_df.copy()
# Keep only the year and the overall homeless count
if dev: print("Filtering DataFrame columns...")
co_pit_data_df = co_pit_data_df.loc[:, ['Year', 'Overall Homeless']]
# Keep rows at positions 4 through 14.
# NOTE(review): positional slice - presumably the 2011-2021 rows given the
# 2007-2023 file name; confirm against the row order of the spreadsheet.
if dev: print("Filtering DataFrame years...")
co_pit_data_df = co_pit_data_df.iloc[4:15]
# Renumber the remaining rows from 0
if dev: print("Resetting index...")
co_pit_data_df = co_pit_data_df.reset_index(drop=True)
display(co_pit_data_df)


## Homeless Students Data

### Load Homeless Students data

In [None]:
# Import School data into a DataFrame.
# Only the file read sits inside the try block, so a failure in the optional
# dev preview below is not misreported as a missing file.
# The source is an Excel workbook, so the dev message says so.
if dev: print("Loading school data from the Excel file...")
try:
  raw_school_data_df = pd.read_excel("data/schools/combined_2011-2021.xlsx")
except Exception as error:
  print("Error loading school data from file, please make sure the file exists.")
  print(f"Details: {error}")
  # Re-raise: the cleaning cell below reads raw_school_data_df, so continuing
  # without it would only fail later with a less helpful NameError.
  raise
if dev: display(raw_school_data_df.head())
print("School data loaded from file successfully!")


### Cleaning Homeless Students data

In [None]:
# Build the working School frame from the raw one without modifying it:
# sort_values returns a new frame, ordered by County and then Year, and the
# index is renumbered from 0 afterwards.
if dev: print("Copying the School data to a new DataFrame...")
if dev: print("Sorting the DataFrame by County and Year...")
school_data_df = (
  raw_school_data_df
  .sort_values(by=["County", "Year"])
  .reset_index(drop=True)
)
# Show the first rows of the result
if dev: print("New DataFrame:")
display(school_data_df.head())


In [None]:
# County labels as they are matched against school_data_df["County"] in the
# cells below (upper-case, no suffix).
# NOTE(review): these two names are also assigned earlier in this notebook
# with the census-style spellings; after this cell runs, re-running the census
# filter cells would use these values instead.
urban_county_names = [
  "DENVER",
  "JEFFERSON",
  "EL PASO",
  "ADAMS",
  "ARAPAHOE",
]
rural_county_names = [
  "HINSDALE",
  "SAN JUAN",
  "MINERAL",
  "JACKSON",
  "KIOWA",
]


#### Colorado Overall

In [None]:
# Per-year mean of Year and TOTAL over every row of the School data.
# The frame still contains the Year column, so after the groupby Year is
# present both as the index and as a (mean) column.
if dev: print("Creating a new DataFrame containing the average number of students per year...")
co_school_avg_df = school_data_df[["Year", "TOTAL"]].groupby(school_data_df["Year"]).mean()
# Cast the Year column to int and round the averaged TOTAL to two decimals
if dev: print("Converting columns...")
co_school_avg_df[['Year']] = co_school_avg_df[['Year']].astype(int)
co_school_avg_df[['TOTAL']] = co_school_avg_df[['TOTAL']].round(2)
# Give the averaged column a descriptive name
if dev: print("Renaming columns...")
co_school_avg_df = co_school_avg_df.rename(columns={"TOTAL": "Average Homeless Students"})
# Drop the Year index (the Year column is kept) and renumber rows from 0
if dev: print("Resetting the index...")
co_school_avg_df = co_school_avg_df.reset_index(drop=True)
display(co_school_avg_df)


#### Rural Counties

In [None]:
# Average number of homeless students per year across the rural counties.
# Keep only the rows whose County is in the rural list
if dev: print("Filtering rural counties...")
rural_school_rows = school_data_df[school_data_df["County"].isin(rural_county_names)]
# Keep only the needed columns; the explicit copy makes this an independent
# frame, so the column assignment below cannot trigger a
# SettingWithCopyWarning against school_data_df.
if dev: print("Filtering columns...")
rural_year_total = rural_school_rows[["Year", "TOTAL"]].copy()
# Cast Year to int so the group keys are integers
if dev: print("Converting columns...")
rural_year_total["Year"] = rural_year_total["Year"].astype(int)
# Give the value column its final name before aggregating
if dev: print("Renaming columns...")
rural_year_total = rural_year_total.rename(columns={"TOTAL": "Average Homeless Students"})
# Get the average number of homeless students per year.
# Rounding is applied to the mean itself (as the statewide cell does);
# rounding the inputs before averaging would leave the average unrounded.
rural_homeless_students_df = rural_year_total.groupby("Year").mean().round(2)
display(rural_homeless_students_df)


#### Urban Counties

In [None]:
# Average number of homeless students per year across the urban counties.
# Keep only the rows whose County is in the urban list
if dev: print("Filtering urban counties...")
urban_school_rows = school_data_df[school_data_df["County"].isin(urban_county_names)]
# Keep only the needed columns; the explicit copy makes this an independent
# frame, so the column assignment below cannot trigger a
# SettingWithCopyWarning against school_data_df.
if dev: print("Filtering columns...")
urban_year_total = urban_school_rows[["Year", "TOTAL"]].copy()
# Cast Year to int so the group keys are integers
if dev: print("Converting columns...")
urban_year_total["Year"] = urban_year_total["Year"].astype(int)
# Give the value column its final name before aggregating
if dev: print("Renaming columns...")
urban_year_total = urban_year_total.rename(columns={"TOTAL": "Average Homeless Students"})
# Get the average number of homeless students per year.
# Rounding is applied to the mean itself (as the statewide cell does);
# rounding the inputs before averaging would leave the average unrounded.
urban_homeless_students_df = urban_year_total.groupby("Year").mean().round(2)
display(urban_homeless_students_df)


## Combining PIT & Homeless Students Data

### Colorado Overall

In [None]:
# Join the PIT counts and the statewide student averages on their shared Year
if dev: print("Merging the PIT data with the School data...")
co_homeless_df = co_pit_data_df.merge(co_school_avg_df, on="Year")
# Use Year as the row index
if dev: print("Setting the Year column as the index...")
co_homeless_df = co_homeless_df.set_index("Year")
display(co_homeless_df)


### Rural Counties

In [None]:
# Join the PIT counts and the rural student averages on Year.
# NOTE(review): co_pit_data_df is not filtered by county anywhere in this
# notebook, so 'Overall Homeless' here is the same series used in the
# statewide merge - confirm that comparison is intended.
if dev: print("Merging the PIT data with the School data...")
rural_homeless_df = co_pit_data_df.merge(rural_homeless_students_df, on="Year")
# Use Year as the row index
if dev: print("Setting the Year column as the index...")
rural_homeless_df = rural_homeless_df.set_index("Year")
display(rural_homeless_df)


### Urban Counties

In [None]:
# Join the PIT counts and the urban student averages on Year.
# NOTE(review): co_pit_data_df is not filtered by county anywhere in this
# notebook, so 'Overall Homeless' here is the same series used in the
# statewide merge - confirm that comparison is intended.
if dev: print("Merging the PIT data with the School data...")
urban_homeless_df = co_pit_data_df.merge(urban_homeless_students_df, on="Year")
# Use Year as the row index
if dev: print("Setting the Year column as the index...")
urban_homeless_df = urban_homeless_df.set_index("Year")
display(urban_homeless_df)


# Graphs

## Colorado PIT

### Colorado Overall

In [None]:
# Line chart of the 'Overall Homeless' column, indexed by Year
co_homeless_df.plot.line(
  y='Overall Homeless',
  title='Overall Homeless in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/co_homeless_line.png")


In [None]:
# Bar chart of the 'Overall Homeless' column, indexed by Year
co_homeless_df.plot.bar(
  y='Overall Homeless',
  title='Overall Homeless in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/co_homeless_bar.png")


## Homeless Students

### Colorado Overall

In [None]:
# Line chart of the statewide 'Average Homeless Students' column, indexed by Year
co_homeless_df.plot.line(
  y='Average Homeless Students',
  title='Avg Homeless Students in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/avg_homeless_students_line.png")


In [None]:
# Bar chart of the statewide 'Average Homeless Students' column, indexed by Year
co_homeless_df.plot.bar(
  y='Average Homeless Students',
  title='Avg Homeless Students in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/avg_homeless_students_bar.png")


### Rural Counties

In [None]:
# Line chart of the rural 'Average Homeless Students' column, indexed by Year
rural_homeless_df.plot.line(
  y='Average Homeless Students',
  title='Avg Homeless Students in Rural Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_avg_homeless_students_line.png")


In [None]:
# Bar chart of the rural 'Average Homeless Students' column, indexed by Year
rural_homeless_df.plot.bar(
  y='Average Homeless Students',
  title='Avg Homeless Students in Rural Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_avg_homeless_students_bar.png")


### Urban Counties

In [None]:
# Line chart of the urban 'Average Homeless Students' column, indexed by Year
urban_homeless_df.plot.line(
  y='Average Homeless Students',
  title='Avg Homeless Students in Urban Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_avg_homeless_students_line.png")


In [None]:
# Bar chart of the urban 'Average Homeless Students' column, indexed by Year
urban_homeless_df.plot.bar(
  y='Average Homeless Students',
  title='Avg Homeless Students in Urban Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_avg_homeless_students_bar.png")


## Colorado PIT vs Homeless Students

### Colorado Overall

In [None]:
# Line chart with both series on shared axes: PIT count and statewide student average
co_homeless_df.plot.line(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/co_homeless_vs_avg_homeless_students_line.png")


In [None]:
# Grouped bar chart with both series: PIT count and statewide student average
co_homeless_df.plot.bar(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Colorado (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/co_homeless_vs_avg_homeless_students_bar.png")


### Rural Counties

In [None]:
# Line chart with both series on shared axes: PIT count and rural student average.
# NOTE(review): 'Overall Homeless' comes from the PIT merge; confirm it is
# meant to be compared with a county-subset student average.
rural_homeless_df.plot.line(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Rural Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_homeless_vs_avg_homeless_students_line.png")


In [None]:
# Grouped bar chart with both series: PIT count and rural student average.
# NOTE(review): 'Overall Homeless' comes from the PIT merge; confirm it is
# meant to be compared with a county-subset student average.
rural_homeless_df.plot.bar(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Rural Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_homeless_vs_avg_homeless_students_bar.png")


### Urban Counties

In [None]:
# Line chart with both series on shared axes: PIT count and urban student average.
# NOTE(review): 'Overall Homeless' comes from the PIT merge; confirm it is
# meant to be compared with a county-subset student average.
urban_homeless_df.plot.line(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Urban Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_homeless_vs_avg_homeless_students_line.png")


In [None]:
# Grouped bar chart with both series: PIT count and urban student average.
# NOTE(review): 'Overall Homeless' comes from the PIT merge; confirm it is
# meant to be compared with a county-subset student average.
urban_homeless_df.plot.bar(
  y=['Overall Homeless', 'Average Homeless Students'],
  title='Overall Homeless vs. Avg Homeless Students in Urban Counties (2011-2021)',
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_homeless_vs_avg_homeless_students_bar.png")


## Employment

### Colorado Overall

In [None]:
# Line chart of the yearly employed and unemployed averages over all census rows
employment_poverty_df.plot.line(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/employment_line.png")


In [None]:
# Grouped bar chart of the yearly employed and unemployed averages over all census rows
employment_poverty_df.plot.bar(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/employment_bar.png")


### Rural Counties

In [None]:
# Line chart of the yearly employed and unemployed averages for the rural counties
rural_employment_poverty_df.plot.line(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_employment_line.png")


In [None]:
# Grouped bar chart of the yearly employed and unemployed averages for the rural counties
rural_employment_poverty_df.plot.bar(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_employment_bar.png")


### Urban Counties

In [None]:
# Line chart of the yearly employed and unemployed averages for the urban counties
urban_employment_poverty_df.plot.line(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_employment_line.png")


In [None]:
# Grouped bar chart of the yearly employed and unemployed averages for the urban counties
urban_employment_poverty_df.plot.bar(
  y=["Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Employment (16+) in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_employment_bar.png")


## Poverty

### Colorado Overall

In [None]:
# Line chart of the yearly 'Poverty Count' average over all census rows
employment_poverty_df.plot.line(
  y="Poverty Count",
  title="Poverty Count in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/poverty_count_line.png")


In [None]:
# Bar chart of the yearly 'Poverty Count' average over all census rows
employment_poverty_df.plot.bar(
  y="Poverty Count",
  title="Poverty Count in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/poverty_count_bar.png")


### Rural Counties

In [None]:
# Line chart of the yearly 'Poverty Count' average for the rural counties
rural_employment_poverty_df.plot.line(
  y="Poverty Count",
  title="Poverty Count in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_poverty_count_line.png")


In [None]:
# Bar chart of the yearly 'Poverty Count' average for the rural counties
rural_employment_poverty_df.plot.bar(
  y="Poverty Count",
  title="Poverty Count in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_poverty_count_bar.png")


### Urban Counties

In [None]:
# Line chart of the yearly 'Poverty Count' average for the urban counties
urban_employment_poverty_df.plot.line(
  y="Poverty Count",
  title="Poverty Count in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_poverty_count_line.png")


In [None]:
# Bar chart of the yearly 'Poverty Count' average for the urban counties
urban_employment_poverty_df.plot.bar(
  y="Poverty Count",
  title="Poverty Count in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_poverty_count_bar.png")


## Employment vs Poverty

### Colorado Overall

In [None]:
# Line chart of poverty, employed and unemployed yearly averages over all census rows
employment_poverty_df.plot.line(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/poverty_count_vs_employment_line.png")


In [None]:
# Grouped bar chart of poverty, employed and unemployed yearly averages over all census rows
employment_poverty_df.plot.bar(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Colorado (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/poverty_count_vs_employment_bar.png")


### Urban Counties

In [None]:
# Line chart of poverty, employed and unemployed yearly averages for the urban counties
urban_employment_poverty_df.plot.line(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_poverty_count_vs_employment_line.png")


In [None]:
# Grouped bar chart of poverty, employed and unemployed yearly averages for the urban counties
urban_employment_poverty_df.plot.bar(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Urban Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/urban_poverty_count_vs_employment_bar.png")


### Rural Counties

In [None]:
# Line chart of poverty, employed and unemployed yearly averages for the rural counties
rural_employment_poverty_df.plot.line(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_poverty_count_vs_employment_line.png")


In [None]:
# Grouped bar chart of poverty, employed and unemployed yearly averages for the rural counties
rural_employment_poverty_df.plot.bar(
  y=["Poverty Count", "Number of Employed (16+)", "Number of Unemployed (16+)"],
  title="Poverty Count vs. Employment (16+) in Rural Counties (2011-2021)",
)
# Write the current figure (the one just created) to disk
plt.savefig("images/michael/rural_poverty_count_vs_employment_bar.png")
