# Urban Growth Boundary (UGB)

In [None]:
#import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as stats
from scipy.stats import linregress
import gmaps

#import API keys: 
from config import api_key

# Path of the CSV file that output data is meant to be saved to.
# NOTE(review): nothing in the visible cells writes to this path — confirm it is still needed.
output_file = "output_data/ugb.csv"


In [None]:
# Start of code for Question 1: housing types

In [None]:
# Input cleaned housing data as data frame
housing = pd.read_csv("../Data/HousingTypes_Cleaned.csv")

### Question 1 - Part A: Do cities with a UGB have more multi-unit structures than non-UGB cities?

In [None]:
# Total every numeric response column within each UGB group.
# numeric_only=True keeps non-numeric columns (presumably City and State) out
# of the aggregation; without it, pandas >= 2.0 tries to sum those columns as
# well and the percentage division below cannot be applied to the result.
units = housing.groupby(["UGB"]).sum(numeric_only=True)

# Express every column as a percentage of the group's total responses
units = units.div(units["Total Response"], axis=0) * 100

units

In [None]:
# Reshape the percentage table for the summary bar chart:
# one row per housing category, one column per UGB group.

# Columns that are not units-in-structure categories
extra_columns = ["Total Response", "Median Housing Costs", "MOE Housing Costs"]
units_graph = units.drop(extra_columns, axis=1)

# Transpose so the categories become rows
graph_units = units_graph.T

# Move the category names out of the index into a regular "index" column
graph_structure = graph_units.reset_index()
graph_structure

In [None]:
# Grouped bar chart: percent of responses per units-in-structure category,
# with non-UGB and UGB cities drawn side by side.

# One bar position per category row, and the width of a single bar
pos = list(range(len(graph_structure["No"])))
width = 0.25

# X tick labels
# NOTE(review): assumes graph_structure has exactly these eight rows in this order — confirm
x_labels = ("1 Unit", "2 Units", "3 or 4 Units", "5 to 9 Units", "10 to 19 Units", "20 to 49 Units", "50 or more Units", "Other")

# Plotting the bars
fig, ax = plt.subplots(figsize=(20,10))

# Non-UGB bars sit on the category positions; UGB bars are shifted one bar
# width to the right. Each series carries its own legend label instead of
# borrowing the first two category names.
ax.bar(pos, graph_structure["No"], width, color="blue", label="Non UGB Cities")
ax.bar([p + width for p in pos], graph_structure["Yes"], width, color="red", label="UGB Cities")

# Set x axis: ticks are centred between the two bars of each category
ax.set_xticks([p + .5 * width for p in pos])
ax.set_xticklabels(x_labels, fontsize=16)
ax.set_xlim(min(pos)-width*2, max(pos)+width*3)

# Set y axis and labels
ax.tick_params(axis='y', labelsize=14)
ax.set_ylim(0,60)
ax.set_ylabel("Percent", fontsize=16)

# Add title and legend
ax.set_title("Percent of Responses for Units in Structure by UGB Cities", fontsize=20)
ax.legend(fontsize=18)

# Size the graph to the frame BEFORE saving so the saved image gets the same
# layout as the displayed one
plt.tight_layout()

# Save graph
plt.savefig("../Images/UnitsinStructure.png")

# Display
plt.show()

In [None]:
# Single-unit vs. multi-unit structure counts per city.
# Work on a copy so the original housing frame is left untouched.
multiunits = housing.copy()

# Single unit is just the "1 unit" column under a new name
multiunits["Single Unit"] = multiunits["1 unit"]

# Multi units is the running total of every category with two or more units,
# added left to right
multi_columns = [
    "2 units",
    "3 or 4 units",
    "5 to 9 units",
    "10 to 19 units",
    "20 to 49 units",
    "50 or more units",
]
multi_total = multiunits[multi_columns[0]]
for column in multi_columns[1:]:
    multi_total = multi_total + multiunits[column]
multiunits["Multi Units"] = multi_total

# Keep only the columns needed for the comparison
keep_columns = ["City", "State", "UGB", "Single Unit", "Multi Units", "Total Response"]
multi_units = multiunits[keep_columns]
multi_units

In [None]:
# Split the cities into UGB and non-UGB samples
is_ugb = multi_units["UGB"] == "Yes"
is_non = multi_units["UGB"] == "No"
sample_ugb = multi_units.loc[is_ugb]
sample_non = multi_units.loc[is_non]

# Multi-unit values of each sample
multi_ugb = sample_ugb["Multi Units"]
multi_non = sample_non["Multi Units"]

# Independent t-test that does not assume equal variances (equal_var=False)
stats.ttest_ind(multi_ugb, multi_non, equal_var=False)

### Question 1 - Part B: Do cities with a UGB have higher overall cost of housing than non-UGB cities?

In [None]:
# Keep only the identifying columns and the housing-cost columns
cost_columns = ["City", "State", "UGB", "Median Housing Costs", "MOE Housing Costs"]
costs = housing[cost_columns]
costs

In [None]:
# Split the cost table into UGB and non-UGB cities
ugb_rows = costs["UGB"] == "Yes"
non_rows = costs["UGB"] == "No"
costs_ugb = costs.loc[ugb_rows]
costs_non = costs.loc[non_rows]

# Median housing costs of each sample
ugb_cost = costs_ugb["Median Housing Costs"]
non_cost = costs_non["Median Housing Costs"]

# Independent t-test that does not assume equal variances (equal_var=False)
stats.ttest_ind(ugb_cost, non_cost, equal_var=False)

In [None]:
# Box plot comparing the distribution of median housing costs
# for UGB and non-UGB cities.

# Simple data frame to graph from, indexed by the UGB flag
costs_box = costs[["UGB", "Median Housing Costs"]].copy()
costs_box.set_index("UGB", inplace=True)

# List of values for each box. Selecting with a list label (["Yes"]) always
# yields a Series, even when only one city matches, so .tolist() is safe.
ugb_box = costs_box.loc[["Yes"], "Median Housing Costs"].tolist()
non_ugb = costs_box.loc[["No"], "Median Housing Costs"].tolist()

# Boxes are drawn in this order: UGB first, non-UGB second
cities_box = [ugb_box, non_ugb]

# Specifying outlier dots and median line
flierprops = dict(markerfacecolor='black', marker='o', markersize=12)
medianprops = dict(linestyle='-', color="blue", linewidth=2.5)

# Generating figure and drawing boxplot
fig, ax = plt.subplots(figsize=(15,10))
bp = ax.boxplot(cities_box, flierprops=flierprops, medianprops=medianprops)

# Updating y axis
ax.tick_params(axis='y', labelsize=14)
ax.set_ylim(500,3000)

# Adding title and labels (the plotted column is housing costs, so the axis
# label says "Costs" to agree with the title)
ax.set_title("Distribution of Median Housing Costs", fontsize=20)
ax.set_ylabel("Median Housing Costs ($)", fontsize=18)
ax.set_xticklabels(["UGB Cities", "Non UGB Cities"], fontsize=18)

# Adjust to frame BEFORE saving so the saved image gets the same layout as
# the displayed one
plt.tight_layout()

# Save graph
plt.savefig("../Images/MedianHousing.png")

# Display
plt.show()

In [None]:
# End of code for Question 1: housing types

# Question 2: Do residents of UGB cities use public transportation more often than residents of other cities?

In [None]:
# Load the transportation data set (file is ISO-8859-1 encoded) and preview
# the first 20 rows.
transport_data = pd.read_csv("transportation_data_use_this_one.csv", encoding="ISO-8859-1")
transport_data.head(20)

In [None]:
# End of code for Question 2: public transportation

In [None]:
# Start of code for Question 3: population growth

In [None]:
# End of code for Question 3: population growth

# Question 4: Do cities with a UGB have a higher population density than other U.S. cities?


In [None]:
# Load the top-100-cities table, give two columns clearer names,
# and index the rows by city.
populationcsv = "../Data/top_100clean.csv"
column_names = {"Persons by km2": "Persons per km2", "2010 Census": "Census Population"}
population = pd.read_csv(populationcsv).rename(columns=column_names)
population = population.set_index('City')
population

In [None]:
# Rows of the population table flagged as UGB cities.
# The column key used here is "UGB " with a trailing space.
has_ugb = population["UGB "].eq("Yes")
ugb = population[has_ugb]
ugb

In [None]:
# Rows of the population table flagged as non-UGB cities.
# The column key used here is "UGB " with a trailing space.
lacks_ugb = population["UGB "].eq("No")
non_ugb = population[lacks_ugb]

In [None]:
# Population-density series for all cities, UGB cities and non-UGB cities
density_column = "Persons per km2"
density = population[density_column]
ugb_density = ugb[density_column]
non_ugb_density = non_ugb[density_column]

In [None]:
# Summary statistics of population density

# All cities in the population table
density_mean, density_median = density.mean(), density.median()
min_dens, max_dens = density.min(), density.max()

# UGB cities only
ugb_density_mean, ugb_density_median = ugb_density.mean(), ugb_density.median()
min_ugb, max_ugb = ugb_density.min(), ugb_density.max()

In [None]:
# One-row summary table of population density for all cities.
# The "Minimum Density" header was previously misspelled "Minimun Density".
density_summary = pd.DataFrame({
    "Mean Density": [density_mean],
    "Median Density": [density_median],
    "Minimum Density": [min_dens],
    "Maximum Density": [max_dens],
})
density_summary


In [None]:
# One-row summary table of population density for UGB cities.
# The "UGB Minimum Density" header was previously misspelled "UGB Minimun Density".
ugb_density_summary = pd.DataFrame({
    "UGB Mean Density": [ugb_density_mean],
    "UGB Median Density": [ugb_density_median],
    "UGB Minimum Density": [min_ugb],
    "UGB Maximum Density": [max_ugb],
})
ugb_density_summary

In [None]:
# Independent t-test of population density, non-UGB vs. UGB cities,
# without assuming equal variances (equal_var=False)
density_ttest = stats.ttest_ind(non_ugb_density, ugb_density, equal_var=False)
density_ttest

In [None]:
# Configure gmaps with the API key imported from config
gmaps.configure(api_key=api_key)

In [None]:
# Convert the map inputs to float: coordinates and density for every city,
# and the same for the UGB subset. The UGB frame is named `ugb`; the
# previous `ugb_df` was never defined and raised a NameError.
locations = population[["Latitude", "Longitude"]].astype(float)
ugb_locations = ugb[["Latitude", "Longitude"]].astype(float)
density = population["Persons per km2"].astype(float)
ugb_density = ugb["Persons per km2"].astype(float)

In [None]:
# Heatmap layer of city locations weighted by population density
heatmap_options = {
    "weights": density,
    "dissipating": False,
    "max_intensity": 5000,
    "point_radius": 1,
}
heat_layer = gmaps.heatmap_layer(locations, **heatmap_options)

# Put the layer on a fresh figure
fig = gmaps.figure()
fig.add_layer(heat_layer)

In [None]:
# Symbol layer for the UGB city locations
marker_fill = 'rgba(0, 150, 0, 0.4)'
marker_stroke = 'rgba(0, 0, 150, 0.4)'
ugb_layer = gmaps.symbol_layer(
    ugb_locations,
    fill_color=marker_fill,
    stroke_color=marker_stroke,
    scale=4,
)

# Put the markers on a fresh figure
fig = gmaps.figure()
fig.add_layer(ugb_layer)

In [None]:
# Combine the heatmap and the UGB markers on one new figure and display it
fig = gmaps.figure()
for layer in (heat_layer, ugb_layer):
    fig.add_layer(layer)

fig

In [None]:
# Box plots of population density: all cities vs. UGB cities
data = [density, ugb_density]
fig1, ax1 = plt.subplots()
ax1.set_title('Population Density for Top 100 US Cities vs UGB Cities')
ax1.set_ylabel('Persons per km2')
ax1.boxplot(data)
ax1.set_xticklabels(['Top 100 Cities', 'UGB Cities'])

# Fixed y-axis range for the plot
top = 5000
bottom = 0
ax1.set_ylim(bottom, top)

# Save BEFORE showing: once plt.show() has run, the figure is finished and a
# later savefig writes an empty image.
fig1.savefig("../Images/PopD_boxplot.png")
plt.show()

In [None]:
# Quartiles, interquartile range and outlier bounds of population density
# for the top 100 cities
quartiles = density.quantile([.25,.5,.75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq-lowerq

print(f"The lower quartile of population density is: {lowerq}")
print(f"The upper quartile of population density is: {upperq}")
print(f"The interquartile range of population density is: {iqr}")
# Message previously read "The the median"
print(f"The median of population density is: {quartiles[0.5]} ")

# Values more than 1.5 * IQR outside the quartiles are flagged as potential outliers
lower_bound = lowerq - (1.5*iqr)
upper_bound = upperq + (1.5*iqr)
print(f"Values below {lower_bound} could be outliers.")
print(f"Values above {upper_bound} could be outliers.")

In [None]:
# Quartiles, interquartile range and outlier bounds of population density
# for the UGB cities
quartiles = ugb_density.quantile([.25,.5,.75])
lowerq = quartiles[0.25]
upperq = quartiles[0.75]
iqr = upperq-lowerq

print(f"The lower quartile of UGB population density is: {lowerq}")
print(f"The upper quartile of UGB population density is: {upperq}")
print(f"The interquartile range of UGB population density is: {iqr}")
# Message previously read "The the median"
print(f"The median of UGB population density is: {quartiles[0.5]} ")

# Values more than 1.5 * IQR outside the quartiles are flagged as potential outliers
lower_bound = lowerq - (1.5*iqr)
upper_bound = upperq + (1.5*iqr)
print(f"Values below {lower_bound} could be outliers.")
print(f"Values above {upper_bound} could be outliers.")

In [None]:
# End of code for Question 4: population density