In [None]:
#setup dependencies
import pandas as pd
import numpy as np 
import scipy.stats as stats
import matplotlib.pyplot as plt
from scipy.stats import linregress
import gmaps
import requests
import json
from config import gkey

In [None]:
#locations of the four input csv files (relative to the notebook's working directory)
#urban-growth-boundary (UGB) cities: population, then growth
UGB_Popcsv, UGB_Growcsv = "UGB_pop.csv", "UGB_PopGrowth.csv"
#top 100 cities: population, then growth
top100_Popcsv, top100_Growcsv = "Top100_CitiesPop.csv", "Pop_Growth.csv"

In [None]:
#load each csv into its own dataframe
#UGB cities: population table first, growth table second
UGBpop_df, UGBgro_df = (pd.read_csv(csv_path) for csv_path in (UGB_Popcsv, UGB_Growcsv))
#top 100 cities: population table first, growth table second
Toppop_df, Topgro_df = (pd.read_csv(csv_path) for csv_path in (top100_Popcsv, top100_Growcsv))

In [None]:
#merge the top100 population and growth tables on their shared "ID" column
#a left join keeps every row of the population table, even without a growth match
#columns present in both tables come back with pandas' default _x / _y suffixes
top100_df = pd.merge(Toppop_df, Topgro_df, how = "left", on = "ID")
#preview the merged frame
top100_df.head()

In [None]:
#merge the UGB population and growth tables on their shared "ID" column
#a left join keeps every row of the population table, even without a growth match
#columns present in both tables come back with pandas' default _x / _y suffixes
UGB_df = pd.merge(UGBpop_df, UGBgro_df, how = "left", on = "ID")
#preview the merged frame
UGB_df.head()

In [None]:
#reformat the merged UGB frame:
#  - remove the duplicated merge columns (Rank_x, Rank_y, City_y, State_y) and the
#    raw yearly estimates (Estimated Population 2017, Estimated Population 2018)
#  - give the surviving City_x / State_x columns their plain names
#errors = "ignore" lets the cell be re-run after the columns are already gone;
#no Rank rename is needed because both Rank columns are dropped
UGB_df = (
    UGB_df
    .drop(columns = ["Rank_x", "Rank_y", "City_y", "State_y",
                     "Estimated Population 2017", "Estimated Population 2018"],
          errors = "ignore")
    .rename(columns = {"City_x": "City", "State_x": "State"})
)
UGB_df

In [None]:
#repeat the cleanup for the top100 cities:
#  - remove the duplicated merge columns (Rank_x, Rank_y, City_y, State_y) and the
#    raw yearly estimates (Estimated Population 2017, Estimated Population 2018)
#  - rename City_x and State_x to City and State
#errors = "ignore" lets the cell be re-run after the columns are already gone;
#no Rank rename is needed because both Rank columns are dropped
top100_df = (
    top100_df
    .drop(columns = ["Rank_x", "Rank_y", "City_y", "State_y",
                     "Estimated Population 2017", "Estimated Population 2018"],
          errors = "ignore")
    .rename(columns = {"City_x": "City", "State_x": "State"})
)
top100_df

In [None]:
#create summary statistics for the UGB cities
#one-row dataframe: mean, median, variance, standard deviation and standard error
#of the mean for the 2018 population estimate and for the percent growth
ugb_population = UGB_df["2018_Estimate"]
ugb_growth_pct = UGB_df["Percent Difference"]

UGB_summ_df = pd.DataFrame({
    "Avg Population": [ugb_population.mean()],
    "Median Population": [ugb_population.median()],
    "Population Variance": [ugb_population.var()],
    "Population Standard Deviation": [ugb_population.std()],
    "Population SEM": [ugb_population.sem()],

    "Avg Pop Growth": [ugb_growth_pct.mean()],
    "Median Pop Growth": [ugb_growth_pct.median()],
    "Pop Growth Variance": [ugb_growth_pct.var()],
    "Pop Growth Standard Deviation": [ugb_growth_pct.std()],
    "Pop Growth SEM": [ugb_growth_pct.sem()],
})
UGB_summ_df

In [None]:
#create summary statistics for the top100 cities
#one-row dataframe: mean, median, variance, standard deviation and standard error
#of the mean for the 2018 population estimate and for the percent growth
top100_population = top100_df["2018_Estimate"]
top100_growth_pct = top100_df["Percent Difference"]

top100_summ_df = pd.DataFrame({
    "Avg Population": [top100_population.mean()],
    "Median Population": [top100_population.median()],
    "Population Variance": [top100_population.var()],
    "Population Standard Deviation": [top100_population.std()],
    "Population SEM": [top100_population.sem()],

    "Avg Pop Growth": [top100_growth_pct.mean()],
    "Median Pop Growth": [top100_growth_pct.median()],
    "Pop Growth Variance": [top100_growth_pct.var()],
    "Pop Growth Standard Deviation": [top100_growth_pct.std()],
    "Pop Growth SEM": [top100_growth_pct.sem()],
})

#the cell used to bind its result to the misspelled name top100_sum_df;
#keep that name as an alias in case anything still reads it
top100_sum_df = top100_summ_df
top100_summ_df

In [None]:
#bar chart: one bar per UGB city, bar height = 2018 population estimate
plt.bar(x = UGB_df["City"], height = UGB_df["2018_Estimate"],
        color = 'lightskyblue', width = 0.5)

#fixed axis windows (presumably sized for ten cities and a max just under 1.05M - confirm against the data)
plt.xlim(-1,10)
plt.ylim(0,1050000)

#labelling
plt.xlabel("City")
plt.ylabel("Population")
plt.title("Populations of UGB Cities")

#tilt the city names so they do not overlap
plt.xticks(rotation = 45)

In [None]:
#bar chart comparing average population growth: top100 cities, UGB cities, whole US
#total US population growth = .62%, https://www.multpl.com/us-population-growth-rate/table/by-year
growth_bars = [
    ("Top 100 Cities", top100_summ_df["Avg Pop Growth"], 'lightskyblue'),
    ("UGB Cities", UGB_summ_df["Avg Pop Growth"], 'purple'),
    ("Entire US", [.62], 'crimson'),
]
for subset_label, avg_growth, bar_color in growth_bars:
    plt.bar(subset_label, avg_growth, color = bar_color)

#fixed axis windows: three category bars, growth shown on a 0-1 (%) scale
plt.xlim(-1,3)
plt.ylim(0,1)

#labelling
plt.xlabel("Subset")
plt.ylabel("Average Population Growth (%)")
plt.title("Average Population Growth by Subset")
plt.xticks(rotation = 45)

In [None]:
#convert each average growth rate (percent) into a head-count increase
#NOTE(review): all three rates are applied to the top100 average population, which
#looks like a deliberate common baseline for comparison - confirm
UGB_growth = (UGB_summ_df["Avg Pop Growth"] / 100) * top100_summ_df["Avg Population"]
top100_growth = (top100_summ_df["Avg Pop Growth"] / 100) * top100_summ_df["Avg Population"]
#.0062 is the .62% total US growth rate written as a fraction
US_growth = .0062 * top100_summ_df["Avg Population"]

#collect the three one-row series into a single comparison table
Pop_growth_df = pd.DataFrame({
    "UGB Pop Increase": UGB_growth,
    "Top 100 City Pop Increase": top100_growth,
    "USA Pop Increase": US_growth,
})
Pop_growth_df

In [None]:
#independent two-sample t-test on percent growth: UGB cities vs top100 cities
#equal_var = False selects Welch's test (no equal-variance assumption)
UGB_sample = UGB_df["Percent Difference"]
top100_sample = top100_df["Percent Difference"]
stats.ttest_ind(a = UGB_sample, b = top100_sample, equal_var = False)

In [None]:
#create heatmap of UGB cities population growth and decline
#step 1: look up the latitude / longitude of every UGB city with the Google Geocoding API
target_cities = ["San Jose, California", "Seattle, Washington", "Portland, Oregon", "Miami, Florida", "Virginia Beach, Virginia",
                "Minneapolis, Minnesota", "Honolulu, Hawaii", "Lexington, Kentucky", "St. Paul, Minnesota", "Boulder, Colorado"]

#the endpoint is the same for every request, so build it once outside the loop
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

for city in target_cities:
    params = {"address": city, "key": gkey}
    #a timeout keeps a stalled connection from hanging the notebook
    response = requests.get(base_url, params = params, timeout = 10)
    #surface HTTP-level failures (4xx / 5xx) right away
    response.raise_for_status()

    cities_geo = response.json()

    #an empty or missing "results" list means there is nothing to index into;
    #report the API's status field instead of failing with an IndexError
    if not cities_geo.get("results"):
        print(f"{city}: no geocoding result (status: {cities_geo.get('status')})")
        continue

    location = cities_geo["results"][0]["geometry"]["location"]
    lat = location["lat"]
    lng = location["lng"]

    print(f"{city}: {lat}, {lng}")


In [None]:
#create data frame of the geocoding outputs (coordinates copied in by hand)
#latitude and longitude are both stored as floats so the two columns share a dtype
cities_df = pd.DataFrame({
    "City": ["San Jose, California", "Seattle, Washington", "Portland, Oregon", "Miami, Florida", "Virginia Beach, Virginia",
             "Minneapolis, Minnesota", "Honolulu, Hawaii", "Lexington, Kentucky", "St. Paul, Minnesota", "Boulder, Colorado"],
    "Lat": [37.3382082, 47.6062095, 45.5051064,
            25.7616798, 36.8529263, 44.977753,
            21.3069444, 38.0405837, 44.9537029, 40.0149856],
    "Lng": [-121.8863286, -122.3320708, -122.6750261,
            -80.1917902, -75.97798499999999, -93.2650108,
            -157.8583333, -84.5037164, -93.0899578, -105.2705456],
})

#NOTE(review): growth is attached by row index, not by city name, so this assumes
#UGB_df lists the cities in the same order as the "City" column above - confirm
cities_df["Population Growth"] = UGB_df["Percent Difference"]
cities_df

In [None]:
#create heatmap of population growth at each UGB city's coordinates
#gmaps has to be given the Google API key before a figure is built
gmaps.configure(api_key = gkey)

locations = cities_df[["Lat", "Lng"]].astype(float)
growth = cities_df["Population Growth"].astype(float)

fig = gmaps.figure()

#NOTE(review): the .6 offset presumably lifts the negative growth values so that
#every heatmap weight is non-negative, and max_intensity = 2.7 looks hand-tuned
#to this data set - confirm both if the input data changes
heat_layer = gmaps.heatmap_layer(locations, weights = (growth + .6),
                                 dissipating = False, max_intensity = 2.7,
                                 point_radius = 1)

fig.add_layer(heat_layer)

fig

In [None]:
#cities with positive population growth: Seattle, Portland, Miami, Virginia Beach, Lexington, Minneapolis, St. Paul
#cities with negative population growth: San Jose, Honolulu, Boulder