### Choropleth Map Tool

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import os
from us import states
import plotly.express as px
from pathlib import Path
from panel.interact import interact
import panel as pn

# Census API key.
# Read the key from the environment so the secret never lives in source
# control. The key that was previously committed here should be treated as
# leaked and revoked.
api_key = os.getenv("CENSUS_API_KEY")
if not api_key:
    # Fail early with an actionable message rather than with an opaque
    # error on the first API request.
    raise RuntimeError(
        "CENSUS_API_KEY is not set; export your Census API key before running."
    )

# Census client pinned to the 2018 data year.
c = Census(api_key, year=2018)


In [None]:
# Look up the Mapbox token in the environment (None when the variable is
# unset) and register it with Plotly Express for the mapbox-based figures.
map_box_api = os.environ.get("MAPBOX_API_KEY")
px.set_mapbox_access_token(map_box_api)

In [None]:
# Fields requested from the ACS 1-year endpoint: the geography name plus six
# B24022 estimate variables (renamed to readable labels in a later cell).
census_fields = (
    "NAME",
    "B24022_005E",
    "B24022_006E",
    "B24022_008E",
    "B24022_041E",
    "B24022_042E",
    "B24022_044E",
)

# Geography filter: five counties inside state FIPS 36
# (presumably the five New York City boroughs -- confirm).
census_geography = {'for': 'county:061,005,047,081,085', 'in': 'state:36'}

census_earn_data = c.acs1.get(census_fields, census_geography)


In [None]:
# Readable column labels for the raw B24022 variable codes.
earnings_column_labels = {
    "B24022_005E": "Male Management Earnings",
    "B24022_006E": "Male Financial operations Earnings",
    "B24022_008E": "Male Computer occupations Earnings",
    "B24022_041E": "Female Management Earnings",
    "B24022_042E": "Female Financial operations Earnings",
    "B24022_044E": "Female Computer occupations Earnings",
}

# Global pandas display option: render every float as a dollar amount.
pd.options.display.float_format = '${:,.2f}'.format

# One row per record returned by the Census query, with the codes relabelled.
census_earn_pd_2018 = pd.DataFrame(census_earn_data).rename(
    columns=earnings_column_labels
)


In [None]:
# Build the FIPS key used to match rows to GeoJSON features by concatenating
# the state and county codes as strings.
state_code = census_earn_pd_2018["state"].astype(str)
county_code = census_earn_pd_2018["county"].astype(str)
census_earn_pd_2018["FIPS"] = state_code + county_code
census_earn_pd_2018.head()

In [None]:
# Add one combined column per occupation group: male value + female value.
# NOTE(review): B24022 appears to be a median-earnings table; if so, a sum of
# two medians is not itself a meaningful earnings figure -- confirm intent.
combined_earnings_sources = {
    "Management Earnings": (
        'Male Management Earnings',
        'Female Management Earnings',
    ),
    "Computer Occupations Earnings": (
        'Male Computer occupations Earnings',
        'Female Computer occupations Earnings',
    ),
    "Financial Operations Earnings": (
        'Male Financial operations Earnings',
        'Female Financial operations Earnings',
    ),
}
for combined_label, (male_label, female_label) in combined_earnings_sources.items():
    census_earn_pd_2018[combined_label] = (
        census_earn_pd_2018[male_label] + census_earn_pd_2018[female_label]
    )



In [None]:
# Display-only preview of the frame indexed by NAME. The result is not
# assigned, so census_earn_pd_2018 keeps NAME as a regular column.
census_earn_pd_2018.set_index(keys="NAME")

In [None]:
import json
from urllib.request import urlopen

# County-boundary GeoJSON published in the plotly/datasets repository; the
# file name indicates features are keyed by county FIPS code.
COUNTY_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

# Download and parse the GeoJSON; the connection is closed on leaving the block.
with urlopen(COUNTY_GEOJSON_URL) as response:
    counties = json.load(response)

In [None]:
# Columns shown in the tooltip of the static choropleth.
static_hover_columns = [
    "Male Management Earnings",
    "Male Financial operations Earnings",
    "Female Management Earnings",
    "Female Financial operations Earnings",
]

# Map view settings: one colour per NAME value, centred on the given
# latitude/longitude.
static_map_options = dict(
    geojson=counties,
    locations="FIPS",
    color="NAME",
    zoom=10,
    opacity=0.2,
    center={"lat": 40.7128, "lon": -74.0060},
    hover_data=static_hover_columns,
)

fig = px.choropleth_mapbox(census_earn_pd_2018, **static_map_options)

In [None]:
# View function used by find_outliers / the interactive panel.
def mpl_plot(avg):
    """Build the county choropleth and show the selected variable on hover.

    Previously ``avg`` was ignored, so choosing a different variable in the
    interactive panel never changed the figure. The name of the series passed
    in is now used as the hover column.

    Args:
        avg: Series derived from a column of ``census_earn_pd_2018`` (as
            produced by ``find_outliers``). Only its ``name`` is read; the
            values themselves are not plotted.

    Returns:
        The Plotly figure created by ``px.choropleth_mapbox``.
    """
    # Fall back to the original hard-coded column when the argument has no
    # name or the name is not a column of the frame.
    selected_column = getattr(avg, "name", None)
    if selected_column not in census_earn_pd_2018.columns:
        selected_column = "Female Management Earnings"

    fig = px.choropleth_mapbox(
        census_earn_pd_2018,
        geojson=counties,
        locations="FIPS",
        color="NAME",
        zoom=10,
        opacity=.2,
        center={"lat": 40.7128, "lon": -74.0060},
        hover_data=[selected_column],
    )
    return fig


# Compute the rolling mean of one column and pass it to the view function.
def find_outliers(variable='Male Management Earnings', window=30, sigma=10, view_fn=mpl_plot):
    """Return ``view_fn`` applied to the rolling mean of a column.

    Args:
        variable: Column of ``census_earn_pd_2018`` to average.
        window: Rolling window size passed to ``Series.rolling``.
        sigma: Accepted for interface compatibility; not used in this body.
        view_fn: Callable that receives the rolling-mean series and returns
            the object to display.

    Returns:
        Whatever ``view_fn`` returns.
    """
    selected_series = census_earn_pd_2018[variable]
    rolling_mean = selected_series.rolling(window=window).mean()
    return view_fn(rolling_mean)

In [None]:
# One-off render with explicit settings (variable, window, sigma), using the
# default view function.
find_outliers('Male Management Earnings', 20, 10)

In [None]:
# Offer only numeric columns in the dropdown: find_outliers takes a rolling
# mean of the selected column, which fails on text columns such as NAME,
# state, county and FIPS.
numeric_columns = census_earn_pd_2018.select_dtypes(include="number").columns

# Widget specs: a selector for `variable` and a 1-20 slider for `sigma`.
kw = dict(variable=sorted(numeric_columns), sigma=(1, 20))
i = pn.interact(find_outliers, **kw)

In [None]:
# Split the interact layout into its first element (indexed for the two
# controls) and its second element (indexed for the rendered output), then
# place the output to the left of a column holding the first two controls.
controls, output = i[0], i[1]
p = pn.Row(output[0], pn.Column(controls[0], controls[1]))
p

### Parallel Categories 

In [None]:
# Load the prepared census extract.
# The path is built with pathlib so it resolves on every OS; the previous
# "resources\census_data.csv" literal used a Windows-only separator and an
# invalid "\c" escape sequence. `infer_datetime_format` was dropped because
# pandas deprecated it in 2.0, where passing it has no effect.
census_csv = pd.read_csv(Path("resources") / "census_data.csv", parse_dates=True)
census_csv.head()

In [None]:
# Columns not needed for the parallel-categories view.
unused_columns = [
    "Employed_age16+_civilian",
    "Employed_male",
    "Financial_managers_male",
    "Male_business_operations",
    "Financial_specialists_male",
    "Accountants_&_auditors_male",
    "Computer_male",
    "Male_data_scientits",
    "Software_devops_male",
    "Database_&_system_male",
    "Employed_female",
    "Financial_managers_female",
    "Female_business_operations",
    "Financial_specialists_female",
    "Accountants_&_auditors_female",
    "Computer_female",
    "Female_data_scientits",
    "Software_devops_female",
    "Database_&_system_female",
    "Information_age16+_total",
    "Finance_and_insurance",
    "Real_estate",
    "Unemployment Rate",
    "Male Business Operations",
    "Male Data Scientits",
    "Female Business Operations",
    "Female Data Scientits",
]

# Drop them; a KeyError is raised if any listed column is missing.
census_csv = census_csv.drop(columns=unused_columns)
census_csv.head()

In [None]:
# Bin edges for pd.cut. With the default right=True each bin is
# (lower, upper], and values outside the outer edges become NaN.
pop_bins = [300000, 1000000, 1700000, 2700000]
hincome_bins = [25000, 60000, 70000, 90000]
povert_bins = [0, 200000, 300000, 600000]

# One label per bin. The first population label used to read "<100k", which
# contradicted its bin (300,000 to 1,000,000); it now states the real
# upper bound.
pop_group_names = ["Small POP <1mil", "Medium POP", "Large POP >1.7mil"]
hincome_group_names = ["Low Income <60K", "Middle Income", "Large Income >70k"]
povert_group_names = ["Low <200k", "Medium", "High >300k"]

# Categorical columns used as dimensions of the parallel-categories plot.
census_csv["Population Size"] = pd.cut(census_csv["Population"], pop_bins, labels=pop_group_names)
census_csv["Income Range"] = pd.cut(census_csv["Household Income"], hincome_bins, labels=hincome_group_names)
census_csv["Poverty Level"] = pd.cut(census_csv["Poverty Count"], povert_bins, labels=povert_group_names)

In [None]:
# Strip the ", New York" suffix from each Name value. The text contains no
# regex metacharacters, so a literal replacement gives the same result as
# the pattern-based default of older pandas versions.
name_without_state = census_csv["Name"].str.replace(", New York", "", regex=False)
census_csv["Name"] = name_without_state
census_csv.head()

In [None]:
# Dimensions of the plot, in left-to-right order.
category_dimensions = ["Name", "Population Size", "Income Range", "Poverty Level"]

# Ribbons are coloured by Year on a sequential blue scale; the "Name"
# dimension is labelled "Borough".
parallel_categories = px.parallel_categories(
    census_csv,
    dimensions=category_dimensions,
    color="Year",
    color_continuous_scale=px.colors.sequential.Blues,
    labels={"Name": "Borough"},
    width=970,
)
parallel_categories.show()