## Import Libraries

In [78]:
import json
import requests
import plotly.express as px
import plotly.graph_objects as go
import plotly
import pandas as pd
import numpy as np
from collections import defaultdict
import random
from traitlets.traitlets import default
from itertools import cycle

In [14]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Globals

In [2]:
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "Virgin Islands": "VI" }

states_abbrev = [ 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
            'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
            'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
            'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
            'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']

layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    margin=go.layout.Margin(
        l=5, #left margin
        r=5, #right margin
        b=40, #bottom margin
        t=5, #top margin
    )
)

## add global dataframe constructed from firebase query 

## Firebase Queries

In [3]:
#get the occupation title, code, and mean annual income
def get_occ_title(title):
  
  firebase_url = 'https://dsci551project1-9a127-default-rtdb.firebaseio.com/.json?orderBy="occ_title"&equalTo="{}"'.format(title)
  response = requests.get(firebase_url)
  resp_json = response.json()

  if resp_json:
    return resp_json
  else:
    raise Exception('Occupational title not found in Firebase database.')


In [4]:
#function to convert the json firebase request json to a DataFrame (for plotting)
def fb_to_df(resp_json):

  return pd.DataFrame(resp_json).transpose()


In [None]:
test_df = fb_to_df(get_occ_title('Firefighters'))

# Maps

**Title:**

Mean Annual Income for a Given Profession by State

**Description:**

Mean annual income across US States. Aggregated (using mean) accross years (2019-2020).  

In [60]:
#takes a dataframe and a year and plots a map of 
def map_annual_income(df):

  occ_title = df['occ_title'][1]

  df['state'] = [us_state_to_abbrev[state] for state in list(df['area_title'])]

  years = list(df['year'].unique())

  #get list of states per year
  states = []
  for year in years:
    states.append(list(df[df['year'] == year]['state'].unique()))

  #get an (fast) intersection of all states across years
  if states:
    states_intersection = set.intersection(*map(set,states))
  else:
    raise Exception('Data missing state-level information.')

  #makes sure states are in "mappable" states (cont us, hawaii, alaska)
  states_intersection = list(set(states_intersection) & set(states_abbrev))
  
  means = []
  for state in states_intersection:
    means.append(df[df['state'] == state]['a_mean'].mean())

  data_dict = {'state':states_intersection, 'a_mean': means}
  plot_df = pd.DataFrame(data_dict)
    
  
  fig = go.Figure(data=go.Choropleth(
    locations=plot_df['state'], # Spatial coordinates
    z = plot_df['a_mean'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'matter',
    #text = df['hover_text'],
    colorbar_title = "USD",
  ))

  fig.update_layout(
    title_text = 'Mean Annual Income for {} by State'.format(occ_title),
    geo_scope='usa', # limite map scope to USA
  )
  fig.update_layout(layout)
    
  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [61]:
map_annual_income(fb_to_df(get_occ_title('Prosthodontists')))

'{"data": [{"colorbar": {"title": {"text": "USD"}}, "colorscale": [[0.0, "rgb(253, 237, 176)"], [0.09090909090909091, "rgb(250, 205, 145)"], [0.18181818181818182, "rgb(246, 173, 119)"], [0.2727272727272727, "rgb(240, 142, 98)"], [0.36363636363636365, "rgb(231, 109, 84)"], [0.45454545454545453, "rgb(216, 80, 83)"], [0.5454545454545454, "rgb(195, 56, 90)"], [0.6363636363636364, "rgb(168, 40, 96)"], [0.7272727272727273, "rgb(138, 29, 99)"], [0.8181818181818182, "rgb(107, 24, 93)"], [0.9090909090909091, "rgb(76, 21, 80)"], [1.0, "rgb(47, 15, 61)"]], "locationmode": "USA-states", "locations": ["WI", "FL"], "z": [251835.0, 235790.0], "type": "choropleth"}], "layout": {"template": {"data": {"bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "bar"}], "barpolar": [{"marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "

**Title:**

Mean Hourly Income for a Given Profession by State

**Description:**

Mean hourly income across US States. Aggregated (using mean) accross years (2019-2020). 

In [62]:
#takes occupational title dataframe and returns json encoded figure
def map_hourly_income(df):

  occ_title = df['occ_title'][1]

  df['state'] = [us_state_to_abbrev[state] for state in list(df['area_title'])]

  years = list(df['year'].unique())

  #get list of states per year
  states = []
  for year in years:
    states.append(list(df[df['year'] == year]['state'].unique()))

  #get an (fast) intersection of all states across years
  if states:
    states_intersection = set.intersection(*map(set,states))
  else:
    raise Exception('Data missing state-level information.')

  #makes sure states are in "mappable" states (cont us, hawaii, alaska)
  states_intersection = list(set(states_intersection) & set(states_abbrev))
  
  means = []
  for state in states_intersection:
    means.append(df[df['state'] == state]['h_mean'].mean())

  data_dict = {'state':states_intersection, 'h_mean': means}
  plot_df = pd.DataFrame(data_dict)
    
  
  fig = go.Figure(data=go.Choropleth(
    locations=plot_df['state'], # Spatial coordinates
    z = plot_df['h_mean'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'matter',
    #text = df['hover_text'],
    colorbar_title = "USD",
  ))

  fig.update_layout(
    title_text = 'Mean Hourly Income for {} by State'.format(occ_title),
    geo_scope='usa', # limite map scope to USA
  )
  fig.update_layout(layout)
  
  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [63]:
map_hourly_income(fb_to_df(get_occ_title('Prosthodontists')))

'{"data": [{"colorbar": {"title": {"text": "USD"}}, "colorscale": [[0.0, "rgb(253, 237, 176)"], [0.09090909090909091, "rgb(250, 205, 145)"], [0.18181818181818182, "rgb(246, 173, 119)"], [0.2727272727272727, "rgb(240, 142, 98)"], [0.36363636363636365, "rgb(231, 109, 84)"], [0.45454545454545453, "rgb(216, 80, 83)"], [0.5454545454545454, "rgb(195, 56, 90)"], [0.6363636363636364, "rgb(168, 40, 96)"], [0.7272727272727273, "rgb(138, 29, 99)"], [0.8181818181818182, "rgb(107, 24, 93)"], [0.9090909090909091, "rgb(76, 21, 80)"], [1.0, "rgb(47, 15, 61)"]], "locationmode": "USA-states", "locations": ["WI", "FL"], "z": [121.07, 113.36], "type": "choropleth"}], "layout": {"template": {"data": {"bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "bar"}], "barpolar": [{"marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "over

**Title:**

Total Employment for Given Profession Across US States

**Description:**

Total number of people employed in a given profession (not adjusted for population) across US States. Aggregated (by mean) across years (2019-2020).

In [64]:
#takes occupational title and returns json dump of figure
def map_total_employment(df):

  occ_title = df['occ_title'][1]

  df['state'] = [us_state_to_abbrev[state] for state in list(df['area_title'])]

  years = list(df['year'].unique())

  #get list of states per year
  states = []
  for year in years:
    states.append(list(df[df['year'] == year]['state'].unique()))

  #get an (fast) intersection of all states across years
  if states:
    states_intersection = set.intersection(*map(set,states))
  else:
    raise Exception('Data missing state-level information.')

  #makes sure states are in "mappable" states (cont us, hawaii, alaska)
  states_intersection = list(set(states_intersection) & set(states_abbrev))
  
  means = []
  for state in states_intersection:
    means.append(df[df['state'] == state]['tot_emp'].mean())

  data_dict = {'state':states_intersection, 'tot_emp': means}
  plot_df = pd.DataFrame(data_dict)
    
  
  fig = go.Figure(data=go.Choropleth(
    locations=plot_df['state'], # Spatial coordinates
    z = plot_df['tot_emp'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'matter',
    #text = df['hover_text'],
    colorbar_title = "Number of People Employed",
  ))

  fig.update_layout(
    title_text = 'Total Employment for {} by State'.format(occ_title),
    geo_scope='usa', # limite map scope to USA
  )
  fig.update_layout(layout)
  
  
  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [66]:
map_total_employment(fb_to_df(get_occ_title('Firefighters')))

'{"data": [{"colorbar": {"title": {"text": "Number of People Employed"}}, "colorscale": [[0.0, "rgb(253, 237, 176)"], [0.09090909090909091, "rgb(250, 205, 145)"], [0.18181818181818182, "rgb(246, 173, 119)"], [0.2727272727272727, "rgb(240, 142, 98)"], [0.36363636363636365, "rgb(231, 109, 84)"], [0.45454545454545453, "rgb(216, 80, 83)"], [0.5454545454545454, "rgb(195, 56, 90)"], [0.6363636363636364, "rgb(168, 40, 96)"], [0.7272727272727273, "rgb(138, 29, 99)"], [0.8181818181818182, "rgb(107, 24, 93)"], [0.9090909090909091, "rgb(76, 21, 80)"], [1.0, "rgb(47, 15, 61)"]], "locationmode": "USA-states", "locations": ["UT", "MA", "OK", "PA", "OR", "WV", "NH", "MN", "AR", "WI", "NM", "HI", "KY", "GA", "IN", "FL", "NE", "MS", "NV", "ND", "VA", "KS", "ID", "MT", "RI", "NC", "LA", "SD", "AZ", "IL", "DE", "ME", "NY", "TX", "CA", "MI", "MD", "NJ", "TN", "OH", "CO", "WY", "IA", "AK", "WA", "CT", "MO", "AL", "SC"], "z": [2110.0, 11115.0, 3540.0, 4410.0, 3980.0, 840.0, 2480.0, 6935.0, 2295.0, 8975.0, 2

**Title:**

Employment for a Given Profession Per 1000 Jobs Across States

**Description:**

Number of people employed in a given profession per 1000 employed individuals (per capita). Aggregated (by mean) across years (2019-2020).

In [67]:
#takes occupational title and returns json dump of figure
def map_employment_per_1000(df):

  print(df.columns)
  occ_title = df['occ_title'][1]

  df['state'] = [us_state_to_abbrev[state] for state in list(df['area_title'])]

  years = list(df['year'].unique())

  #get list of states per year
  states = []
  for year in years:
    states.append(list(df[df['year'] == year]['state'].unique()))

  #get an (fast) intersection of all states across years
  if states:
    states_intersection = set.intersection(*map(set,states))
  else:
    raise Exception('Data missing state-level information.')

  #makes sure states are in "mappable" states (cont us, hawaii, alaska)
  states_intersection = list(set(states_intersection) & set(states_abbrev))
  
  means = []
  for state in states_intersection:
    means.append(df[df['state'] == state]['jobs_1000'].mean())

  data_dict = {'state':states_intersection, 'jobs_1000': means}
  plot_df = pd.DataFrame(data_dict)
    
  
  fig = go.Figure(data=go.Choropleth(
    locations=plot_df['state'], # Spatial coordinates
    z = plot_df['jobs_1000'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'matter',
    #text = df['hover_text'],
    colorbar_title = "Emp per 1k Jobs"
  ))

  fig.update_layout(
    title_text = 'Employment per 1000 Jobs for {} by State'.format(occ_title),
    geo_scope='usa', # limite map scope to USA
  )
  fig.update_layout(layout)  
  
  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [69]:
map_employment_per_1000(fb_to_df(get_occ_title('Firefighters')))

Index(['Unnamed: 0', 'a_mean', 'a_median', 'a_pct10', 'a_pct25', 'a_pct75',
       'a_pct90', 'area_title', 'area_type', 'educational requirement',
       'emp_prse', 'h_mean', 'h_median', 'h_pct10', 'h_pct25', 'h_pct75',
       'h_pct90', 'jobs_1000', 'loc_quotient', 'mean_prse', 'naics',
       'naics_title', 'o_group', 'occ_code', 'occ_title', 'own_code',
       'tot_emp', 'year'],
      dtype='object')


'{"data": [{"colorbar": {"title": {"text": "Emp per 1k Jobs"}}, "colorscale": [[0.0, "rgb(253, 237, 176)"], [0.09090909090909091, "rgb(250, 205, 145)"], [0.18181818181818182, "rgb(246, 173, 119)"], [0.2727272727272727, "rgb(240, 142, 98)"], [0.36363636363636365, "rgb(231, 109, 84)"], [0.45454545454545453, "rgb(216, 80, 83)"], [0.5454545454545454, "rgb(195, 56, 90)"], [0.6363636363636364, "rgb(168, 40, 96)"], [0.7272727272727273, "rgb(138, 29, 99)"], [0.8181818181818182, "rgb(107, 24, 93)"], [0.9090909090909091, "rgb(76, 21, 80)"], [1.0, "rgb(47, 15, 61)"]], "locationmode": "USA-states", "locations": ["UT", "MA", "OK", "PA", "OR", "WV", "NH", "MN", "AR", "WI", "NM", "HI", "KY", "GA", "IN", "FL", "NE", "MS", "NV", "ND", "VA", "KS", "ID", "MT", "RI", "NC", "LA", "SD", "AZ", "IL", "DE", "ME", "NY", "TX", "CA", "MI", "MD", "NJ", "TN", "OH", "CO", "WY", "IA", "AK", "WA", "CT", "MO", "AL", "SC"], "z": [1.408, 3.1925, 2.226, 0.774, 2.1505, 1.2505000000000002, 3.8765, 2.4779999999999998, 1.913,

# Overview Information

In [32]:
"""
Function that takes an occ title and returns aggregated (mean) information across states & years of the form:
{'occ_title' :  title,
  'a_mean': value,
  'h_mean' : value,
  'tot_emp': value,
  'jobs_1000' : value,
  'educational requirement' : value}
"""

def get_occ_info(df):

  occ_title = df['occ_title'][1]
  
  info_dict = {}

  info_dict['occ_title'] = occ_title
  info_dict['a_mean'] = round(float(df['a_mean'].mean()),2)
  info_dict['h_mean'] = round(float(df['h_mean'].mean()),2)
  info_dict['tot_emp'] = round(float(df['tot_emp'].mean()),2)
  info_dict['jobs_1000'] = round(float(df['jobs_1000'].mean()),4)
  info_dict['educational requirement'] = str(df['educational requirement'].value_counts().index[0]) #this takes the mode if there are multiple

  return info_dict



In [None]:
get_occ_info(fb_to_df(get_occ_title('Firefighters')))

{'a_mean': 50383.37,
 'educational requirement': 'Postsecondary nondegree award',
 'h_mean': 24.22,
 'jobs_1000': 2.2826,
 'occ_title': 'Firefighters',
 'tot_emp': 6353.2}

# Change Plots

**Title:**

Annual Change in Income

**Description:**

Line plot showing the change income for a given profession agreggated across states over the years.

In [34]:
def plot_annual_income_change(df):

  occ_title = df['occ_title'][1]

  years = list(df['year'].unique())
  a_means = []

  for year in years:
    a_means.append(df[df['year'] == year]['a_mean'].mean())

  years = sorted([str(year) for year in years])

  fig = go.Figure()
  fig.add_trace(go.Scatter(x=years, y=a_means,
                    mode='lines+markers',
                    name='lines+markers',
                    marker_color='orange'))
  

  fig.update_layout(
    title='Annual Mean Income Changes for {}'.format(occ_title),
    xaxis_title="Year",
    xaxis_tickfont_size=14,
    yaxis=dict(
      title='Mean Annual Income',
      titlefont_size=16,
      tickfont_size=14,
    )
  )
  fig.update_layout(layout)
    

In [None]:
plot_annual_income_change(fb_to_df(get_occ_title('Firefighters')))

In [35]:
def plot_tot_emp_change_bar(df):

  occ_title = df['occ_title'][1]
  years = list(df['year'].unique())

  tot_emp = []

  for year in years:
    tot_emp.append(df[df['year'] == year]['tot_emp'].sum())

  years = sorted([str(year) for year in years])

  fig = go.Figure()
  fig.add_trace(go.Bar(x=years, y=tot_emp, marker_color='orange'))
  

  fig.update_layout(
    title='Yearly Total Number of {} Employed'.format(occ_title),
    xaxis_title="Year",
    xaxis_tickfont_size=14,
    yaxis=dict(
      title='Number of People Employed',
      titlefont_size=16,
      tickfont_size=14,
    )
  )
  fig.update_layout(layout)
    
  

  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)


**Title:**

Annual Change in Total Employment

**Description:**

Bar plot showing the change total employment for a given profession agreggated across states over the years.

In [None]:
plot_tot_emp_change_bar(fb_to_df(get_occ_title('Firefighters')))

'{"data": [{"marker": {"color": "orange"}, "x": ["2019", "2020"], "y": [324450.0, 310870.0], "type": "bar"}], "layout": {"template": {"data": {"bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "bar"}], "barpolar": [{"marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "barpolar"}], "carpet": [{"aaxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f"}, "baxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f"}, "type": "carpet"}], "choropleth": [{"colorbar": {"outlinewidth": 0, "ticks": ""}, "type": "choropleth"}], "contour": [{"colorbar": {"outlinewidth": 0, "ticks": ""}, "colorscale": [[0.0, "#0d0887"], [

# Income Percentile Change Bar Charts

**Title:**

Annual Income Percentiles

**Description:**

Annual income (aggregated across states) for a given profession shown by income percentiles (10th, 25th, 75th, 90th) across years. Used to show discrepancy between highest and lowest incomes for a given profession. 

In [37]:
#takes json data from a specific prefession and plots the hourly and annual income percentiles
def plot_annual_percentiles(df):

  occ_title = df['occ_title'][1]

  years = list(df['year'].unique())
  percentiles = ['a_pct10', 'a_pct25', 'a_pct75', 'a_pct90']
  pct_readable = ['10th', '25th', '75th', '90th']
  means = []
  palette = cycle(px.colors.qualitative.Plotly)

  year_dfs = []
  for year in years:
    year_dfs.append(df[df['year'] == year])


  for year_df in year_dfs:
    means.append(list(round(year_df[percentiles].mean(),4)))

  fig = go.Figure()

  for i, year in enumerate(years):
    fig.add_trace(go.Bar(x=pct_readable,
                  y=means[i],
                  name=year,
                  marker_color=next(palette)
                  ))


  fig.update_layout(
    title='Annual Income Percentile Changes for {}'.format(occ_title),
    xaxis_title="Income Percentile",
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='USD',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
  )
  fig.update_layout(layout)

  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [None]:
plot_annual_percentiles(fb_to_df(get_occ_title('Firefighters')))

'{"data": [{"marker": {"color": "#636EFA"}, "name": "2019", "x": ["10th", "25th", "75th", "90th"], "y": [27830.3922, 37202.7451, 61977.6471, 73265.8824], "type": "bar"}, {"marker": {"color": "#EF553B"}, "name": "2020", "x": ["10th", "25th", "75th", "90th"], "y": [29618.4, 38511.8, 63523.2, 75320.8], "type": "bar"}], "layout": {"template": {"data": {"bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "bar"}], "barpolar": [{"marker": {"line": {"color": "#E5ECF6", "width": 0.5}, "pattern": {"fillmode": "overlay", "size": 10, "solidity": 0.2}}, "type": "barpolar"}], "carpet": [{"aaxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f"}, "baxis": {"endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2

**Title:**

Income Distribution

**Description:**

Distribution of for statewise income for a given profession binned by intervals income intervals of max-min/10. Aggregated across years (mean).

In [55]:
def plot_income_distribution(df):

  occ_title = df['occ_title'][1]

  min = df['a_mean'].min()
  max = df['a_mean'].max()
  interval = (max - min)/10

  ranges = np.arange(min, max + interval, interval)

  counts = list(df.groupby(pd.cut(df.a_mean, ranges)).count()['a_mean'])

  fig = go.Figure()
  fig.add_trace(go.Bar(x=ranges,
                  y=counts,
                  marker=dict(
                      color=ranges,
                      colorbar=dict(
                          title="Colorbar"
        ),
        colorscale="Matter"
      )
    )
  )
  
  fig.update_layout(
  title='Income Distribution for {}'.format(occ_title),
  xaxis_title="Income Bracket",
  xaxis_tickfont_size=14,
  yaxis=dict(
      title='Values (States) Per Income Bracket',
      titlefont_size=16,
      tickfont_size=14,
  ),

  legend=dict(
      x=0,
      y=1.0,
      bgcolor='rgba(255, 255, 255, 0)',
      bordercolor='rgba(255, 255, 255, 0)'
  ),
  barmode='group',
  bargap=0.15, # gap between bars of adjacent location coordinates.
  bargroupgap=0.1 # gap between bars of the same location coordinate.
  )
  fig.update_layout(layout)

  return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)

In [56]:
plot_income_distribution(fb_to_df(get_occ_title('Firefighters')))

'{"data": [{"marker": {"color": [25240.0, 31404.0, 37568.0, 43732.0, 49896.0, 56060.0, 62224.0, 68388.0, 74552.0, 80716.0, 86880.0], "colorbar": {"title": {"text": "Colorbar"}}, "colorscale": [[0.0, "rgb(253, 237, 176)"], [0.09090909090909091, "rgb(250, 205, 145)"], [0.18181818181818182, "rgb(246, 173, 119)"], [0.2727272727272727, "rgb(240, 142, 98)"], [0.36363636363636365, "rgb(231, 109, 84)"], [0.45454545454545453, "rgb(216, 80, 83)"], [0.5454545454545454, "rgb(195, 56, 90)"], [0.6363636363636364, "rgb(168, 40, 96)"], [0.7272727272727273, "rgb(138, 29, 99)"], [0.8181818181818182, "rgb(107, 24, 93)"], [0.9090909090909091, "rgb(76, 21, 80)"], [1.0, "rgb(47, 15, 61)"]]}, "x": [25240.0, 31404.0, 37568.0, 43732.0, 49896.0, 56060.0, 62224.0, 68388.0, 74552.0, 80716.0, 86880.0], "y": [3, 12, 19, 21, 15, 8, 11, 3, 4, 4], "type": "bar"}], "layout": {"template": {"data": {"bar": [{"error_x": {"color": "#2a3f5f"}, "error_y": {"color": "#2a3f5f"}, "marker": {"line": {"color": "#E5ECF6", "width":

# Loader Function

In [None]:
def load_plotly_figure(fig, html_title):
  graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
  return render_template(html_title, graphJSON=graphJSON)

# Test Script

In [103]:
test_df = pd.read_csv('/content/drive/My Drive/DSCI551/Final_Project/Data/Clean Data/oews_clean_v1.csv')
occ_titles = list(test_df['occ_title'].unique())
ok = []
problems = {}
firebase_issues = []

plotting_functions = [map_annual_income,
  map_hourly_income,
  map_total_employment,
  map_employment_per_1000,
  #get_occ_info,
  plot_annual_income_change,
  plot_tot_emp_change_bar,
  plot_income_distribution]

for i, title in enumerate(occ_titles):
  try:
    df = fb_to_df(get_occ_title(title))
  except:
    firebase_issues.append('title')

  for func in plotting_functions:
    try:
      thing = func(df)
    except Exception as e:
      problems[title] = e
      
      

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      dtype='object')
Index(['Unnamed: 0', 'area_title', 'area_type', 'educational requirement',
       'emp_prse', 'jobs_1000', 'loc_quotient', 'naics', 'naics_title',
       'o_group', 'occ_code', 'occ_title', 'own_code', 'tot_emp', 'year',
       'a_mean', 'a_median', 'a_pct10', 'a_pct25', 'a_pct75', 'a_pct90',
       'h_mean', 'h_median', 'h_pct10', 'h_pct25', 'h_pct75', 'h_pct90',
       'mean_prse', 'state'],
      dtype='object')
Index(['Unnamed: 0', 'a_mean', 'a_median', 'a_pct10', 'a_pct25', 'a_pct75',
       'a_pct90', 'area_title', 'area_type', 'educational requirement',
       'emp_prse', 'h_mean', 'h_median', 'h_pct10', 'h_pct25', 'h_pct75',
       'h_pct90', 'jobs_1000', 'loc_quotient', 'mean_prse', 'naics',
       'naics_title', 'o_group', 'occ_code', 'occ_title', 'own_code',
       'tot_emp', 'year', 'state'],
      dtype='object')
Index(['Unnamed: 0', 'a_mean', 'a_median', 'a_pct10', 'a_pct25', 'a_pct75',

In [107]:
problems

{'Actors': KeyError('a_mean'),
 'Agricultural Sciences Teachers, Postsecondary': KeyError('h_mean'),
 'Airline Pilots, Copilots, and Flight Engineers': KeyError('h_mean'),
 'Anthropology and Archeology Teachers, Postsecondary': KeyError('h_mean'),
 'Architecture Teachers, Postsecondary': KeyError('h_mean'),
 'Area, Ethnic, and Cultural Studies Teachers, Postsecondary': KeyError('h_mean'),
 'Art, Drama, and Music Teachers, Postsecondary': KeyError('h_mean'),
 'Athletes and Sports Competitors': KeyError('h_mean'),
 'Athletic Trainers': KeyError('h_mean'),
 'Atmospheric, Earth, Marine, and Space Sciences Teachers, Postsecondary': KeyError('h_mean'),
 'Biological Science Teachers, Postsecondary': KeyError('h_mean'),
 'Business Teachers, Postsecondary': KeyError('h_mean'),
 'Career/Technical Education Teachers, Middle School': KeyError('h_mean'),
 'Career/Technical Education Teachers, Secondary School': KeyError('h_mean'),
 'Chemistry Teachers, Postsecondary': KeyError('h_mean'),
 'Coaches 