In [1]:
import cvxpy as cp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import re
import squarify

from functions import run_proportional_scaling, run_linear_programming, save_data_to_time_series
from helpers import fetch_area_data, get_variables
from tree import build_tree, fetch_branch, fetch_values_given_key, get_subindustries_data, get_constraints, write_into
from config import settings

In [2]:
# Gather every industry code that shows up in the county's data for any year
# of the study period, then prepare an empty table with one column per code.
county_code = '13121'
years = [*range(2014, 2021 + 1)]
industry_codes = []
for year in years:
    # NOTE(review): the literals 4, '10' and 71 are passed straight to project
    # helpers; their meaning is not visible in this notebook — confirm.
    df, url = fetch_area_data(year, 4, county_code)
    county = build_tree(df, '10', 71)
    # Codes are appended year after year, so this list contains repeats.
    industry_codes.extend(fetch_values_given_key(county, 'ind', []))

# np.unique drops the repeats and sorts the codes before they become columns.
employment_series = pd.DataFrame([], columns=np.unique(industry_codes))


In [3]:
# For each year: rebuild the county tree, run the linear-programming step on
# the 'emp' field, and record the result in the employment time series.
for year in years:
    df, url = fetch_area_data(year, 4, county_code)
    county = run_linear_programming(build_tree(df, '10', 71), 'emp')
    employment_series = save_data_to_time_series(
        employment_series,
        county,
        industry_codes,
        'emp',
        year,
    )

In [4]:
# Load the Lightcast reference table; the first CSV column becomes the row index.
lightcast = pd.read_csv(
    'lightcast.csv',
    index_col=[0],
)

In [5]:
# Keep only the column labels that are exactly 6 characters long.
inds = employment_series.columns[employment_series.columns.str.len() == 6].tolist()

In [6]:
# Empty table with one column per year from 2014 through 2021 (inclusive);
# the columns are filled in by a later cell.
percentage_differences = pd.DataFrame(
    [],
    columns=[*range(2014, 2021 + 1)],
)

In [7]:
# Compare the EDai estimates in `employment_series` against Lightcast for every
# code in `inds` that Lightcast also reports, year by year.
#
# Per year this cell:
#   * prints the mean absolute error (NaNs ignored) across the compared codes;
#   * stores, in `percentage_differences[year]`, the signed difference
#     (Lightcast - EDai) expressed as a percentage of the Lightcast value.
#
# The original stored the raw difference even though the target is named
# `percentage_differences` and the `lightcast_value > 0` guard only makes sense
# as protection against a zero denominator; the division is now performed.
#
# Side effect: Lightcast cells equal to the string '<10' are overwritten with 5
# in `lightcast` itself, so the table is numeric for those cells afterwards.
years = list(range(2014, 2021 + 1))
for year in years:
    year_col = str(year)  # lightcast columns are addressed by the year as a string
    absolute_errors = []
    differences = []
    index = []  # codes actually compared; the last year's list is used as the row index later
    for ind in inds:
        code = int(ind)  # lightcast rows are addressed by integer code
        if code not in lightcast.index:
            continue

        if lightcast.at[code, year_col] == '<10':
            # Suppressed value: substitute the placeholder 5 and write it back.
            lightcast.at[code, year_col] = 5
            lightcast_value = 5
        else:
            lightcast_value = int(lightcast.at[code, year_col])
        edai_value = employment_series.at[year, ind]

        absolute_errors.append(np.abs(lightcast_value - edai_value))
        if lightcast_value > 0:
            differences.append((lightcast_value - edai_value) / lightcast_value * 100)
        else:
            # No meaningful percentage when the reference value is zero.
            differences.append(np.nan)
        index.append(ind)

    print(np.nanmean(absolute_errors))
    percentage_differences[year] = differences


69.57485728197045
63.81434403471668
70.36406018992399
74.36955937549278
88.72276179446042
71.71237910129315
79.97205981011943
89.25370796232662


In [8]:
# Label the rows with the industry codes collected in `index` by the comparison loop.
percentage_differences = percentage_differences.set_axis(index, axis=0)

In [9]:
# Write the per-industry, per-year comparison table to disk.
percentage_differences.to_csv(path_or_buf='comparison 6 digits.csv')

In [10]:
# Lightcast vs. EDai employment over the study years for code 722511.
# lightcast rows are looked up by integer code, employment_series columns by string code.
fig = go.Figure(
    data=[
        go.Scatter(
            x=years,
            y=lightcast.loc[722511].astype(float),
            name='Lightcast',
        ),
        go.Scatter(
            x=years,
            y=employment_series['722511'].astype(float),
            name='EDai',
        ),
    ]
)
# Last expression of the cell: update_layout returns the figure, which displays it.
fig.update_layout(
    title='Employment in 722511',
    width=800,
    height=450,
    legend={'y': 0.99, 'x': 0.01},
)

In [11]:
# Lightcast (solid) vs. EDai (dashed) employment for codes 444110 and 444120,
# one colour per code. Traces are added in the order
# Lightcast/EDai for 444110, then Lightcast/EDai for 444120.
fig = go.Figure()
for naics_code, colour in [(444110, 'blue'), (444120, 'red')]:
    fig.add_trace(
        go.Scatter(
            x=years,
            # lightcast rows are looked up by integer code
            y=lightcast.loc[naics_code].astype(float),
            name=f'Lightcast {naics_code}',
            line={'color': colour},
        )
    )
    fig.add_trace(
        go.Scatter(
            x=years,
            # employment_series columns are looked up by string code
            y=employment_series[str(naics_code)].astype(float),
            name=f'EDai {naics_code}',
            line={'color': colour, 'dash': 'dash'},
        )
    )

# The title names 4441, the shortest code prefix shared by both plotted series
# (the previous title, 44411, does not cover 444120).
# Last expression of the cell: update_layout returns the figure, which displays it.
fig.update_layout(
    title='Employment in 4441',
    width=800,
    height=450,
    legend={'y': 0.99, 'x': 0.01},
)

In [24]:
# Dump the '4441' branch of the current `county` tree: first the branch itself,
# then each of its children. For every node the 'est' and 'emp' values are
# printed, followed by 'emp_ps' / 'emp_lp' when present and not None.
# NOTE(review): this cell relies on whichever `county` was built last, and its
# execution count is out of sequence with its position — confirm the intended year.
industry = fetch_branch(county, 'ind', '4441')
print(industry['est'])
print(industry['emp'])
for field in ('emp_ps', 'emp_lp'):
    if industry.get(field) is not None:
        print(f"{field} {industry[field]}")
for child in industry['children']:
    print(f"*** ind {child['ind']} *** ")
    print(child['est'])
    print(child['emp'])
    for field in ('emp_ps', 'emp_lp'):
        if child.get(field) is not None:
            print(f"{field} {child[field]}")

115
2975
*** ind 44411 *** 
18
0
emp_lp 1097.9999999998206
*** ind 44412 *** 
18
0
emp_lp 1098.0000000001792
*** ind 44413 *** 
21
244
*** ind 44419 *** 
58
535


In [12]:
# Rebuild the county tree for 2018 and run the linear-programming step on 'emp'.
year = 2018
df, url = fetch_area_data(year, 4, county_code)
county = run_linear_programming(build_tree(df, '10', 71), 'emp')


In [14]:
# Rebuild the county tree for 2017 and run the linear-programming step on 'emp'.
year = 2017
df, url = fetch_area_data(year, 4, county_code)
county = run_linear_programming(build_tree(df, '10', 71), 'emp')

In [15]:
# Dump the '45231' branch of the current `county` tree: first the branch itself,
# then each of its children. For every node the 'est' and 'emp' values are
# printed, followed by 'emp_ps' / 'emp_lp' when present and not None.
industry = fetch_branch(county, 'ind', '45231')
print(industry['est'])
print(industry['emp'])
for field in ('emp_ps', 'emp_lp'):
    if industry.get(field) is not None:
        print(f"{field} {industry[field]}")
for child in industry['children']:
    print(f"*** ind {child['ind']} *** ")
    print(child['est'])
    print(child['emp'])
    for field in ('emp_ps', 'emp_lp'):
        if child.get(field) is not None:
            print(f"{field} {child[field]}")

113
6363
*** ind 452311 *** 
21
4723
*** ind 452319 *** 
92
1640


In [16]:
# Rebuild the county tree for 2019 and run the linear-programming step on 'emp'.
# NOTE(review): `ind` is assigned here but not read by any later visible cell.
ind = 452311
year = 2019
df, url = fetch_area_data(year, 4, county_code)
county = run_linear_programming(build_tree(df, '10', 71), 'emp')

In [17]:
# Dump the '45231' branch of the current `county` tree: first the branch itself,
# then each of its children. For every node the 'est' and 'emp' values are
# printed, followed by 'emp_ps' / 'emp_lp' when present and not None.
industry = fetch_branch(county, 'ind', '45231')
print(industry['est'])
print(industry['emp'])
for field in ('emp_ps', 'emp_lp'):
    if industry.get(field) is not None:
        print(f"{field} {industry[field]}")
for child in industry['children']:
    print(f"*** ind {child['ind']} *** ")
    print(child['est'])
    print(child['emp'])
    for field in ('emp_ps', 'emp_lp'):
        if child.get(field) is not None:
            print(f"{field} {child[field]}")

109
6599
*** ind 452311 *** 
19
4582
*** ind 452319 *** 
90
2017


In [18]:
# Lightcast (solid) vs. EDai (dashed) employment for codes 452311 (blue) and
# 452319 (red). lightcast rows are looked up by integer code,
# employment_series columns by string code.
fig = go.Figure(
    data=[
        go.Scatter(
            x=years,
            y=lightcast.loc[452311].astype(float),
            name='Lightcast 452311',
            line=dict(color='blue'),
        ),
        go.Scatter(
            x=years,
            y=employment_series['452311'].astype(float),
            name='EDai 452311',
            line=dict(color='blue', dash='dash'),
        ),
        go.Scatter(
            x=years,
            y=lightcast.loc[452319].astype(float),
            name='Lightcast 452319',
            line=dict(color='red'),
        ),
        go.Scatter(
            x=years,
            y=employment_series['452319'].astype(float),
            name='EDai 452319',
            line=dict(color='red', dash='dash'),
        ),
    ]
)
# Last expression of the cell: update_layout returns the figure, which displays it.
fig.update_layout(
    title='Employment comparisons',
    width=800,
    height=450,
    legend={'y': 0.99, 'x': 0.01},
)

In [19]:
# Lightcast (solid) vs. EDai (dashed) employment for codes 622110 (blue) and
# 622210 (red). lightcast rows are looked up by integer code,
# employment_series columns by string code.
fig = go.Figure(
    data=[
        go.Scatter(
            x=years,
            y=lightcast.loc[622110].astype(float),
            name='Lightcast 622110',
            line=dict(color='blue'),
        ),
        go.Scatter(
            x=years,
            y=employment_series['622110'].astype(float),
            name='EDai 622110',
            line=dict(color='blue', dash='dash'),
        ),
        go.Scatter(
            x=years,
            y=lightcast.loc[622210].astype(float),
            name='Lightcast 622210',
            line=dict(color='red'),
        ),
        go.Scatter(
            x=years,
            y=employment_series['622210'].astype(float),
            name='EDai 622210',
            line=dict(color='red', dash='dash'),
        ),
    ]
)
# Last expression of the cell: update_layout returns the figure, which displays it.
fig.update_layout(
    title='Employment comparisons',
    width=800,
    height=450,
    legend={'y': 0.99, 'x': 0.01},
)