In [181]:
import cvxpy as cp
import matplotlib.pyplot as plt
import numexpr as ne
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import re
import squarify

from functions import run_linear_programming, set_optimization_problem, save_data_to_time_series
from helpers import fetch_area_data, fetch_industry_data, get_node_variables, get_optimization_variables
from tree import build_county_tree, get_objective, get_constraints, build_state_tree, fetch_branch, \
    fetch_values_given_key, write_into
from config import settings

In [2]:
# Run settings: state FIPS prefix '13' (Georgia) and the reference year.
state_code = '13'
year = 2018

# Pull the industry table and keep the distinct area FIPS codes that start with
# the state prefix. np.unique returns them sorted; later cells treat codes[0]
# as the state-level area and codes[1:] as the counties.
# NOTE(review): 4 and '102' are passed straight to the helper (presumably the
# quarter and an industry code) - confirm in helpers.fetch_industry_data.
df, url = fetch_industry_data(year, 4, '102')
area_fips = df['area_fips']
codes = list(np.unique(area_fips[area_fips.str.startswith(state_code)]))

In [7]:
# codes[0] is handled as the state-level area: fetch its table and build the state tree.
# NOTE(review): '10' looks like the root industry code and 51 / 71 like the
# aggregation levels for state / county rows - confirm against tree.build_*_tree.
df, _ = fetch_area_data(year, 4, codes[0])
state = build_state_tree(df, '10', 51)
# One tree per remaining area code, stored under that code.
counties = {}
for code in codes[1:]:
    df, _ = fetch_area_data(year, 4, code)
    county = build_county_tree(df, '10', 71)
    counties[code] = county

In [97]:
def get_state_objective(tree_code, tree, key, objective):
    """
    Append the leaf terms of a tree to an objective string.

    The tree is walked depth-first. Every node without children adds
    " - <term>" to ``objective``: the recorded ``tree[key]`` when it is
    non-zero, otherwise the unknown ``{est}*{k}pe_{tree_code}_{ind}`` where
    ``k`` is the first letter of ``key``. Nodes that have children add
    nothing themselves; only their descendants do.

    Returns the extended objective string.
    """
    subtrees = tree['children']
    if not subtrees:
        # Leaf: a zero value is written as establishments times an unknown.
        value = tree[key]
        if value == 0:
            term = f"{tree['est']}*{key[0]}pe_{tree_code}_{tree['ind']}"
        else:
            term = f"{value}"
        return objective + " - " + term
    for subtree in subtrees:
        objective = get_state_objective(tree_code, subtree, key, objective)
    return objective

In [98]:
# Objective function
# Start from the state-level total as a string, then let get_state_objective
# append one " - <term>" per leaf of every county tree (codes[1:] are the
# keys of `counties`).
objective = str(state['emp'])
for code in codes[1:]:
    objective = get_state_objective(code, counties[code], 'emp', objective)

In [99]:
def get_tree_constraints(tree_code, tree, key, constraints):
    """
    Collect the "parent = sum of children" constraints of a tree.

    For every node that has children a string
    "<parent> = <child> + <child> ..." is built. A node whose ``[key]`` value
    is 0 is written as the unknown ``{est}*{k}pe_{tree_code}_{ind}`` (``k``
    being the first letter of ``key``); any other node is written as its
    recorded value. A constraint is kept only when it mentions at least one
    unknown and its two sides are not textually identical.

    Parameters:
        tree_code: area code embedded in the variable names.
        tree: node dict with 'children', 'est', 'ind' and ``key`` entries.
        key: name of the measure (e.g. 'emp'); its first letter prefixes the
            variable names.
        constraints: list the constraints are appended to (mutated in place).

    Returns the same ``constraints`` list.
    """
    # Derive the variable marker from `key` instead of hard-coding 'epe', so
    # that keys not starting with 'e' are no longer silently filtered out.
    variable_tag = f"{key[0]}pe"
    children = tree['children']
    if len(children) > 0:
        if tree[key] == 0:
            constraint = f"{tree['est']}*{variable_tag}_{tree_code}_{tree['ind']} = "
        else:
            constraint = f"{tree[key]} = "
        for i, child in enumerate(children):
            if i > 0:
                constraint += ' + '
            if child[key] == 0:
                # The trailing space is part of the existing output format.
                constraint += f"{child['est']}*{variable_tag}_{tree_code}_{child['ind']} "
            else:
                constraint += f"{child[key]}"
        if variable_tag in constraint:
            sides = constraint.split(' = ')
            if sides[0] != sides[1]:
                constraints.append(constraint)
    for child in children:
        constraints = get_tree_constraints(tree_code, child, key, constraints)
    return constraints

In [100]:
# County constraints
# Accumulate the parent/children constraints of every county tree into one
# list; get_tree_constraints appends to the list it is given and returns it.
counties_constraints = [] 
for code in codes[1:]:
    counties_constraints = get_tree_constraints(code, counties[code], 'emp', counties_constraints)

In [101]:
# State own constraints
# Parent/children constraints inside the state tree itself; codes[0] is the
# area code embedded in the state-level variable names.
state_own_constraints = get_tree_constraints(codes[0], state, 'emp', [])

In [102]:
# State-county constraints
def get_state_county_constraints(state, counties, key, state_fips=None):
    """
    Build the constraints linking each state industry to the same industry in the counties.

    For every industry code found in the state tree, except the first one
    returned by ``fetch_values_given_key``, a string
    "<state term> = <county term> + <county term> ..." is built from the
    counties whose tree contains that industry. A node whose ``[key]`` value
    is 0 is written as the unknown ``{est}*{k}pe_{area}_{ind}`` (``k`` being
    the first letter of ``key``); any other node is written as its recorded
    value. Only constraints that mention at least one unknown are returned.

    Parameters:
        state: state tree.
        counties: dict mapping county code -> county tree.
        key: name of the measure (e.g. 'emp').
        state_fips: area code used in state-level variable names. When left
            as None the notebook-level ``codes[0]`` is used, which is what
            this function always did before the parameter existed.

    Returns a list of constraint strings.
    """
    variable_tag = f"{key[0]}pe"
    state_county_constraints = []
    state_inds = fetch_values_given_key(state, 'ind', [])
    for state_ind in state_inds[1:]:
        state_node = fetch_branch(state, 'ind', state_ind)
        if state_node[key] == 0:
            # Resolve the fallback lazily so `codes` is only read when a
            # state-level variable name is actually needed.
            area = codes[0] if state_fips is None else state_fips
            left_side = f"{state_node['est']}*{variable_tag}_{area}_{state_ind}"
        else:
            left_side = f"{state_node[key]}"

        county_terms = []
        for code in counties.keys():
            county_node = fetch_branch(counties[code], 'ind', state_ind)
            if county_node is None:
                # This county does not report the industry.
                continue
            if county_node[key] == 0:
                county_terms.append(f"{county_node['est']}*{variable_tag}_{code}_{state_ind}")
            else:
                county_terms.append(f"{county_node[key]}")

        # NOTE(review): when no county carries the industry the right-hand side
        # is empty ("<left> = "), exactly as before - confirm this cannot occur
        # for a state node written as an unknown.
        state_county_constraint = f"{left_side} = " + " + ".join(county_terms)
        if variable_tag in state_county_constraint:
            state_county_constraints.append(state_county_constraint)
    return state_county_constraints

In [103]:
# Constraints tying each state-level industry to the matching county-level industries.
state_county_constraints = get_state_county_constraints(state, counties, 'emp')

In [109]:
# Pool the three constraint families into one list; the cell displays how many there are.
constraints = counties_constraints + state_own_constraints + state_county_constraints
len(constraints)

54550

In [111]:
# Names of the unknowns for key 'emp'; the cell displays how many there are.
# NOTE(review): presumably the distinct variable names parsed out of the
# constraint strings - confirm in helpers.get_optimization_variables.
variables = get_optimization_variables(constraints, 'emp')
len(variables)

73476

In [135]:
# Create one cvxpy variable per name. exec binds each name in the notebook
# namespace so that the constraint strings below can be evaluated as code.
for variable in variables:
    exec(f"{variable} = cp.Variable()")
numerical_constraints = []
# Every '=' in a constraint string is rewritten to '>=' before evaluation, so
# each "parent = children" string is registered as "parent >= children".
# NOTE(review): confirm the inequality is intended; a cvxpy equality would
# be written with '=='.
for i,constraint in enumerate(constraints):
    numerical_constraints.append(eval(f"{constraint.replace('=','>=')}"))
# Non-negativity bound on every variable.
for variable in variables:
    numerical_constraints.append(eval(f"{variable}>= 0"))

In [138]:
import sys

# The objective string is one long chain of " - <term>" pieces.
# NOTE(review): the recursion limit is raised presumably so that this very
# long expression can be compiled and evaluated - confirm it is still needed.
sys.setrecursionlimit(100000)

# `objective` enters this cell as a string and leaves it as a cvxpy objective.
# The isinstance guard keeps the cell re-runnable: on a second run the name
# already holds the built objective, and formatting that object back into
# source text (as the exec-based version did) would not evaluate.
if isinstance(objective, str):
    objective = cp.Minimize(cp.abs(eval(objective)))

In [139]:
# Assemble the problem and solve it with the ECOS solver.
problem = cp.Problem(objective, numerical_constraints)
optimal_value = problem.solve(solver=cp.ECOS)

# Stop here when the solver did not return a usable solution: the variables
# carry no values in that case and the cells that read `.value` afterwards
# would fail with a far less helpful error.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Optimization ended with status {problem.status!r}")

# Displayed as the cell output, as before.
optimal_value



2.7372880140319467e-07

In [176]:
def extract_codes(variable):
    """
    Split a variable name shaped like <prefix>_<county>_<industry> into its codes.

    The county code is the text between the first two underscores; the
    industry code is everything after the second underscore with surrounding
    whitespace stripped. Raises IndexError when the name has fewer than two
    underscores.
    """
    # At most two splits, so any further underscore stays in the industry part.
    pieces = variable.split('_', 2)
    return pieces[1], pieces[2].strip()

In [177]:
# Write the solved values for county '13121' back into its tree under
# '<key>_lp': establishments times the solved value of the node's variable.
key = 'emp'
for variable in variables:
    county_code, ind = extract_codes(variable)
    # Skip variables of other areas, and the county itself if it has no tree.
    if county_code != '13121' or county_code not in counties:
        continue
    county = counties[county_code]
    branch = fetch_branch(county, 'ind', ind)
    # The variable lives in the notebook namespace under its own name.
    solved_value = eval(f"float({variable}.value)")
    write_into(county, 'ind', branch['ind'], {f'{key}_lp': branch['est'] * solved_value})

In [201]:
# Collect the industry codes of the selected county's tree; their unique
# values become the columns of an (initially empty) employment frame.
county_code = '13121'
# Bind the tree explicitly. Previously `county` was whatever an earlier loop
# happened to leave behind, which is not guaranteed to be this county.
county = counties[county_code]
industry_codes = fetch_values_given_key(county, 'ind', [])
employment_series = pd.DataFrame([], columns=np.unique(industry_codes))

In [202]:
# Fill the frame from the county tree for key 'emp' and the run year.
# NOTE(review): the helper is not visible here; the comparison cell reads the
# result with .at[year, <industry code>], so rows are presumably indexed by year.
employment_series = save_data_to_time_series(
        employment_series, county, industry_codes, 'emp', year
) 

In [203]:
# Benchmark figures; the first CSV column becomes the index.
lightcast = pd.read_csv('lightcast.csv', index_col=[0])

# Keep only the industry codes that are exactly six characters long.
series_columns = employment_series.columns
inds = list(series_columns[series_columns.str.len() == 6])

# Empty frame with one column for the run year, filled by the comparison cell.
percentage_differences = pd.DataFrame([], columns=[year])

In [204]:
# Compare the benchmark with our own series, industry by industry. Despite the
# name of the target frame, the stored figures are plain differences
# (benchmark minus ours), not percentages.
differences = []
index = []
year_column = str(year)
for ind in inds:
    naics = int(ind)
    if naics not in lightcast.index:
        continue
    if lightcast.at[naics, year_column] == '<10':
        # A '<10' entry is replaced by 5, in the benchmark frame too.
        lightcast.at[naics, year_column] = 5
        lightcast_value = 5
    else:
        lightcast_value = int(lightcast.at[naics, year_column])
    edai_value = employment_series.at[year, ind]
    # A non-positive benchmark value counts as missing.
    differences.append(lightcast_value - edai_value if lightcast_value > 0 else np.nan)
    index.append(ind)

# Mean absolute difference, ignoring the missing entries.
print(np.nanmean(np.abs(differences)))
percentage_differences[year] = differences
percentage_differences.index = index
percentage_differences.to_csv('comparison 6 digits.csv')


75.74664517381274


In [208]:
def _print_node_summary(node):
    """Print a node's 'est' and 'emp' values, then any 'emp_ps' / 'emp_lp' estimate it has."""
    print(node['est'])
    print(node['emp'])
    if node.get('emp_ps') is not None:
        print(f"emp_ps {node['emp_ps']}")
    if node.get('emp_lp') is not None:
        print(f"emp_lp {node['emp_lp']}")


# Inspect industry '56132' in the current county tree, then each direct child.
industry = fetch_branch(county, 'ind', '56132')
_print_node_summary(industry)
for child in industry['children']:
    print(f"*** ind {child['ind']} *** ")
    _print_node_summary(child)

762
32467
*** ind 561320 *** 
762
32467


In [214]:
# Spot-check industry '561320' in area '13121' for 2021 against the raw data:
# once with period 4 (column 'month3_emplvl') and once with period 'a'
# (column 'annual_avg_emplvl').
for period, column in ((4, 'month3_emplvl'), ('a', 'annual_avg_emplvl')):
    df, url = fetch_area_data(2021, period, '13121')
    print(df.loc[df['industry_code'] == '561320', column])


1748    32467
Name: month3_emplvl, dtype: int64
1748    30329
Name: annual_avg_emplvl, dtype: int64
