In [3]:
import json
from pprint import pprint

import requests
import pandas as pd
from sqlalchemy import text

from detroit_tech_employment import db_engine
from detroit_tech_employment.auth import refresh_auth
from detroit_tech_employment.reference import DETROIT_ZIPS
from detroit_tech_employment.throttle import throttle

In [5]:
# Select every row of naics.codes whose code is exactly two characters long.
top_lev_codes_q = text("""
select *
from naics.codes
where length(code) = 2;
""")


with db_engine.connect() as db:
    result = db.execute(top_lev_codes_q)
    # Materialize the rows while the connection is still open: once the
    # `with` block exits the connection is closed and the rows can no longer
    # be fetched from `result`.
    top_lev_codes = result.fetchall()

In [6]:
def convert_to_df(response):
    """Turn a column-oriented API response into a DataFrame.

    Args:
        response: Mapping with a ``"data"`` entry holding a sequence of
            column descriptions; each description provides the column label
            under ``"name"`` and the column values under ``"rows"``.

    Returns:
        A ``pandas.DataFrame`` with one column per description, in the order
        the descriptions appear.
    """
    columns = {}
    for column in response["data"]:
        columns[column["name"]] = column["rows"]
    return pd.DataFrame(columns)

In [62]:
# Refresh the API credentials and build the headers sent with each request:
# the access token goes out as a bearer token and bodies are sent as JSON.
auth = refresh_auth()
headers = {
    "Authorization": "Bearer {}".format(auth["access_token"]),
    "Content-Type": "application/json",
}

In [8]:

# Request yearly Jobs and Earnings figures from the emsi.us.industry dataset
# for the areas listed below, restricted to industry code 54.
url = "https://agnitio.emsicloud.com/emsi.us.industry/2024.2"

payload = {
    # One aliased metric per (measure, year) pair for 2012 through 2023, e.g.
    # {"name": "Jobs.2012", "as": "2012 Jobs"}. Generating the list guarantees
    # that no metric is requested twice under the same alias.
    "metrics": [
        {"name": f"{measure}.{year}", "as": f"{year} {measure}"}
        for measure in ("Jobs", "Earnings")
        for year in range(2012, 2024)
    ],

    "constraints": [
        {
            "dimensionName": "Area",
            "map": {
                "04000US26": ["26"], # Michigan
                "05000US26163": ["26163"], # Wayne County
                "05000US26125": ["26125"], # Oakland County
                "05000US26099": ["26099"], # Macomb
                "05000US26161": ["26161"], # Washtenaw
                "31000US19820": ["MSA19820"], # Detroit-Warren-Dearborn MSA
                **DETROIT_ZIPS
            }
        },
        {
            "dimensionName": "Industry",
            "map": {
                "54": ["54"],
            },
        }
    ]
}


response = requests.post(url, headers=headers, json=payload)


In [51]:
from detroit_tech_employment.throttle import throttle


@throttle(0.5)
def pull_metadata(dataset: str):
    """Download the metadata of one dataset and save it as a JSON file.

    Sends a GET request to ``/meta/dataset/{dataset}/2024.4`` using the
    module-level ``headers`` and writes the decoded body to
    ``lmi_datasets__{dataset}.json`` in the current working directory.

    Args:
        dataset: Dataset name as it appears in the metadata URL.

    Returns:
        None. When the server answers with an error status, or the body is
        not valid JSON, the response is printed and no file is written.
    """
    url = f"https://agnitio.emsicloud.com/meta/dataset/{dataset}/2024.4"
    response = requests.request("get", url, headers=headers)

    # An error response may still carry a JSON body; saving it would leave a
    # file on disk that looks like dataset metadata but is not.
    if not response.ok:
        print(response.status_code, response.content)
        return

    # Only the decoding step is guarded, so file-writing problems are not
    # misreported as a bad response. ValueError is the common base class of
    # json.JSONDecodeError and of the decode errors requests can raise.
    try:
        meta = response.json()
    except ValueError:
        print(response.content)
        return

    with open(f"lmi_datasets__{dataset}.json", "w") as f:
        json.dump(meta, f, indent=4)


In [52]:
# Read the dataset listing; its "datasets" entry is iterated by the cells
# that follow.
with open("lmi_datasets.json") as listing_file:
    datasets = json.load(listing_file)


# Per-dataset metadata download, left disabled
# (presumably because the files were already fetched -- TODO confirm):
# for dataset in datasets["datasets"]:
    # pull_metadata(dataset["name"])

In [61]:
# For each dataset, print the largest value in the "levelsStored" list of its
# "Area" dimension, or "No AREA" when the saved metadata has no such dimension.
for dataset in datasets["datasets"]:
    with open(f"lmi_datasets__{dataset['name']}.json") as metadata_file:
        dictionary = json.load(metadata_file)

    print(dataset["name"], end=": ")

    area_hits = 0
    for dimension in dictionary["dimensions"]:
        if dimension["name"] != "Area":
            continue
        print(max(dimension["levelsStored"]))
        area_hits += 1

    if area_hits == 0:
        print("No AREA")

emsi.us.ind.firm: 3
emsi.us.staffing: 4
emsi.us.staffing.earn: No AREA
emsi.us.occ.hires.seps: 3
emsi.us.industry: 4
emsi.us.industry.raceethnicity: 3
emsi.us.occupation: 4
emsi.us.occupation.detailed: 3
emsi.us.occupation.raceethnicity: 3
emsi.us.unemployment.occupation: 3


In [4]:
# Gather all occupations: every row of soc.definitions that has a non-null
# definition. The fetched rows are kept in `rows` for the cells below.

q = text("""
select *
from soc.definitions
where definition is not null;
""")


with db_engine.connect() as connection:
    # Fetch everything before the connection is closed at the end of the block.
    result = connection.execute(q)
    rows = result.fetchall()

In [14]:
# Obtain fresh credentials and rebuild the request headers: bearer-token
# authorization plus a JSON content type.
auth = refresh_auth()
headers = {
    "Authorization": "Bearer {}".format(auth["access_token"]),
    "Content-Type": "application/json",
}


@throttle(0.5)
def staffing_pull(codes: list):
    """Request yearly job counts (2001-2034) for a batch of occupation codes.

    Posts to the ``emsi.us.staffing/2024.2`` endpoint using the module-level
    ``headers``. The request is constrained to the areas listed in the payload
    and to the given occupations.

    Args:
        codes: Occupation codes to query. Each element may be either a plain
            code string or an object exposing the code through a ``.code``
            attribute (for example a database row).

    Returns:
        The decoded JSON body of the response.
    """
    url = "https://agnitio.emsicloud.com/emsi.us.staffing/2024.2"

    # Accept bare strings as well as row-like objects: fall back to the
    # element itself when it has no `.code` attribute.
    occupation_codes = [getattr(code, "code", code) for code in codes]

    payload = {
        # One "Jobs.<year>" metric for every year from 2001 through 2034.
        "metrics": [{"name": f"Jobs.{year}"} for year in range(2001, 2035)],

        "constraints": [
            {
                "dimensionName": "Area",
                "map": {
                    "04000US26": ["26"], # Michigan
                    "05000US26163": ["26163"], # Wayne County
                    "05000US26125": ["26125"], # Oakland County
                    "05000US26099": ["26099"], # Macomb
                    "05000US26161": ["26161"], # Washtenaw
                    "31000US19820": ["MSA19820"], # Detroit-Warren-Dearborn MSA
                    **DETROIT_ZIPS
                }
            },
            {
                "dimensionName": "Occupation",
                # Each occupation code maps to a single-element list holding
                # that same code.
                "map": {
                    occupation_code: [occupation_code]
                    for occupation_code in occupation_codes
                },
            },
        ]
    }

    response = requests.request("post", url, headers=headers, json=payload)

    return response.json()

In [15]:
from itertools import islice

def chunked(iterable, chunksize):
    """Yield successive lists of at most ``chunksize`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one chunk at a time.
        chunksize: Maximum number of items per yielded list.

    Yields:
        Lists of consecutive items. Every list has ``chunksize`` items except
        possibly the last, which holds the remainder. Nothing is yielded for
        an empty iterable.
    """
    source = iter(iterable)
    while True:
        batch = list(islice(source, chunksize))
        # An empty batch means the source is exhausted.
        if not batch:
            return
        yield batch


# Pull staffing data for every occupation row in batches, keeping the
# responses in memory and checkpointing each one to disk as it arrives.
chunk_size = 10
# Ceiling division: the number of batches needed to cover all rows.
total_pulls = (len(rows) + chunk_size - 1) // chunk_size

results = []
for i, codes in enumerate(chunked(rows, chunk_size)):
    print(f"Starting pull {i+1}/{total_pulls}:", end=" ")
    pull_result = staffing_pull(codes)
    results.append(pull_result)

    # Append only this pull's response, one JSON document per line. Writing
    # the whole accumulated list on every iteration would repeat all earlier
    # responses on each new line.
    with open("staffing_results_2001_2034.json", "a") as f:
        f.write(json.dumps(pull_result) + '\n')

    print("COMPLETE")

