In [2]:
import pandas as pd
import plotly.express as px
import requests
import json

In [3]:
# Yearly CPS extracts keyed by survey year, read from the CSV files written by
# the earlier API-query code.
ufcpsdata = {}

for year in range(2010, 2025):
    file_path = f"/your/file/path/comb_cps_data_{year}.csv"
    yearly = pd.read_csv(file_path)

    # Drop rows whose GTCBSA location code is 0, renumber the rows, and remove
    # the 'Unnamed: 0' column (presumably an index saved with the CSV).
    has_location = yearly['GTCBSA'] != 0
    yearly = yearly[has_location].reset_index(drop=True).drop(columns='Unnamed: 0')

    # 'month_year' is split on '-': the first piece is kept as the month and
    # the second piece is cast to an integer year.
    date_parts = yearly['month_year'].str.split('-')
    yearly['year'] = date_parts.str[1].astype(int)
    yearly['month'] = date_parts.str[0]

    ufcpsdata[year] = yearly
    print(f"Data for {year} retreived")

Data for 2010 retreived
Data for 2011 retreived
Data for 2012 retreived
Data for 2013 retreived
Data for 2014 retreived
Data for 2015 retreived
Data for 2016 retreived
Data for 2017 retreived
Data for 2018 retreived
Data for 2019 retreived
Data for 2020 retreived
Data for 2021 retreived
Data for 2022 retreived
Data for 2023 retreived
Data for 2024 retreived


In [4]:
# Pull the code -> label map of every categorical variable from the Census API.
def fetch_census_mapping(year, month, variable, timeout=30):
    """Fetch the code-to-label map of one CPS Basic Monthly variable.

    Parameters
    ----------
    year : int
        Survey year used in the API path.
    month : str
        Month segment used in the API path (e.g. ``'jan'``).
    variable : str
        Variable name, e.g. ``'PESEX'``.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The entries of the response's ``values -> item`` object, with each
        key converted to ``int``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    KeyError
        If the response has no ``values -> item`` entry.
    """
    url = f"https://api.census.gov/data/{year}/cps/basic/{month}/variables/{variable}.json"
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    items = json.loads(response.text)['values']['item']
    return {int(code): label for code, label in items.items()}


# NOTE(review): each map comes from a single survey vintage but is applied to
# every year of data downstream -- confirm the labels are stable across years.

# location maps
state_mapping = fetch_census_mapping(2024, 'jan', 'STATE')
area_mapping = fetch_census_mapping(2023, 'jan', 'GTCBSA')
reg_mapping = fetch_census_mapping(2021, 'mar', 'GEREG')

# demographic maps
sex_mapping = fetch_census_mapping(2023, 'jan', 'PESEX')
layoff_mapping = fetch_census_mapping(2023, 'jan', 'PELAYFTO')
race_mapping = fetch_census_mapping(2023, 'jan', 'PTDTRACE')
edu_mapping = fetch_census_mapping(2023, 'jan', 'PEEDUCA')
mar_mapping = fetch_census_mapping(2023, 'jan', 'PEMARITL')
inc_mapping = fetch_census_mapping(2023, 'jan', 'HEFAMINC')
fam_mapping = fetch_census_mapping(2024, 'jan', 'HRHTYPE')

In [5]:
# Labelled copy of each yearly frame. A plain dict.copy() would only copy the
# dict itself and leave both dicts pointing at the same DataFrames, so adding
# columns would silently change ufcpsdata too. assign() returns a new frame,
# which keeps ufcpsdata untouched and makes this cell safe to re-run.
cpsdata = {}

for year in range(2010, 2025):
    source = ufcpsdata[year]
    cpsdata[year] = source.assign(
        # location codes -> names; the city keeps the text before the first
        # comma and the region keeps the first space-separated word
        state=source['GESTFIPS'].map(state_mapping),
        city=source['GTCBSA'].map(area_mapping).str.split(',').str[0],
        region=source['GEREG'].map(reg_mapping).str.split(' ').str[0],
        # demographic codes -> labels (codes missing from a map become NaN)
        sex=source['PESEX'].map(sex_mapping),
        layoff=source['PELAYFTO'].map(layoff_mapping),
        race=source['PTDTRACE'].map(race_mapping),
        educ=source['PEEDUCA'].map(edu_mapping),
        marstatus=source['PEMARITL'].map(mar_mapping),
        famincome=source['HEFAMINC'].map(inc_mapping),
        famtype=source['HRHTYPE'].map(fam_mapping),
    )

    print(f"Data for {year} mapped")

Data for 2010 mapped
Data for 2011 mapped
Data for 2012 mapped
Data for 2013 mapped
Data for 2014 mapped
Data for 2015 mapped
Data for 2016 mapped
Data for 2017 mapped
Data for 2018 mapped
Data for 2019 mapped
Data for 2020 mapped
Data for 2021 mapped
Data for 2022 mapped
Data for 2023 mapped
Data for 2024 mapped


In [7]:
    # Restructure, drop duplicate columns,  rename columns
# The two raw CPS columns that get a friendlier name.
renamed_cols = {'PELAYDUR': 'layoffdur', 'HRNUMHOU': 'famnum'}
# Columns kept for the analysis, in output order (names after renaming).
final_cols = ['HRHHID', 'HWHHWGT', 'year', 'month', 'region', 'state', 'city', 'layoff', 'layoffdur', 'sex', 'race',
              'educ', 'marstatus', 'famincome', 'famtype', 'famnum']

for year in range(2010, 2025):
    # Rename by name rather than by position, then select. rename() skips
    # labels that are absent, so re-running this cell on already-restructured
    # frames is a no-op instead of a KeyError.
    cpsdata[year] = cpsdata[year].rename(columns=renamed_cols).loc[:, final_cols]

Unnamed: 0,HRHHID,HWHHWGT,year,month,region,state,city,layoff,layoffdur,sex,race,educ,marstatus,famincome,famtype,famnum
0,507001471110674,1877.4443,2016,Jan,South,AL,Montgomery,Not in Universe,-1,Female,White only,"MASTER'S DEGREE(EX:MA,MS,MEng,MEd,MSW)",Married - Spouse Present,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),6
1,507001471110674,1877.4443,2016,Jan,South,AL,Montgomery,Not in Universe,-1,Male,White only,High School Grad-Diploma Or Equiv (ged),Married - Spouse Present,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),6
2,507001471110674,1877.4443,2016,Jan,South,AL,Montgomery,Not in Universe,-1,Male,White only,9th Grade,Never Married,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),6
3,507001471110674,1877.4443,2016,Jan,South,AL,Montgomery,Not in Universe,-1,Female,White only,"1st,2nd,3rd Or 4th Grade",Never Married,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),6
4,507001471110674,1877.4443,2016,Jan,South,AL,Montgomery,Not in Universe,-1,Male,White only,Not in Universe,Not in Universe,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1148772,780137001940503,456.2797,2016,Dec,Northeast,VT,Burlington-South Burlington,Not in Universe,-1,Male,Asian only,Associate Deg.-Academic Program,Married - Spouse Present,"75,000 To 99,999",Husband/Wife Primary Family(neither AF),3
1148773,780137001940503,456.2797,2016,Dec,Northeast,VT,Burlington-South Burlington,Not in Universe,-1,Female,Asian only,Not in Universe,Not in Universe,"75,000 To 99,999",Husband/Wife Primary Family(neither AF),3
1148774,416330596601005,595.3233,2016,Dec,South,WV,Huntington-Ashland,Not in Universe,-1,Female,White only,"Bachelor's Degree(ex:ba,ab,bs)",Never Married,"5,000 To 7,499",Civilian Female Primary Individual,1
1148775,806507627410112,602.8421,2016,Dec,South,WV,Morgantown,Not in Universe,-1,Male,White only,Some College But No Degree,Married - Spouse Present,"35,000 To 39,999",Husband/Wife Primary Family(neither AF),2


In [8]:
# Build the layoff dataset: per-year subsets first, then one combined frame.
placeholder = cpsdata.copy()  # shallow copy: same DataFrames, separate dict
layoffdata = {}

# Rows with a layoff duration above this value are dropped.
max_layoffdur = 26

for year in range(2010, 2025):
    yearly = placeholder[year]

    # Keep only rows whose layoff label is 'Yes' or 'No'. isin() also drops
    # missing labels (codes absent from the label map), which a plain
    # "!= 'Not in Universe'" comparison would let through.
    answered = yearly[yearly['layoff'].isin(['Yes', 'No'])].reset_index(drop=True)
    answered['layoffdur'] = answered['layoffdur'].astype(int)
    layoffdata[year] = answered[answered['layoffdur'] <= max_layoffdur].reset_index(drop=True)

# order the yearly frames chronologically so the combined frame reads in year order
lodatasort = [layoffdata[key] for key in sorted(layoffdata.keys())]
# concatenate the yearly layoff frames into one frame with a fresh index
lodatacomb = pd.concat(lodatasort, ignore_index=True)

lodatacomb

Unnamed: 0,HRHHID,HWHHWGT,year,month,region,state,city,layoff,layoffdur,sex,race,educ,marstatus,famincome,famtype,famnum
0,725193598100479,308.0794,2010,Jan,South,DC,Washington-Arlington-Alexandria,Yes,7,Female,Black only,High School Grad-Diploma Or Equiv (ged),Married - Spouse Present,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),4
1,260320097312539,3457.3851,2010,Jan,South,FL,Miami-Fort Lauderdale-West Palm Beach,Yes,2,Female,Black only,7th Or 8th Grade,Never Married,"5,000 To 7,499",Civilian Female Primary Individual,1
2,993819070809109,2689.8868,2010,Jan,South,GA,Atlanta-Sandy Springs-Roswell,Yes,2,Male,Black only,High School Grad-Diploma Or Equiv (ged),Married - Spouse Present,"50,000 To 59,999",Husband/Wife Primary Family(neither AF),5
3,618291499100824,1988.1404,2010,Jan,South,KY,Louisville/Jefferson,No,4,Female,Asian only,5th Or 6th Grade,Married - Spouse Present,"5,000 To 7,499",Husband/Wife Primary Family(neither AF),5
4,752547003908495,2938.7980,2010,Jan,Northeast,MA,,Yes,11,Male,White only,Some College But No Degree,Divorced,"40,000 To 49,999",Civilian Male Primary Individual,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57564,950623093061200,997.1096,2024,Apr,West,MT,Billings,No,2,Male,White only,High School Grad-Diploma Or Equiv (ged),Married - Spouse Present,"15,000 To 19,999",Husband/Wife Primary Family(neither AF),3
57565,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,4,Male,White only,7th Or 8th Grade,Never Married,"60,000 To 74,999",Civilian Male Primary Individual,3
57566,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,2,Male,White only,"Bachelor's Degree(ex:ba,ab,bs)",Never Married,"60,000 To 74,999",Civilian Male Primary Individual,3
57567,981400202611713,4927.1623,2024,Apr,Midwest,IL,Chicago-Naperville-Elgin,Yes,1,Male,Asian only,"Bachelor's Degree(ex:ba,ab,bs)",Married - Spouse Present,"150,000 or More",Husband/Wife Primary Family(neither AF),3


In [9]:
# Define a map function to simplify categories
def simplefamtype(category):
    """Collapse a detailed household-type label into a broad group.

    Returns 'Individual', 'Individual-Military', 'Family' or
    'Family-Military' for the labels listed below, and 'Not in Universe'
    for anything else (including missing values).
    """
    # NOTE(review): 'Grp Quarters Without Family' is abbreviated while
    # 'Group Quarters With Family' is spelled out -- confirm both match the
    # labels actually present in the data.
    groups = (
        ('Individual', (
            'Civilian Male Primary Individual',
            'Civilian Female Primary Individual',
            'Grp Quarters Without Family',
        )),
        ('Individual-Military', (
            'Primary Individual Hhld-Rp In AF',
        )),
        ('Family', (
            'Husband/Wife Primary Family(neither AF)',
            'Group Quarters With Family',
            'Unmarried Civilian Male-Prim Fam Hhlder',
            'Unmarried Civ. Female-Prim Fam Hhlder',
        )),
        ('Family-Military', (
            'Husb/Wife Prim. Family(either/Both AF)',
            'Primary Family Hhlder-Rp In AF,unmar.',
        )),
    )
    for simplified, labels in groups:
        if category in labels:
            return simplified
    return 'Not in Universe'

def simplemarstatus(category):
    """Collapse a detailed marital-status label into a broad group.

    Returns 'Married', 'Previously Married' or 'Never Married' for the
    labels handled below, and 'Not in Universe' for anything else
    (including missing values). The fallback uses the same spelling as the
    'Not in Universe' label in the source data and in simplefamtype, so the
    category is written one way throughout the dataset.
    """
    if category in ('Married - Spouse Present', 'Married-Spouse Absent'):
        return 'Married'
    if category in ('Divorced', 'Separated', 'Widowed'):
        return 'Previously Married'
    if category == 'Never Married':
        return 'Never Married'
    return 'Not in Universe'

def simpleeduc(category):
    """Collapse a detailed educational-attainment label into a broad level.

    Grades up to '12th Grade No Diploma' become 'Low Education'; a high
    school diploma/GED and 'Some College But No Degree' share
    'High School Diploma/GED'; both associate-degree labels become
    'Associate Degree'; professional and doctorate degrees share one level.
    Anything else (including missing values) becomes 'Not in Universe',
    spelled the same way as the 'Not in Universe' label in the source data
    and in simplefamtype so the category is written one way throughout.
    """
    below_high_school = (
        'Less Than 1st Grade',
        '1st,2nd,3rd Or 4th Grade',
        '5th Or 6th Grade',
        '7th Or 8th Grade',
        '9th Grade',
        '10th Grade',
        '11th Grade',
        '12th Grade No Diploma',
    )
    if category in below_high_school:
        return 'Low Education'
    if category in ('High School Grad-Diploma Or Equiv (ged)', 'Some College But No Degree'):
        return 'High School Diploma/GED'
    if category in ('Associate Degree-Occupational/Vocationl', 'Associate Deg.-Academic Program'):
        return 'Associate Degree'
    if category == "Bachelor's Degree(ex:ba,ab,bs)":
        return "Bachelor's Degree"
    if category == "MASTER'S DEGREE(EX:MA,MS,MEng,MEd,MSW)":
        return "Master's Degree"
    if category in ('Professional School Deg(ex:md,dds,dvm)', 'DOCTORATE DEGREE(EX:PhD,EdD)'):
        return "Professional/Doctorate Degree"
    return 'Not in Universe'

# Replace each detailed label column with its simplified version.
# NOTE: the columns are overwritten in place, so running this cell a second
# time feeds already-simplified labels (e.g. 'Married', 'Family') back into
# the simplifiers, which do not recognise them -- rebuild lodatacomb first.
for column, simplify in (
    ('marstatus', simplemarstatus),
    ('famtype', simplefamtype),
    ('educ', simpleeduc),
):
    lodatacomb[column] = lodatacomb[column].apply(simplify)
lodatacomb

Unnamed: 0,HRHHID,HWHHWGT,year,month,region,state,city,layoff,layoffdur,sex,race,educ,marstatus,famincome,famtype,famnum
0,725193598100479,308.0794,2010,Jan,South,DC,Washington-Arlington-Alexandria,Yes,7,Female,Black only,High School Diploma/GED,Married,"50,000 To 59,999",Family,4
1,260320097312539,3457.3851,2010,Jan,South,FL,Miami-Fort Lauderdale-West Palm Beach,Yes,2,Female,Black only,Low Education,Never Married,"5,000 To 7,499",Individual,1
2,993819070809109,2689.8868,2010,Jan,South,GA,Atlanta-Sandy Springs-Roswell,Yes,2,Male,Black only,High School Diploma/GED,Married,"50,000 To 59,999",Family,5
3,618291499100824,1988.1404,2010,Jan,South,KY,Louisville/Jefferson,No,4,Female,Asian only,Low Education,Married,"5,000 To 7,499",Family,5
4,752547003908495,2938.7980,2010,Jan,Northeast,MA,,Yes,11,Male,White only,High School Diploma/GED,Previously Married,"40,000 To 49,999",Individual,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57564,950623093061200,997.1096,2024,Apr,West,MT,Billings,No,2,Male,White only,High School Diploma/GED,Married,"15,000 To 19,999",Family,3
57565,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,4,Male,White only,Low Education,Never Married,"60,000 To 74,999",Individual,3
57566,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,2,Male,White only,Bachelor's Degree,Never Married,"60,000 To 74,999",Individual,3
57567,981400202611713,4927.1623,2024,Apr,Midwest,IL,Chicago-Naperville-Elgin,Yes,1,Male,Asian only,Bachelor's Degree,Married,"150,000 or More",Family,3


In [10]:
# Encode the layoff answer as 1 ('Yes') / 0 (anything else), then multiply it by
# the HWHHWGT weight so non-layoff rows contribute 0 to weighted totals.
is_laid_off = lodatacomb['layoff'].eq('Yes')
lodatacomb['layoffbin'] = is_laid_off.astype(int)
lodatacomb['layoffweighted'] = lodatacomb['HWHHWGT'] * lodatacomb['layoffbin']
lodatacomb

Unnamed: 0,HRHHID,HWHHWGT,year,month,region,state,city,layoff,layoffdur,sex,race,educ,marstatus,famincome,famtype,famnum,layoffbin,layoffweighted
0,725193598100479,308.0794,2010,Jan,South,DC,Washington-Arlington-Alexandria,Yes,7,Female,Black only,High School Diploma/GED,Married,"50,000 To 59,999",Family,4,1,308.0794
1,260320097312539,3457.3851,2010,Jan,South,FL,Miami-Fort Lauderdale-West Palm Beach,Yes,2,Female,Black only,Low Education,Never Married,"5,000 To 7,499",Individual,1,1,3457.3851
2,993819070809109,2689.8868,2010,Jan,South,GA,Atlanta-Sandy Springs-Roswell,Yes,2,Male,Black only,High School Diploma/GED,Married,"50,000 To 59,999",Family,5,1,2689.8868
3,618291499100824,1988.1404,2010,Jan,South,KY,Louisville/Jefferson,No,4,Female,Asian only,Low Education,Married,"5,000 To 7,499",Family,5,0,0.0000
4,752547003908495,2938.7980,2010,Jan,Northeast,MA,,Yes,11,Male,White only,High School Diploma/GED,Previously Married,"40,000 To 49,999",Individual,1,1,2938.7980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57564,950623093061200,997.1096,2024,Apr,West,MT,Billings,No,2,Male,White only,High School Diploma/GED,Married,"15,000 To 19,999",Family,3,0,0.0000
57565,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,4,Male,White only,Low Education,Never Married,"60,000 To 74,999",Individual,3,1,3437.5519
57566,960921180100828,3437.5519,2024,Apr,West,CA,San Francisco-Oakland-Hayward,Yes,2,Male,White only,Bachelor's Degree,Never Married,"60,000 To 74,999",Individual,3,1,3437.5519
57567,981400202611713,4927.1623,2024,Apr,Midwest,IL,Chicago-Naperville-Elgin,Yes,1,Male,Asian only,Bachelor's Degree,Married,"150,000 or More",Family,3,1,4927.1623


In [26]:
# Final column names: every column is listed, and only four are actually renamed.
col_names = {
    name: name
    for name in [
        'HRHHID', 'HWHHWGT', 'year', 'month', 'region', 'state', 'city',
        'layoff', 'layoffdur', 'sex', 'race', 'educ', 'marstatus',
        'famincome', 'famtype', 'famnum', 'layoffbin', 'layoffweighted',
    ]
}
col_names.update({
    'HRHHID': 'houseid',
    'HWHHWGT': 'propweight',
    'educ': 'educlevel',
    'famnum': 'famsize',
})

# Column order of the final file.
col_order = [
    'year', 'month', 'region', 'state', 'city',
    'propweight', 'layoffweighted', 'layoff', 'layoffdur',
    'sex', 'race', 'educlevel', 'marstatus',
    'famincome', 'famtype', 'famsize',
    'houseid', 'layoffbin',
]
lodatacomb = lodatacomb.rename(columns=col_names).reindex(columns=col_order)

# Standardize the income ranges: strip thousands separators, then turn
# "A To B" into "A-B" and "N or More" into "N+".
income = lodatacomb['famincome']
for old, new in ((',', ''), (' To ', '-'), (' or More', '+')):
    income = income.str.replace(old, new)
lodatacomb['famincome'] = income

# Store the family size as an integer.
lodatacomb['famsize'] = lodatacomb['famsize'].astype(int)
lodatacomb

Unnamed: 0,year,month,region,state,city,propweight,layoffweighted,layoff,layoffdur,sex,race,educlevel,marstatus,famincome,famtype,famsize,houseid,layoffbin
0,2010,Jan,South,DC,Washington-Arlington-Alexandria,308.0794,308.0794,Yes,7,Female,Black only,High School Diploma/GED,Married,50000-59999,Family,4,725193598100479,1
1,2010,Jan,South,FL,Miami-Fort Lauderdale-West Palm Beach,3457.3851,3457.3851,Yes,2,Female,Black only,Low Education,Never Married,5000-7499,Individual,1,260320097312539,1
2,2010,Jan,South,GA,Atlanta-Sandy Springs-Roswell,2689.8868,2689.8868,Yes,2,Male,Black only,High School Diploma/GED,Married,50000-59999,Family,5,993819070809109,1
3,2010,Jan,South,KY,Louisville/Jefferson,1988.1404,0.0000,No,4,Female,Asian only,Low Education,Married,5000-7499,Family,5,618291499100824,0
4,2010,Jan,Northeast,MA,,2938.7980,2938.7980,Yes,11,Male,White only,High School Diploma/GED,Previously Married,40000-49999,Individual,1,752547003908495,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57564,2024,Apr,West,MT,Billings,997.1096,0.0000,No,2,Male,White only,High School Diploma/GED,Married,15000-19999,Family,3,950623093061200,0
57565,2024,Apr,West,CA,San Francisco-Oakland-Hayward,3437.5519,3437.5519,Yes,4,Male,White only,Low Education,Never Married,60000-74999,Individual,3,960921180100828,1
57566,2024,Apr,West,CA,San Francisco-Oakland-Hayward,3437.5519,3437.5519,Yes,2,Male,White only,Bachelor's Degree,Never Married,60000-74999,Individual,3,960921180100828,1
57567,2024,Apr,Midwest,IL,Chicago-Naperville-Elgin,4927.1623,4927.1623,Yes,1,Male,Asian only,Bachelor's Degree,Married,150000+,Family,3,981400202611713,1


In [27]:
# Save the finalized dataset as a CSV file. The row index is written too
# (the to_csv default), so it shows up as an unnamed first column on read.
output_path = '/your/file/path/here/layoff_data.csv'
lodatacomb.to_csv(output_path)