In [2]:
import requests
import json
from API_Key import api_key
import pandas as pd

In [3]:
# Fetch the labor-productivity series from the BLS public API (v2).
# The registration key is imported from the local API_Key module.
url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

series_id_lp = "PRS85006092"

payload = {
    "seriesid": [series_id_lp],
    "startyear": "2019",
    "endyear": "2024",
    "registrationkey": api_key
}

# A timeout keeps the cell from hanging indefinitely if the service is unreachable.
response = requests.post(url, json=payload, timeout=30)

# Fail loudly instead of only printing: otherwise `data` would be left
# undefined (or stale from an earlier run) and the next cell would break
# or silently reuse old results.
if response.status_code != 200:
    raise RuntimeError(f"Error {response.status_code}")

data = response.json()

# The response body carries its own `status` field ("REQUEST_SUCCEEDED" on a
# good run), so an HTTP 200 alone does not guarantee usable results.
if data.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(
        f"BLS API request was not successful: "
        f"status={data.get('status')!r}, message={data.get('message')!r}"
    )

print(json.dumps(data, indent=4))
    

{
    "status": "REQUEST_SUCCEEDED",
    "responseTime": 171,
    "message": [],
    "Results": {
        "series": [
            {
                "seriesID": "PRS85006092",
                "data": [
                    {
                        "year": "2024",
                        "period": "Q02",
                        "periodName": "2nd Quarter",
                        "latest": "true",
                        "value": "2.5",
                        "footnotes": [
                            {
                                "code": "R",
                                "text": "revised"
                            }
                        ]
                    },
                    {
                        "year": "2024",
                        "period": "Q01",
                        "periodName": "1st Quarter",
                        "value": "0.4",
                        "footnotes": [
                            {}
                        ]
                    },
   

In [4]:
# Pull the list of observations for the first (only requested) series
# out of the API response.
series_data = data['Results']['series'][0]['data']

# Period codes used in the API payload -> readable quarter labels.
quarter_mapping = {
    "Q01": "Q1: Jan 1 - Mar 31",
    "Q02": "Q2: Apr 1 - Jun 30",
    "Q03": "Q3: Jul 1 - Sep 30",
    "Q04": "Q4: Oct 1 - Dec 31"
}

# Build the tidy frame in one chain: label the quarter, convert the string
# fields to numbers, keep only the three analysis columns, and order the
# rows chronologically (the quarter labels sort correctly as text).
labor_productivity_df = (
    pd.DataFrame(series_data)
    .assign(
        quarter=lambda frame: frame['period'].map(quarter_mapping),
        year=lambda frame: pd.to_numeric(frame['year']),
        percent_change=lambda frame: pd.to_numeric(frame['value']),
    )
    .loc[:, ['year', 'quarter', 'percent_change']]
    .sort_values(by=['year', 'quarter'])
    .reset_index(drop=True)
)

In [5]:
# Load the unit-labor-costs workbook and reshape it to match the
# (year, quarter, percent_change) layout of the labor-productivity frame.
file_path = "unit_labor_costs.xlsx"
sheet_name = "BLS Data Series"

# Pass the sheet name explicitly; previously it was defined but never used,
# so read_excel silently fell back to the first sheet in the workbook.
unit_labor_costs_df = pd.read_excel(file_path, sheet_name=sheet_name)

# Wide -> long: every column other than "Year" becomes a row, with the
# column header stored in "quarter" and the cell value in "percent_change".
unit_labor_costs_df = unit_labor_costs_df.melt(id_vars=["Year"],
                                               var_name="quarter",
                                               value_name="percent_change")

# Translate the spreadsheet's quarter headers into the same labels used for
# the API data. Headers not listed here map to NaN.
quarter_mapping2 = {
    "Qtr1": "Q1: Jan 1 - Mar 31",
    "Qtr2": "Q2: Apr 1 - Jun 30",
    "Qtr3": "Q3: Jul 1 - Sep 30",
    "Qtr4": "Q4: Oct 1 - Dec 31"
}
unit_labor_costs_df["quarter"] = unit_labor_costs_df["quarter"].map(quarter_mapping2)

# Chronological order, then discard rows that have no value.
# Note: the index is reset before the dropna, so it may contain gaps afterwards.
unit_labor_costs_df = unit_labor_costs_df.sort_values(by=["Year", "quarter"]).reset_index(drop=True)
unit_labor_costs_df = unit_labor_costs_df.dropna(subset=["percent_change"])

In [7]:
# Same load-and-reshape steps as for unit labor costs, applied to the
# real-hourly-compensation workbook.
file_path2 = "real_hourly_compensation.xlsx"
sheet_name2 = sheet_name

# Pass the sheet name explicitly; previously sheet_name2 was assigned but
# never used, so read_excel silently read the first sheet in the workbook.
real_hourly_compensation_df = pd.read_excel(file_path2, sheet_name=sheet_name2)

# Wide -> long: every column other than "Year" becomes a row, with the
# column header stored in "quarter" and the cell value in "percent_change".
real_hourly_compensation_df = real_hourly_compensation_df.melt(id_vars=["Year"],
                                                               var_name="quarter",
                                                               value_name="percent_change")

# Reuse quarter_mapping2 (defined in the unit-labor-costs cell) so the quarter
# labels are identical across frames.
real_hourly_compensation_df["quarter"] = real_hourly_compensation_df["quarter"].map(quarter_mapping2)

# Chronological order, then discard rows that have no value.
real_hourly_compensation_df = real_hourly_compensation_df.sort_values(by=["Year", "quarter"]).reset_index(drop=True)
real_hourly_compensation_df = real_hourly_compensation_df.dropna(subset=["percent_change"])

In [8]:
# Align the three quarterly frames by dropping the first row of each and
# renumbering the index from 0.
# NOTE: this rebinds the same names, so every additional run of this cell
# removes one more row from each frame.
labor_productivity_df, unit_labor_costs_df, real_hourly_compensation_df = (
    frame.iloc[1:].reset_index(drop=True)
    for frame in (
        labor_productivity_df,
        unit_labor_costs_df,
        real_hourly_compensation_df,
    )
)
real_hourly_compensation_df

Unnamed: 0,Year,quarter,percent_change
0,2019,Q2: Apr 1 - Jun 30,-2.7
1,2019,Q3: Jul 1 - Sep 30,-1.1
2,2019,Q4: Oct 1 - Dec 31,4.1
3,2020,Q1: Jan 1 - Mar 31,9.2
4,2020,Q2: Apr 1 - Jun 30,28.8
5,2020,Q3: Jul 1 - Sep 30,-10.5
6,2020,Q4: Oct 1 - Dec 31,7.5
7,2021,Q1: Jan 1 - Mar 31,-3.6
8,2021,Q2: Apr 1 - Jun 30,-1.7
9,2021,Q3: Jul 1 - Sep 30,-1.0


In [9]:
# Load the total-factor-productivity workbook (shown below with "Year" and
# "Annual" columns).
file_path3 = "total_factor_productivity.xlsx"
sheet_name3 = sheet_name

# Pass the sheet name explicitly; previously sheet_name3 was assigned but
# never used, so read_excel silently read the first sheet in the workbook.
total_factor_productivity_df = pd.read_excel(file_path3, sheet_name=sheet_name3)
total_factor_productivity_df

Unnamed: 0,Year,Annual
0,2019,101.987
1,2020,101.756
2,2021,105.053
3,2022,103.343
4,2023,104.107
