# Inflation

## BLS: Producer Price Index (PPI)

Information about the survey [here](https://download.bls.gov/pub/time.series/pc/pc.txt)

In [40]:
import os
import pandas as pd

# Root folder of the project; the working directory is switched to it so the
# relative paths used further down (e.g. the "data" folder) resolve from there.
# Set the PPI_PROJECT_DIR environment variable to run the notebook from another
# location; when it is unset the original hard-coded folder is used.
path = os.environ.get(
    "PPI_PROJECT_DIR",
    "C:\\Users\\pedro\\OneDrive\\NYU\\CSS\\II. Data Skills\\project",
)
os.chdir(path)

&nbsp;<br>

Importing the `PPI Aggregates` file directly from [BLS](https://download.bls.gov/pub/time.series/pc/) and saving it as a .parquet file:

In [41]:
# Read the tab-separated aggregates file straight from the BLS server.
# The file with all data is available at
# https://download.bls.gov/pub/time.series/pc/pc.data.0.Current
ppi = pd.read_csv(
    "https://download.bls.gov/pub/time.series/pc/pc.data.01.aggregates",
    sep="\t",
)
ppi.head()

Unnamed: 0,series_id,year,period,value,footnote_codes
0,PCUADLVWRADLVWR,2006,M12,100.0,
1,PCUADLVWRADLVWR,2007,M01,103.7,
2,PCUADLVWRADLVWR,2007,M02,103.8,
3,PCUADLVWRADLVWR,2007,M03,103.4,
4,PCUADLVWRADLVWR,2007,M04,103.7,


In [42]:
# Strip surrounding whitespace from the header names and from the series ids.
ppi.columns = ppi.columns.str.strip()
ppi["series_id"] = ppi["series_id"].str.strip()

# Period codes M01..M12; rows carrying any other period code are discarded.
months = [f"M{number:02d}" for number in range(1, 13)]

# Keep the monthly rows, build a first-of-month timestamp from year + period,
# and reduce the frame to de-duplicated (series_id, date, value) rows.
ppi = (
    ppi.loc[ppi["period"].isin(months)]
    .drop(columns="footnote_codes")
    .assign(
        date=lambda frame: pd.to_datetime(
            frame["year"].astype(str)
            + "-"
            + frame["period"].str.replace("M", "")
            + "-1"
        )
    )
    .loc[:, ["series_id", "date", "value"]]
    .drop_duplicates()
)

In [43]:
# Preview the first five rows of the reshaped frame.
ppi.head(n=5)

Unnamed: 0,series_id,date,value
0,PCUADLVWRADLVWR,2006-12-01,100.0
1,PCUADLVWRADLVWR,2007-01-01,103.7
2,PCUADLVWRADLVWR,2007-02-01,103.8
3,PCUADLVWRADLVWR,2007-03-01,103.4
4,PCUADLVWRADLVWR,2007-04-01,103.7


In [44]:
# Distinct series ids present in the data, in order of first appearance.
codes = ppi["series_id"].unique().tolist()

In [45]:
# Write the `ppi` frame to parquet, relative to the current working directory.
# os.path.join picks the separator of the host OS (on Windows this yields the
# same "data\inflation\..." string as before), and the target folder is created
# first so the write does not fail when it is missing.
ppi_data_file = os.path.join("data", "inflation", "data_bls_ppi.parquet")
os.makedirs(os.path.dirname(ppi_data_file), exist_ok=True)
ppi.to_parquet(ppi_data_file)

Building Dictionary

In [49]:
# Companion BLS files describing the data: the series, industry and product
# tables, each read as a tab-separated file.
series = pd.read_csv(
    "https://download.bls.gov/pub/time.series/pc/pc.series",
    sep="\t",
)
industry = pd.read_csv(
    "https://download.bls.gov/pub/time.series/pc/pc.industry",
    sep="\t",
)
product = pd.read_csv(
    "https://download.bls.gov/pub/time.series/pc/pc.product",
    sep="\t",
)

In [50]:
# Trim leading/trailing whitespace from the header names of the three tables.
series.columns = [name.strip() for name in series.columns]
industry.columns = [name.strip() for name in industry.columns]
product.columns = [name.strip() for name in product.columns]

In [51]:
# Preview the first five rows of the series table.
series.head(n=5)

Unnamed: 0,series_id,industry_code,product_code,seasonal,base_date,series_title,footnote_codes,begin_year,begin_period,end_year,end_period
0,PCU1133--1133--,1133--,1133--,U,198112,"PPI industry group data for Logging, not seaso...",,1981,M12,2022,M10
1,PCU11331-11331-,11331-,11331-,U,198112,"PPI industry data for Logging, not seasonally ...",,1981,M12,2022,M10
2,PCU113310113310,113310,113310,U,198112,"PPI industry data for Logging, not seasonally ...",,1981,M12,2022,M10
3,PCU113310113310M,113310,113310M,U,198606,PPI industry data for Logging-Miscellaneous re...,,1986,M06,2022,M06
4,PCU113310113310MM,113310,113310MM,U,198606,PPI industry data for Logging-Miscellaneous re...,,1986,M06,1998,M08


In [52]:
# Build the dictionary: left-join the industry and product tables onto the
# series table (no `on` is given, so pandas joins on the columns the two frames
# share), trim the series ids, keep the descriptive columns in a fixed order,
# and retain only the series that appear in `codes`.
ppi_dict = (
    series.merge(industry, how="left")
    .merge(product, how="left")
    .assign(series_id=lambda frame: frame["series_id"].str.strip())
    .loc[
        :,
        [
            "series_id",
            "industry_code",
            "industry_name",
            "product_code",
            "product_name",
            "seasonal",
            "series_title",
            "base_date",
            "begin_period",
            "begin_year",
            "end_period",
            "end_year",
        ],
    ]
    .loc[lambda frame: frame["series_id"].isin(codes)]
)

ppi_dict.head()

Unnamed: 0,series_id,industry_code,industry_name,product_code,product_name,seasonal,series_title,base_date,begin_period,begin_year,end_period,end_year
4957,PCUADLVWRADLVWR,ADLVWR,Delivery and warehouse industries,ADLVWR,Delivery and warehouse industries,U,PPI industry data for Delivery and warehouse i...,200612,M12,2006,M10,2022
4958,PCUAINFO-AINFO-,AINFO-,Information,AINFO-,Information,U,"PPI industry data for Information, not seasona...",200612,M12,2006,M10,2022
4959,PCUAMUM--AMUM--,AMUM--,"Total mining, utilities, and manufacturing ind...",AMUM--,"Total mining, utilities, and manufacturing ind...",U,"PPI industry group data for Total mining, util...",200612,M12,2006,M10,2022
4960,PCUARETTRARETTR,ARETTR,Total retail trade industries,ARETTR,Total retail trade industries,U,PPI industry data for Total retail trade indus...,200612,M12,2006,M10,2022
4961,PCUASHC--ASHC--,ASHC--,Selected health care industries,ASHC--,Selected health care industries,U,PPI industry group data for Selected health ca...,200612,M12,2006,M10,2022


In [53]:
# Write the `ppi_dict` frame to parquet, relative to the current working
# directory. os.path.join picks the separator of the host OS (on Windows this
# yields the same "data\inflation\..." string as before), and the target folder
# is created first so the write does not fail when it is missing.
ppi_dict_file = os.path.join("data", "inflation", "dict_bls_ppi.parquet")
os.makedirs(os.path.dirname(ppi_dict_file), exist_ok=True)
ppi_dict.to_parquet(ppi_dict_file)