In [42]:
# setup
import pandas as pd
from pathlib import Path

In [66]:
## path setup
# choose base for uploading
base_folder = Path().resolve().parent

data_folder = base_folder / "data"
cleaned_folder = data_folder / "clean_data"

# create folder
cleaned_folder.mkdir(parents = True, exist_ok = True)

In [68]:
## upload data
# CPI file path
cpi_path = data_folder / "CPI All Urban Consumers Dairy and Related Products in U.S. City Average.xlsx"

# upload FRED data
cpi_control = pd.read_excel(cpi_path, sheet_name = "Annual")

## milk_df path
milk_df_path = cleaned_folder / "milk_df_clean.xlsx"

# upload milk_df
milk_df = pd.read_excel(milk_df_path)

In [70]:
## data cleaning
# rename columns
cpi_control.columns = ["year", "cpi"] 

# get year column to just be years and not months
cpi_control["year"] = pd.to_datetime(cpi_control["year"]).dt.year

# check to see if it worked
cpi_control.head()

Unnamed: 0,year,cpi
0,1935,15.325
1,1936,15.983
2,1937,16.567
3,1938,15.667
4,1939,15.058


In [72]:
## setting index for inflation
# identify base year
base_year = 2005
base_cpi = cpi_control.loc[cpi_control["year"] == base_year, "cpi"].iloc[0]

cpi_control["cpi_index"] = cpi_control["cpi"] / base_cpi
cpi_control

cpi_control = cpi_control[cpi_control["year"].isin([2005, 2010, 2016])]

cpi_control

Unnamed: 0,year,cpi,cpi_index
70,2005,182.383,1.0
75,2010,199.245,1.092454
81,2016,217.306,1.191482


In [84]:
## merge all datasets
# add CPI into milk_df
milk_df = milk_df.merge(cpi_control[["year", "cpi_index"]],
                        how = "left", left_on = "Year", right_on = "year")

# cleanup
milk_df = milk_df.drop(columns=["year"])

In [86]:
## update variables
# identify money variables
money_variables = ["milk_sold", "gross_value", "feed_costs", "vet_costs", "bedding_litter_costs", 
                   "marketing_costs", "service_costs", "utility_costs", "repair_costs",
                   "total_operating_costs", "overhead", "total_costs"]

# switch money variables to real values with CPI control
for col in money_variables:
    milk_df[col + "_real"] = milk_df[col] / milk_df["cpi_index"]

milk_df

Unnamed: 0,Year,State,Organic,milk_sold,gross_value,feed_costs,vet_costs,bedding_litter_costs,marketing_costs,service_costs,...,bedding_litter_costs_real,marketing_costs_real,service_costs_real,utility_costs_real,repair_costs_real,total_operating_costs_real,overhead_real,total_costs_real,cpi_index_y,cpi_index
0,2005,Minnesota,1,20.88,23.17,10.11,0.54,0.38,0.43,0.49,...,0.38,0.43,0.49,0.89,1.07,14.3,16.31,30.61,1.0,1.0
1,2005,New York,1,24.03,26.87,13.08,0.35,0.37,0.24,0.1,...,0.37,0.24,0.1,1.12,0.94,16.63,21.37,38.0,1.0,1.0
2,2005,Pennsylvania,1,24.23,26.85,13.23,0.68,0.46,0.31,0.23,...,0.46,0.31,0.23,0.73,0.94,17.1,14.61,31.71,1.0,1.0
3,2005,Vermont,1,24.23,26.87,13.61,0.81,0.52,0.24,0.22,...,0.52,0.24,0.22,0.98,0.87,17.62,18.88,36.5,1.0,1.0
4,2005,Wisconsin,1,21.19,23.63,13.47,0.6,0.39,0.22,0.33,...,0.39,0.22,0.33,0.84,0.85,17.11,14.95,32.07,1.0,1.0
5,2010,Minnesota,1,25.17,27.81,14.01,0.49,0.47,0.25,0.6,...,0.430224,0.228843,0.549222,1.098445,0.594991,16.375979,20.128998,36.504976,1.092454,1.092454
6,2010,New York,1,24.29,27.22,11.58,0.62,0.52,0.39,0.68,...,0.475993,0.356995,0.622452,1.135059,1.089291,15.002923,16.568206,31.571129,1.092454,1.092454
7,2010,Pennsylvania,1,28.37,30.92,14.05,0.51,0.44,0.23,0.41,...,0.402763,0.210535,0.375302,1.318134,1.418824,17.21812,18.783403,36.001523,1.092454,1.092454
8,2010,Vermont,1,25.97,28.61,15.88,0.57,0.5,0.23,0.45,...,0.457685,0.210535,0.411917,1.208289,1.794126,19.26855,20.550068,39.818618,1.092454,1.092454
9,2010,Wisconsin,1,27.9,30.89,14.75,0.8,0.58,0.19,0.93,...,0.530915,0.17392,0.851295,0.997754,1.189982,18.170105,19.360087,37.530191,1.092454,1.092454


In [88]:
# export file
inflation_path = cleaned_folder / "milk_df_inflation_adjusted.xlsx"

milk_df.to_excel(inflation_path, index = False)