# Analysis of the historical price of a troy ounce of gold adjusted for inflation

## Setup

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import signal
from statsmodels.tsa.seasonal import STL

In [None]:
# Load the raw gold price series and the US CPI series from their CSV files
gold_csv_path = "../data/AU.csv"
cpi_csv_path = "../data/US_CPI_M.csv"
df_au = pd.read_csv(gold_csv_path)
df_cpi = pd.read_csv(cpi_csv_path)

In [None]:
# Append the CPI estimate for April of 2024 (rate in percent) to the CPI df.
# The append is skipped when that month is already present, so running this
# cell twice in a row, or loading a CSV that already covers April 2024, cannot
# create a duplicated month. A duplicated month would become a duplicated index
# label later on and break the date-based reindexing onto the gold price dates.
new_row = pd.DataFrame({"year_month": ["2024-04"], "rate": [3.4]})
if not df_cpi["year_month"].astype(str).isin(new_row["year_month"]).any():
    df_cpi = pd.concat([df_cpi, new_row], ignore_index=True)

In [None]:
# Parse the date columns and promote them to the index of each df
df_au = (
    df_au
    .assign(date=pd.to_datetime(df_au["date"]))
    .set_index("date")
)
df_cpi = (
    df_cpi
    .assign(year_month=pd.to_datetime(df_cpi["year_month"]))
    .set_index("year_month")
)

In [None]:
# Chart theme: seaborn dark grid with a faint grid, then matplotlib's dark background
# (order matters: the matplotlib style is applied on top of the seaborn theme)
grid_overrides = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=grid_overrides)
plt.style.use("dark_background")

### Adjust the gold price for inflation (in 2023 dollars)

In [None]:
# Convert the CPI rates from percent to decimal fractions (e.g. 3.4 -> 0.034)
df_cpi["rate"] = df_cpi["rate"].div(100)

In [None]:
# For every month number, build the factor that converts that month's prices
# into dollars of the same month of 2023 (which is the base year)
all_months = []
for month_number in range(1, 13):
    # Year-over-year growth factors for this month number, newest (2023) first
    rates_up_to_2023 = df_cpi.loc[:f"2023-{month_number:02}", "rate"]
    yoy_factors = (1 + rates_up_to_2023).iloc[::-12]
    # Compound backwards in time, then lag by one step so that each year only
    # accumulates the rates of the years after it (2023 itself gets a factor of 1)
    factor_to_2023 = yoy_factors.cumprod().shift(1).fillna(1)
    all_months.append(factor_to_2023)

In [None]:
# Stitch the twelve per-month series together and attach them to the CPI df
# (the assignment aligns on the date index, so the concat order does not matter)
cumulative_rate_all_months = pd.concat(all_months)
df_cpi["cumulative_rate"] = cumulative_rate_all_months

In [None]:
# 2024 months lie after the base year, so their cumulative rate is just the
# inverse of (1 + that month's own rate)
is_2024 = df_cpi.index.year == 2024
df_cpi.loc[is_2024, "cumulative_rate"] = 1 / (1 + df_cpi.loc[is_2024, "rate"])

In [None]:
# Bring the cumulative CPI rates onto the gold price dates: each date takes the
# latest CPI row at or before it (so May values will have the same rate as April)
df_cpi_reindexed = df_cpi.reindex(index=df_au.index, method="ffill")
df_au["cpi_cumulative_rate"] = df_cpi_reindexed.loc[:, "cumulative_rate"]

In [None]:
# Express every price in 2023 dollars by scaling it with its cumulative CPI rate
df_au["real_price"] = df_au["price"].mul(df_au["cpi_cumulative_rate"])

## The price of gold adjusted for inflation across time

In [None]:
# 1-year moving average of the real price, computed over a 252-row window
real_price_window = df_au["real_price"].rolling(window=252)
df_au["real_price_1y_ma"] = real_price_window.mean()

In [None]:
# Real price and its 1-year moving average on a single set of axes
fig, ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="real_price",
    label="Price",
    color="yellow",
    linewidth=0.25,
    ax=ax,
)
sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="real_price_1y_ma",
    label="Moving average (1-year)",
    color="aqua",
    linewidth=0.75,
    ax=ax,
)

ax.set_title("Price of a troy ounce of gold across time in 2023 dollars")
ax.set_xlabel("Date")
ax.set_ylabel("Price")

plt.show()

In [None]:
# Row of the gold df where the inflation-adjusted price is lowest
lowest_real_price_date = df_au["real_price"].idxmin()
df_au.loc[lowest_real_price_date]

In [None]:
# Row of the gold df where the inflation-adjusted price is highest
highest_real_price_date = df_au["real_price"].idxmax()
df_au.loc[highest_real_price_date]

In [None]:
# Peaks: local maxima of the real price that are at least 800 rows apart;
# show the ten highest of them in date order, one column per peak
peaks, _ = signal.find_peaks(df_au["real_price"], distance=800)
highest_peaks = df_au.iloc[peaks].nlargest(10, "real_price")
highest_peaks.sort_values("date").T

In [None]:
# Valleys: peaks of the negated real price (i.e. local minima) that are at
# least 1000 rows apart; show the ten lowest of them in date order
valleys, _ = signal.find_peaks(-df_au["real_price"], distance=1000)
lowest_valleys = df_au.iloc[valleys].nsmallest(10, "real_price")
lowest_valleys.sort_values("date").T

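- There is a huge valley in the real price from the 1980s to the 2010s.
- So, is gold really a good inflation hedge?
- In real terms the all-time high is different: it no longer falls on 2024-04-12, as it does for the unadjusted price.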

### Nominal price and real price in the same chart

In [None]:
# Inflation-adjusted price and unadjusted price on a single set of axes
fig, ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="real_price",
    label="Price adjusted for inflation",
    color="yellow",
    linewidth=0.4,
    ax=ax,
)
sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="price",
    label="Price",
    color="violet",
    linewidth=0.4,
    ax=ax,
)

ax.set_title("Price of a troy ounce of gold across time")
ax.set_xlabel("Date")
ax.set_ylabel("Price")

plt.show()

### Time series STL decomposition to observe trend, seasonality, and residuals

In [None]:
# Fit an STL decomposition of the real price with a seasonal period of 252 rows
stl_model = STL(df_au["real_price"], period=252)
stl = stl_model.fit()

In [None]:
fig, axes = plt.subplots(4, 1, figsize=(14, 8), sharex=True)

# One stacked panel per series: (values, line label, panel title, color, line width)
panels = [
    (
        df_au["real_price"],
        "Original",
        "Price of a troy ounce of gold across time in 2023 dollars",
        "yellow",
        0.5,
    ),
    (stl.trend, "Trend", "Trend component", "aqua", 1),
    (stl.seasonal, "Seasonal", "Seasonal component", "fuchsia", 0.5),
    (stl.resid, "Residual", "Residual component", "red", 0.5),
]
for panel_ax, (values, label, title, color, linewidth) in zip(axes, panels):
    panel_ax.plot(df_au.index, values, label=label, color=color, linewidth=linewidth)
    panel_ax.set_title(title)

# Optional: uncomment to tighten the spacing between the panels
# plt.tight_layout()
plt.show()

In [None]:
# Trend component of the STL decomposition on its own, for a closer look
fig, ax = plt.subplots(figsize=(14, 8))

ax.plot(stl.trend, color="aqua", linewidth=1)

ax.set_title("Trend component of the real price of gold across time")
ax.set_xlabel("Date")
ax.set_ylabel("Trend")

plt.show()

In [None]:
# TODO: get the values of the peaks

- The valley is clearer when judged by the trend. The 2010s peak is actually higher than the 1980s peak (interesting...).

In [None]:
# TODO: check if this is worth it... (work with avgs)

# Seasonal component of the STL decomposition, restricted to one calendar year
specific_year = 2008
seasonal = stl.seasonal
seasonal_year = seasonal.loc[seasonal.index.year == specific_year]

fig, ax = plt.subplots(figsize=(14, 8))

ax.plot(seasonal_year.index, seasonal_year, color="fuchsia", linewidth=0.75)

ax.set_title(f"Seasonal component of the real price of gold for the year {specific_year}")
ax.set_xlabel("Date")
ax.set_ylabel("Seasonality")

plt.show()

- **TODO:** write the conclusion.

- Scope note: there is no need to check anything related to volatility or short-term price analysis.