# Analysis of the historical price of a troy ounce of gold

## Setup

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import signal
from statsmodels.tsa.seasonal import STL

In [None]:
# Load the historical gold price dataset into a DataFrame
au_csv_path = "../data/AU.csv"
df_au = pd.read_csv(au_csv_path)

In [None]:
# Parse the "date" column and use it as the index, so rows can be selected and
# sliced by date. The cell is guarded so that re-running it once the index is
# already in place is a no-op rather than a KeyError on the missing "date" column.
if "date" in df_au.columns:
    df_au = df_au.assign(date=pd.to_datetime(df_au["date"])).set_index("date")

In [None]:
# Chart styling: seaborn's darkgrid theme with faint grid lines, followed by
# matplotlib's dark background style (same call order as before)
grid_overrides = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=grid_overrides)
plt.style.use("dark_background")

## Dataset basic info

In [None]:
# Display the gold price DataFrame
df_au

In [None]:
# Descriptive statistics of the dataset
df_au.describe()

In [None]:
# TODO: check the actual number of trading days per year in this dataset (is it really 252?)

## Price of gold across time (long-term)

In [None]:
# 1-year moving average of the price, using a 252-row rolling window
# (252 trading days per year)
yearly_window = df_au["price"].rolling(window=252)
df_au["price_1y_ma"] = yearly_window.mean()

In [None]:
# Daily price together with its 1-year moving average, on a linear y-axis
price_fig, price_ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au, x=df_au.index, y="price",
    label="Price", color="yellow", linewidth=0.25, ax=price_ax,
)
sns.lineplot(
    data=df_au, x=df_au.index, y="price_1y_ma",
    label="Moving average (1-year)", color="aqua", linewidth=0.75, ax=price_ax,
)

price_ax.set(
    title="Price of a troy ounce of gold across time",
    xlabel="Date",
    ylabel="Price",
)

plt.show()

It's easier to see the early price fluctuations with a logarithmic y-axis:

In [None]:
# Daily price together with its 1-year moving average, on a logarithmic y-axis
log_fig, log_ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au, x=df_au.index, y="price",
    label="Price", color="yellow", linewidth=0.25, ax=log_ax,
)
sns.lineplot(
    data=df_au, x=df_au.index, y="price_1y_ma",
    label="Moving average (1-year)", color="aqua", linewidth=0.75, ax=log_ax,
)

# A log scale compresses the axis so the early, low-price fluctuations become visible
log_ax.set_yscale("log")

log_ax.set(
    title="Price of a troy ounce of gold across time",
    xlabel="Date",
    ylabel="Price",
)

plt.show()

In [None]:
# Row holding the lowest price in the dataset (all-time low within the data)
date_of_low = df_au["price"].idxmin()
df_au.loc[date_of_low]

In [None]:
# Row holding the highest price in the dataset (all-time high within the data)
date_of_high = df_au["price"].idxmax()
df_au.loc[date_of_high]

In [None]:
# Local price maxima found with a minimum spacing of 1000 samples;
# the 12 highest of them are shown in date order, as a single "price" row
peaks, _ = signal.find_peaks(df_au["price"], distance=1000)
highest_peaks = df_au.iloc[peaks].nlargest(12, "price")
highest_peaks.sort_values("date")[["price"]].transpose()

In [None]:
# Local price minima, found as peaks of the negated price with a minimum spacing
# of 1000 samples; the 12 lowest of them are shown in date order, as a single "price" row
valleys, _ = signal.find_peaks(-df_au["price"], distance=1000)
lowest_valleys = df_au.iloc[valleys].nsmallest(12, "price")
lowest_valleys.sort_values("date")[["price"]].transpose()

In [None]:
# TODO: how much does the price change per year on average? Make a table
#...

**Main info**
- The price rose rapidly since the early 70s, following the collapse of the Bretton Woods system in 1973.
- Peaked in 1980.
- Declined and stabilized until the early 2000s.
- Rose consistently until the early 2010s.
- Stabilized up until 2019.
- Started rising again in 2019.

### Volatility

In [None]:
# Daily price change, as a fraction of the previous row's price
daily_change = df_au["price"].pct_change()
df_au["price_change"] = daily_change

In [None]:
# 1-year rolling standard deviation of the daily price change (252-row window).
# Rows without a full window keep NaN instead of being dropped from df_au in place:
# dropping them silently removed the first year of data from every later cell that
# reads df_au (e.g. the STL decomposition) and changed what earlier cells return
# when re-run. Threshold comparisons on volatility_1y evaluate to False for NaN,
# so the filters on this column are unaffected.
df_au["volatility_1y"] = df_au["price_change"].rolling(window=252).std()

In [None]:
# 1-year rolling volatility over the whole dataset
vol_fig, vol_ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au, x=df_au.index, y="volatility_1y",
    color="red", linewidth=0.75, ax=vol_ax,
)

vol_ax.set(
    title="Yearly volatility of the price of gold across time",
    xlabel="Date",
    ylabel="Volatility",
)

plt.show()

In [None]:
# The 10 daily price changes with the largest absolute size, shown as a single row
abs_change = df_au["price_change"].abs()
top_change_dates = abs_change.sort_values(ascending=False).head(10).index
df_au.loc[top_change_dates, ["price_change"]].transpose()

In [None]:
# First row where the 1-year volatility is above 1.5%
df_au.query("volatility_1y > 0.015").iloc[0]

In [None]:
# First row from 1985-01-02 onward where the 1-year volatility is above 2%
# NOTE(review): the previous comment said 1,5%, but the threshold used here is 0.02 —
# confirm which of the two is intended
df_au.query("volatility_1y > 0.02").loc["1985-01-02":].iloc[0]

In [None]:
# TODO: get the average volatility per year?
#...

The 5 biggest price changes occurred at the beginning of 1980.
Volatility was very high from the mid-70s, peaking in the early 80s; it was also high during the dot-com bubble, the Great Recession, and the EU sovereign debt crisis; right now it is not volatile.
It was very stable during the 90s, and also now without counting ... (TODO: finish this sentence)  
Right at the end of the collapse of the Bretton Woods system (1973), volatility rose a lot.

### STL decomposition (trend, seasonality, and residuals)

In [None]:
# Fit an STL decomposition of the price with a seasonal period of 252 rows
stl_model = STL(df_au["price"], period=252)
stl = stl_model.fit()

In [None]:
def _draw_panel(ax, values, label, color, linewidth, title):
    """Plot one series against the df_au date index on `ax` and set the panel title."""
    ax.plot(df_au.index, values, label=label, color=color, linewidth=linewidth)
    ax.set_title(title)


# Four stacked panels sharing the x-axis: original price, trend, seasonal, residual
fig, axes = plt.subplots(4, 1, figsize=(14, 8), sharex=True)

_draw_panel(axes[0], df_au["price"], "Original", "yellow", 0.5, "Price of a troy ounce of gold across time")
_draw_panel(axes[1], stl.trend, "Trend", "aqua", 1, "Trend component")
_draw_panel(axes[2], stl.seasonal, "Seasonal", "fuchsia", 0.5, "Seasonal component")
_draw_panel(axes[3], stl.resid, "Residual", "orangered", 0.5, "Residual component")

# plt.tight_layout()
plt.show()

In [None]:
# Trend component on its own, drawn on a logarithmic y-axis
trend_fig, trend_ax = plt.subplots(figsize=(14, 8))

trend_ax.plot(stl.trend, color="aqua", linewidth=1)

trend_ax.set(
    title="Trend component of the price of gold across time",
    xlabel="Date",
    ylabel="Trend",
)
trend_ax.set_yscale("log")

plt.show()

In [None]:
# Seasonal component restricted to a single calendar year.
# TODO: try an average instead (70s, 80s, ... or all years), e.g. monthly means via
#   seasonal.groupby(seasonal.index.month).mean()
# with the month numbers converted to month names for readability:
#   pd.to_datetime(monthly_means.index, format='%m').strftime('%B')
specific_year = 2008
seasonal = stl.seasonal
in_specific_year = seasonal.index.year == specific_year
seasonal_year = seasonal[in_specific_year]

plt.figure(figsize=(14, 8))
plt.plot(seasonal_year.index, seasonal_year, color="fuchsia", linewidth=0.75)

plt.title(f"Seasonal component of price of gold for the year {specific_year}")
plt.ylabel("Seasonality")
plt.xlabel("Date")

plt.show()

- Some analysts say that in mid-November gold enters a very good seasonal phase, which lasts until the second half of February.
- Gotta work on this...

## Price of gold year-to-date (short-term)

In [None]:
# Year-to-date subset: every row from 2024 onward.
# An explicit copy is taken so that columns added to df_au_ytd later are written to
# an independent frame rather than to a slice of df_au, which would trigger pandas'
# SettingWithCopyWarning.
df_au_ytd = df_au.loc["2024":].copy()

In [None]:
# Price over the year-to-date subset
ytd_fig, ytd_ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au_ytd, x=df_au_ytd.index, y="price",
    color="yellow", linewidth=1, ax=ytd_ax,
)

ytd_ax.set(
    title="Price of a troy ounce of gold since 2024",
    xlabel="Date",
    ylabel="Price",
)

plt.show()

In [None]:
# First row of the full dataset where the price is above 2070
# NOTE(review): this filters df_au rather than df_au_ytd although it sits in the
# year-to-date section — confirm that the full history is intended
above_2070 = df_au["price"] > 2070
df_au.loc[above_2070].iloc[0]

In [None]:
# Row holding the highest price in the year-to-date subset
ytd_high_date = df_au_ytd["price"].idxmax()
df_au_ytd.loc[ytd_high_date]

In [None]:
# TODO: get the average price for Jan-Mar and Apr-May
#

- Price broke out in early March,
- peaked in mid-April,
- and has recently been rising again.

### Volatility

In [None]:
# 1-week rolling standard deviation of the daily price change (5 trading days per week);
# rows without a full window are dropped.
# A new frame is built with assign/dropna instead of adding the column and dropping
# rows in place: df_au_ytd may be a slice of df_au, and in-place writes to a slice
# trigger pandas' SettingWithCopyWarning.
df_au_ytd = df_au_ytd.assign(
    volatility_1w=df_au_ytd["price_change"].rolling(window=5).std()
).dropna(subset=["volatility_1w"])

In [None]:
# 1-week rolling volatility over the year-to-date subset
ytd_vol_fig, ytd_vol_ax = plt.subplots(figsize=(14, 8))

sns.lineplot(
    data=df_au_ytd, x=df_au_ytd.index, y="volatility_1w",
    color="red", linewidth=1, ax=ytd_vol_ax,
)

ytd_vol_ax.set(
    title="Weekly volatility of the price of gold across time",
    xlabel="Date",
    ylabel="Volatility",
)

plt.show()

In [None]:
# Breakout point: first row where the 1-week volatility is above 1.25%
df_au_ytd.query("volatility_1w > 0.0125").iloc[0]

In [None]:
# TODO: get the average volatility for Jan-Mar and Apr-May
#

- the avg was ...
- then it broke out on March 21st
- and avg became ... since.