# Analysis of the historical price of a troy ounce of gold

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import signal
from statsmodels.tsa.seasonal import STL

In [None]:
# Chart appearance: seaborn's darkgrid theme with faint grid lines, followed by
# matplotlib's dark background style applied on top of it
grid_overrides = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=grid_overrides)
plt.style.use("dark_background")

In [None]:
# Read the gold price history from the CSV in the data folder
au_csv_path = "../data/AU.csv"
df_au = pd.read_csv(au_csv_path)

In [None]:
# Index the frame by parsed dates so later cells can select rows by date string.
# The rows are also sorted chronologically: the rolling windows, pct_change and
# date-range slices used below all assume an ascending DatetimeIndex. A stable
# sort keeps the original relative order of any rows that share a date.
# Built as a new frame (no inplace mutation) so the lineage stays explicit.
df_au = (
    df_au
    .assign(date=pd.to_datetime(df_au["date"]))
    .set_index("date")
    .sort_index(kind="mergesort")
)

In [None]:
# Overview of the indexed frame: columns, dtypes and non-null counts
df_au.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the frame's columns
df_au.describe()

## The price of gold across time

In [None]:
# Smooth the daily price with a trailing one-year mean
# (a year is taken as 252 trading days)
one_year_window = df_au["price"].rolling(window=252)
df_au["price_1y_ma"] = one_year_window.mean()

In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Thin line for the raw daily price, thicker line for its 1-year moving average
series_styles = {
    "price": {"label": "Price", "color": "gold", "linewidth": 0.25},
    "price_1y_ma": {"label": "Moving average (1-year)", "color": "cyan", "linewidth": 0.75},
}
for column, line_style in series_styles.items():
    sns.lineplot(data=df_au, x=df_au.index, y=column, ax=ax, **line_style)

ax.set_title("Price of a troy ounce of gold across time")
ax.set_xlabel("Date")
ax.set_ylabel("Price per troy ounce")

# A log scale compresses the axis so the early, much smaller price moves stay visible
ax.set_yscale("log")

fig.tight_layout()
plt.show()

In [None]:
# Row for the date on which the price was lowest
lowest_price_date = df_au["price"].idxmin()
df_au.loc[lowest_price_date]

In [None]:
# Row for the date on which the price was highest
highest_price_date = df_au["price"].idxmax()
df_au.loc[highest_price_date]

In [None]:
# Locate local price maxima that are at least 1000 rows apart, then list the
# five highest of them to pick out the major peaks
peaks = signal.find_peaks(df_au["price"], distance=1000)[0]
df_au.iloc[peaks].nlargest(5, "price")
# Open question: when was each previous all-time high broken?

- The price rose rapidly from the early 1970s, following the collapse of the Bretton Woods system in 1973.
- It peaked in 1980.
- It then declined and stabilized until the early 2000s.
- It rose consistently until the early 2010s.
- It stabilized until 2019.
- It started rising again in 2019.

In [None]:
# Daily return: fractional change of the price relative to the previous row
# (the first row has no predecessor, so its value is NaN)
df_au["price_change"] = df_au["price"].pct_change()

What were the 10 days with the largest price changes?

In [None]:
# Rank days by the size of their move in either direction, keep the ten
# largest, and show the signed change for those dates
absolute_moves = df_au["price_change"].abs()
largest_moves = absolute_moves.sort_values(ascending=False).head(10)
df_au.loc[largest_moves.index, ["price_change"]]

The five biggest price changes occurred at the beginning of 1980.

In [None]:
# 1-year moving standard deviation of the daily returns (252 trading days per year).
# The leading rows, where the window is still incomplete, are left as NaN rather
# than being dropped from df_au in place. Dropping them made this cell remove a
# further year of rows every time it was re-run, and it also removed the first
# year of prices from every later cell that reads df_au. The plot and the
# threshold comparisons below ignore NaN values on their own.
df_au["volatility_1y"] = df_au["price_change"].rolling(window=252).std()

In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Single line: rolling 1-year volatility over the whole history
sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="volatility_1y",
    color="gold",
    linewidth=0.75,
    ax=ax,
)

ax.set_title("Yearly volatility of the price of gold across time")
ax.set_xlabel("Date")
ax.set_ylabel("Volatility")

fig.tight_layout()
plt.show()

In [None]:
# Earliest row whose 1-year volatility is above 1.5%
above_threshold = df_au["volatility_1y"] > 0.015
df_au.loc[above_threshold].iloc[0]

In [None]:
# First row on or after 1985-01-02 whose 1-year volatility is above 0.02 (2%).
# NOTE(review): this comment previously said 1.5%, but the query threshold is 0.02 — confirm which is intended.
df_au.query("volatility_1y > 0.02").loc["1985-01-02":].iloc[0]

Gold was very volatile from the mid-1970s, peaking in the early 1980s, and again during the dot-com bubble, the Great Recession, and the EU sovereign debt crisis; right now it is not volatile.
It was very stable during the 1990s, and also now, without counting … (TODO: this sentence was left unfinished — complete the caveat).  
Right at the end of the collapse of the Bretton Woods system (1973), volatility rose sharply.

### Short-term price analysis (year-to-date)

In [None]:
# Independent copy of the rows from 1 January 2024 onwards (year-to-date view)
ytd_start = "2024-01-01"
df_au_ytd = df_au.loc[ytd_start:].copy()

In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Daily price for the year-to-date subset only
sns.lineplot(data=df_au_ytd, x=df_au_ytd.index, y="price", color="gold", linewidth=1, ax=ax)

# The title names the year-to-date scope; it used to repeat the title of the
# full-history chart, which made the two figures indistinguishable when skimming.
ax.set_title("Price of a troy ounce of gold (year-to-date)")
ax.set_xlabel("Date")
ax.set_ylabel("Price")

fig.tight_layout()
plt.show()

In [None]:
# Daily rows from 29 February to 7 March 2024 (both ends included)
df_au_ytd.loc["2024-02-29":"2024-03-07"]

In [None]:
# Row for the date with the highest price in the year-to-date subset
ytd_peak_date = df_au_ytd["price"].idxmax()
df_au_ytd.loc[ytd_peak_date]

In [None]:
# The price broke out in early March, peaked in mid-April, and has recently been rising again.

In [None]:
# Next: compute the weekly volatility for the year to date.

In [None]:
# 1-week moving standard deviation of the daily returns (5 trading days per week).
# The first rows, where the window is still incomplete, are left as NaN rather
# than being dropped from df_au_ytd in place. Dropping them made this cell remove
# more rows every time it was re-run, and it also removed the first days of the
# year from later views of df_au_ytd. The plot and the threshold comparison
# below ignore NaN values on their own.
df_au_ytd["volatility_1w"] = df_au_ytd["price_change"].rolling(window=5).std()

In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Single line: rolling 1-week volatility for the year-to-date subset
sns.lineplot(
    data=df_au_ytd,
    x=df_au_ytd.index,
    y="volatility_1w",
    color="gold",
    linewidth=1,
    ax=ax,
)

ax.set_title("Weekly volatility of the price of gold across time")
ax.set_xlabel("Date")
ax.set_ylabel("Volatility")

fig.tight_layout()
plt.show()

In [None]:
# Breakout point: earliest year-to-date row whose 1-week volatility is above 1.25%
breakout_mask = df_au_ytd["volatility_1w"] > 0.0125
df_au_ytd.loc[breakout_mask].iloc[0]

In [None]:
# Weekly volatility was low, then suddenly increased on March 21st.

## Time series STL decomposition to observe trend, seasonality, and residuals

In [None]:
# Split the price series into trend, seasonal and residual components,
# using a seasonal period of 252 observations (one trading year)
stl = STL(df_au["price"], period=252)
result = stl.fit()

fig, axes = plt.subplots(4, 1, figsize=(14, 8), sharex=True)

# One stacked panel per series: (panel title, values, line styling)
panels = [
    ("Original price", df_au["price"], {"label": "Original", "linewidth": 0.5}),
    ("Trend component", result.trend, {"label": "Trend", "color": "C1", "linewidth": 1}),
    ("Seasonal component", result.seasonal, {"label": "Seasonal", "color": "C2", "linewidth": 0.5}),
    ("Residual component", result.resid, {"label": "Residual", "color": "C3", "linewidth": 0.5}),
]
for panel_ax, (panel_title, values, line_style) in zip(axes, panels):
    panel_ax.plot(df_au.index, values, **line_style)
    panel_ax.set_title(panel_title)

fig.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(18, 7))

# Trend component from the STL fit, plotted against its own index
ax.plot(result.trend, color="gold", linewidth=1)

ax.set_title("Trend")
ax.set_xlabel("Date")
ax.set_ylabel("Gold price trend")

# Log scale, as in the price chart, so early movements are not flattened
ax.set_yscale("log")

fig.tight_layout()
plt.show()

In [None]:
# Keep only the seasonal component values that fall in the chosen calendar year
specific_year = 2008
seasonal = result.seasonal
in_chosen_year = seasonal.index.year == specific_year
seasonal_year = seasonal[in_chosen_year]

fig, ax = plt.subplots(figsize=(16, 7))

ax.plot(seasonal_year.index, seasonal_year, color="lightblue", linewidth=0.75)

ax.set_title(f"Seasonal component for the year {specific_year}")
ax.set_xlabel("Date")
ax.set_ylabel("Seasonality")

fig.tight_layout()
plt.show()

In [None]:
# Some analysts say that gold enters a very favorable seasonal phase in mid-November, which lasts until the second half of February.
# TODO: analyze the seasonality of gold next.