# Analysis of the historical price of a troy ounce of gold

## Setup

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import signal
from statsmodels.tsa.seasonal import STL

In [None]:
# Load the raw gold price table from the project's data folder
df_au = pd.read_csv(filepath_or_buffer="../data/AU.csv")

In [None]:
# Set date as index of the df, so later cells can group by `df_au.index.year`
# and slice rows by date string.
# The frame is rebuilt instead of mutated with `inplace=True`, and the step is
# guarded on the column's presence: once "date" has become the index, re-running
# this cell is a no-op rather than a KeyError.
if "date" in df_au.columns:
    df_au = df_au.assign(date=pd.to_datetime(df_au["date"])).set_index("date")

In [None]:
# Set charts theme
# seaborn's "darkgrid" theme is applied first, with the grid lines faded to 33% alpha;
# matplotlib's "dark_background" style sheet is then applied on top of it.
# NOTE(review): the resulting look depends on this order - keep the two calls in sequence.
sns.set_theme(style="darkgrid", rc={"grid.alpha": 0.33})
plt.style.use("dark_background")

In [None]:
# Save chart as png function
def save_chart_as_png(filename: str) -> None:
    """Save the current pyplot figure as ``../images/<filename>.png``.

    The target directory is created when it does not exist yet, so the first
    export on a fresh checkout does not fail with ``FileNotFoundError``.

    Args:
        filename: File name without the ``.png`` extension. It is interpolated
            into the path as-is, relative to ``../images``.

    Returns:
        None. The only effect is the file written to disk.
    """
    from pathlib import Path

    target = Path("../images") / f"{filename}.png"
    # savefig does not create missing folders on its own
    target.parent.mkdir(parents=True, exist_ok=True)

    plt.savefig(
        target,
        format="png",
        dpi=300,
        orientation="landscape",
        bbox_inches="tight",  # trim the empty margin around the axes
    )

## Dataset basic info

In [None]:
# Oldest and most recent rows, shown side by side as columns
pd.concat([df_au.iloc[:1], df_au.iloc[-1:]]).transpose()

In [None]:
# Summary statistics, one row per column of the frame
df_au.describe().transpose()

In [None]:
# How many trading days per year on average
# The most recent year in the data is left out because it is only partially
# recorded. It is read from the index instead of being hard-coded, so the cell
# stays correct when the CSV is refreshed.
# NOTE(review): assumes the latest year is the only incomplete one - confirm the
# first year of the file is fully covered.
last_year = df_au.index.year.max()
days_per_year = df_au[df_au.index.year != last_year].index.year.value_counts()
round(days_per_year.mean(), 2)

## Price of gold across time (long-term analysis)

In [None]:
# One-year moving average of the price, using a 252-row window
# (252 is taken as the number of trading days in a year)
df_au["price_1y_ma"] = df_au["price"].rolling(252).mean()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Thin raw daily series, with the 1-year moving average drawn over it
sns.lineplot(data=df_au, x=df_au.index, y="price", color="yellow", linewidth=0.25, ax=ax)
sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="price_1y_ma",
    label="Moving average (1-year)",
    color="aqua",
    linewidth=0.75,
    ax=ax,
)

ax.set_title("Price of a troy ounce of gold across time")
ax.set_xlabel("")
ax.set_ylabel("")

# Uncomment to export the chart to ../images:
#save_chart_as_png("1.1_AU_price")
plt.show()

**It's easier to see the early price fluctuations with a logarithmic scale on the y-axis**

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Same two series as the linear chart: raw daily price and its 1-year moving average
sns.lineplot(data=df_au, x=df_au.index, y="price", color="yellow", linewidth=0.25, ax=ax)
sns.lineplot(
    data=df_au,
    x=df_au.index,
    y="price_1y_ma",
    label="Moving average (1-year)",
    color="aqua",
    linewidth=0.75,
    ax=ax,
)

# Compress the y axis to see early price fluctuations
ax.set_yscale("log")

# The title names the scale so this figure cannot be mistaken for the linear
# chart of the same data when the notebook is skimmed
ax.set_title("Price of a troy ounce of gold across time (log scale)")
ax.set_xlabel("")
ax.set_ylabel("")

plt.show()

In [None]:
# Row of the day on which the price was highest
ath_date = df_au.loc[:, "price"].idxmax()
df_au.loc[[ath_date], :]

In [None]:
# Row of the day on which the price was lowest
atl_date = df_au.loc[:, "price"].idxmin()
df_au.loc[[atl_date], :]

In [None]:
# Local maxima of the price that are at least 1000 rows apart
peaks, _ = signal.find_peaks(df_au["price"], distance=1000)

# The 12 highest of those maxima, displayed in chronological order
(
    df_au.iloc[peaks]
    .nlargest(12, "price")
    .sort_values("date")
    .loc[:, ["price"]]
    .transpose()
)

In [None]:
# Local minima of the price (peaks of the negated series), at least 1000 rows apart
valleys, _ = signal.find_peaks(-df_au["price"], distance=1000)

# The 12 lowest of those minima, displayed in chronological order
(
    df_au.iloc[valleys]
    .nsmallest(12, "price")
    .sort_values("date")
    .loc[:, ["price"]]
    .transpose()
)

In [None]:
# Relative change between the first and the last recorded price
first_entry_price = df_au["price"].iloc[0]
last_entry_price = df_au["price"].iloc[-1]
(last_entry_price - first_entry_price) / first_entry_price

**Key takeaways:**
- The all-time low occurred in 1970, with the price at approximately \$35.
- Prices surged rapidly from the early 1970s, following the collapse of the Bretton Woods system in 1973.
- The price reached a peak in 1980 at around \$850.
- It then declined over the next 20 years.
- Prices rose consistently until the early 2010s, peaking in 2011 at about \$1,500.
- Declined slightly until 2019, then started rising again.
- An all-time high was achieved in April 2024, with prices nearing \$2,400.
- The price of gold has appreciated by approximately 6,670% since the first entry.

### Price change year-over-year

In [None]:
# Yearly table: first and last recorded price of each calendar year
df_au_yearly = (
    df_au.groupby(df_au.index.year)["price"]
    .agg(["first", "last"])
    .rename(columns={"first": "first_price", "last": "last_price"})
)

# Return of each year, measured from its first to its last recorded price
df_au_yearly["price_change"] = (
    df_au_yearly["last_price"]
    .sub(df_au_yearly["first_price"])
    .div(df_au_yearly["first_price"])
)

In [None]:
plt.figure(figsize=(10, 6))

sns.barplot(data=df_au_yearly, x=df_au_yearly.index, y="price_change", color="gold")

plt.title("Year-over-year return of gold across time")
plt.xlabel("")
plt.ylabel("")

# Show only beginning of decade
# The decision is taken from the year itself rather than from a tick position
# with a fixed offset, so it keeps working whatever the first year of the data is.
# NOTE(review): relies on seaborn drawing one bar per year in ascending order,
# i.e. in the same order as the groupby-sorted index - confirm if the plot call changes.
ax = plt.gca()
for year, label in zip(df_au_yearly.index, ax.get_xticklabels()):
    label.set_visible(year % 10 == 0)

plt.show()

In [None]:
# Year with the largest return (single-row frame)
df_au_yearly.nlargest(1, "price_change")

In [None]:
# Year with the smallest return (single-row frame)
df_au_yearly.nsmallest(1, "price_change")

In [None]:
# Mean of the yearly returns, to 4 decimals
df_au_yearly.loc[:, "price_change"].mean().round(4)

In [None]:
# Median of the yearly returns, to 4 decimals
df_au_yearly.loc[:, "price_change"].median().round(4)

In [None]:
# Spread (standard deviation) of the yearly returns, to 4 decimals
df_au_yearly.loc[:, "price_change"].std().round(4)

In [None]:
# Compounded return over all years: product of (1 + yearly return), minus 1.
# It differs from the "price appreciation since first entry" only by the price
# moves between the last day of each year and the first day of the next one.
df_au_yearly["price_change"].add(1).cumprod().iloc[-1].round(4) - 1

In [None]:
# Per-decade summary of the yearly returns: max, min, mean, median, standard
# deviation, compounded return, and the decade-over-decade price change

# Decade label of each year, e.g. 1987 -> 1980
df_au_yearly["decade"] = df_au_yearly.index // 10 * 10


def _rounded(stat_name):
    """Build an aggregator that returns the named Series statistic rounded to 4 decimals."""
    return lambda returns: getattr(returns, stat_name)().round(4)


def _compounded(returns):
    """Compound the yearly returns of one decade into a single return."""
    return (1 + returns).cumprod().iloc[-1].round(4) - 1


def _decade_over_decade(decade_labels):
    """Change from the decade's first recorded price to its last recorded price.

    Only the index (the years of the group) of the incoming Series is used; the
    prices themselves are looked up in ``df_au_yearly``.
    """
    years = decade_labels.index
    opening = df_au_yearly.loc[years, "first_price"].iloc[0]
    closing = df_au_yearly.loc[years, "last_price"].iloc[-1]
    return (closing - opening) / opening


df_au_yearly.groupby("decade").agg(
    max_yoy_price_change=("price_change", _rounded("max")),
    min_yoy_price_change=("price_change", _rounded("min")),
    avg_yoy_price_change=("price_change", _rounded("mean")),
    median_yoy_price_change=("price_change", _rounded("median")),
    std_yoy_price_change=("price_change", _rounded("std")),
    cumprod_yoy_price_change=("price_change", _compounded),
    dod_price_change=("decade", _decade_over_decade),
).transpose()

**Key takeaways:**
- In 1979, gold achieved a remarkable year-over-year return of 130%.
- Two years later, gold experienced its lowest return, dropping approximately 32.4%.
- The average annual return for gold has been around 9.8%, with a median return of about 5.5%.
- The 1970s were the most lucrative decade for gold, boasting an average annual return of 36%.
- During the 1980s and 1990s, gold's performance lagged, with an average annual return of around -2%.
- The 2000s delivered strong performance, with an average annual return of 15% and a median return of 21%.
- The current decade has shown promising results so far, with an average annual return of 9.2% and a median return of 12.4%.

### Yearly volatility

In [None]:
# Relative change of the price from one row to the next
df_au["price_change"] = df_au["price"].pct_change(periods=1)

In [None]:
# Ten largest daily moves by magnitude (up or down), shown with their sign
df_au.loc[
    df_au["price_change"].abs().sort_values(ascending=False).iloc[:10].index,
    ["price_change"],
].transpose()

In [None]:
# Rolling standard deviation of the daily price change over a 252-row (1-year) window
df_au["volatility_1y"] = df_au["price_change"].rolling(252).std()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# 1-year rolling volatility over the whole history
sns.lineplot(data=df_au, x=df_au.index, y="volatility_1y", color="red", linewidth=0.75, ax=ax)

ax.set_title("Yearly volatility of the price of gold across time")
ax.set_xlabel("")
ax.set_ylabel("")

plt.show()

In [None]:
# Day on which the 1-year rolling volatility was highest
ath_date = df_au.loc[:, "volatility_1y"].idxmax()
df_au.loc[[ath_date], ["price", "volatility_1y"]]

In [None]:
# Day on which the 1-year rolling volatility was lowest
atl_date = df_au.loc[:, "volatility_1y"].idxmin()
df_au.loc[[atl_date], ["price", "volatility_1y"]]

In [None]:
# Top 5 peaks
# The rolling volatility is NaN until its 252-row window is full, and scipy
# documents that find_peaks may return unexpected results on data containing
# NaNs. The search therefore runs on the valid rows only.
# NOTE(review): assumes the NaNs are the leading rows only; dropping NaNs from
# the middle of the series would make non-adjacent days neighbours.
has_volatility = df_au["volatility_1y"].notna().to_numpy()
peaks, _ = signal.find_peaks(df_au.loc[has_volatility, "volatility_1y"], distance=500)

# Map positions within the NaN-free series back to row positions in df_au
peaks = has_volatility.nonzero()[0][peaks]

df_au.iloc[peaks].nlargest(5, "volatility_1y").sort_values("date")[["volatility_1y"]].T

In [None]:
# Top 5 valleys
# The rolling volatility is NaN until its 252-row window is full, and scipy
# documents that find_peaks may return unexpected results on data containing
# NaNs. The search therefore runs on the valid rows only; valleys are found as
# peaks of the negated series.
# NOTE(review): assumes the NaNs are the leading rows only; dropping NaNs from
# the middle of the series would make non-adjacent days neighbours.
has_volatility = df_au["volatility_1y"].notna().to_numpy()
valleys, _ = signal.find_peaks(-df_au.loc[has_volatility, "volatility_1y"], distance=500)

# Map positions within the NaN-free series back to row positions in df_au
valleys = has_volatility.nonzero()[0][valleys]

df_au.iloc[valleys].nsmallest(5, "volatility_1y").sort_values("date")[["volatility_1y"]].T

In [None]:
# Mean of the 1-year rolling volatility over the whole history, to 3 decimals
df_au.loc[:, "volatility_1y"].mean().round(3)

In [None]:
# Mean 1-year rolling volatility of the rows falling in each decade
df_au_dec = (
    df_au.groupby(df_au.index.year // 10 * 10)["volatility_1y"]
    .mean()
    .round(3)
    .reset_index()
)
df_au_dec.columns = ["decade", "average_volatility_1y"]

# One column per decade for display
df_au_dec.set_index("decade").transpose()

**Key takeaways:**
- The five largest daily price changes occurred at the beginning of 1980.
- The average yearly volatility (the 1-year rolling standard deviation of daily price changes) is 1.1%.
- The all-time low occurred in 1970, whereas the all-time high was observed a decade later, with yearly volatility almost tenfold higher.
- Volatility significantly increased following the collapse of the Bretton Woods system in 1973.
- The market was highly volatile from the mid-1970s through the early 1980s, and during the dotcom bubble, the Great Recession, and the EU sovereign debt crisis.
- The market remained very stable during the 1990s, and also during the current decade excluding the pandemic period.

### STL decomposition (trend, seasonality, and residuals)

In [None]:
# Fit an STL decomposition of the price with a 252-row seasonal period;
# the result exposes the trend, seasonal and residual series used below
stl = STL(endog=df_au["price"], period=252).fit()

In [None]:
fig, axes = plt.subplots(4, 1, figsize=(10, 6), sharex=True)

# One stacked panel per series: (values, label, colour, line width, title)
stl_panels = [
    (df_au["price"], "Original", "yellow", 0.5, "Price of a troy ounce of gold across time"),
    (stl.trend, "Trend", "aqua", 1, "Trend component"),
    (stl.seasonal, "Seasonal", "fuchsia", 0.5, "Seasonal component"),
    (stl.resid, "Residual", "orangered", 0.5, "Residual component"),
]

for panel_ax, (values, name, colour, width, heading) in zip(axes, stl_panels):
    panel_ax.plot(df_au.index, values, label=name, color=colour, linewidth=width)
    panel_ax.set_title(heading)

plt.tight_layout()
plt.show()

#### Trend analysis

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Trend component on its own, on a logarithmic y axis
ax.plot(stl.trend, color="aqua", linewidth=1)

ax.set_title("Trend component of the price of gold across time")
ax.set_xlabel("")
ax.set_ylabel("Trend")

ax.set_yscale("log")

plt.show()

**Key takeaways:**
- The trend experienced significant growth during the 1970s.
- There was a slow decline over the following 20 years.
- Since then, it has been rising with notable consistency.

#### Seasonality analysis

In [None]:
# Mean of the seasonal component for each calendar month (1-12)
seasonal = stl.seasonal
monthly_avgs = seasonal.groupby(seasonal.index.month).mean()

fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(monthly_avgs.index, monthly_avgs.values, marker="o", color="fuchsia", linewidth=1)

ax.set_title("Average of the seasonal component of price of gold over the year")
ax.set_xlabel("Month")
ax.set_ylabel("Seasonality")

plt.show()

**Key takeaways:**
- Seasonal patterns exhibit variations over the years.
- The monthly averages of the seasonal component point to favorable periods during the first four months of the year and in September.
- Some analysts suggest that gold experiences a strong seasonal phase starting in mid-November, extending until the second half of February.

## Price of gold year-to-date (short-term analysis)

In [None]:
# Independent copy of the rows from 2024 onwards, so the columns added to it
# later do not touch df_au
df_au_ytd = df_au.loc["2024":].copy()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Daily price over the year-to-date window
sns.lineplot(data=df_au_ytd, x=df_au_ytd.index, y="price", color="yellow", linewidth=1, ax=ax)

# Smaller date labels on the x axis
plt.xticks(fontsize=8)

ax.set_title("Price of a troy ounce of gold since 2024")
ax.set_xlabel("")
ax.set_ylabel("")

plt.show()

In [None]:
# Relative change between the first and the last price of the YTD window
first_price = df_au_ytd["price"].iloc[0]
last_price = df_au_ytd["price"].iloc[-1]
(last_price - first_price) / first_price

In [None]:
# Start of the rise: first YTD day whose price is more than 20 above the previous row
rise_start = df_au_ytd.index[df_au_ytd["price"].diff().gt(20).to_numpy()][0]
df_au_ytd.loc[[rise_start], ["price", "price_change"]].round(decimals=2)

In [None]:
# Day of the highest YTD price (which is also the all-time high)
peak_date = df_au_ytd.loc[:, "price"].idxmax()
df_au_ytd.loc[[peak_date], ["price", "price_change"]].round(decimals=2)

In [None]:
# Mean daily price change from the start of the rise to the peak, both days included
df_au_ytd.loc[rise_start:peak_date, "price_change"].mean().round(5)

In [None]:
# Average price before the rise
# Label slicing with `.loc[:rise_start]` is end-inclusive and would count the
# first day of the rise as "before". Only the days strictly earlier than
# rise_start are averaged, matching how the weekly-volatility section excludes
# the breakout day from its "before" window.
avg_price_before = df_au_ytd.loc[df_au_ytd.index < rise_start, "price"].mean().round(0)
avg_price_before

In [None]:
# Mean price from the peak day (included) to the last row, rounded to whole units
avg_price_after = df_au_ytd.loc[peak_date:, "price"].mean().round(0)
avg_price_after

In [None]:
# Relative difference between the post-peak and the pre-rise average prices
avg_price_gap = avg_price_after - avg_price_before
(avg_price_gap / avg_price_before).round(3)

**Key takeaways:**
- The year-to-date price change stands at a notable 17.4%, showcasing strong market movement.
- Before the breakout, the average price hovered around \$2,032.
- The breakout occurred in early March.
- Post-breakout, the price grew quickly, averaging a daily increase of approximately 0.5%.
- By mid-April, the price peaked at \$2,400.
- Since reaching the all-time high in mid-April, the average price has stabilized around \$2,343, suggesting a period of consolidation.

### Weekly volatility

In [None]:
# Rolling standard deviation of the daily price change over a 5-row window
# (5 is taken as the number of trading days in a week)
df_au_ytd["volatility_1w"] = df_au_ytd["price_change"].rolling(5).std()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# 1-week rolling volatility over the year-to-date window
sns.lineplot(data=df_au_ytd, x=df_au_ytd.index, y="volatility_1w", color="red", linewidth=1, ax=ax)

# Smaller date labels on the x axis
plt.xticks(fontsize=8)

ax.set_title("Weekly volatility of the price of gold across time")
ax.set_xlabel("")
ax.set_ylabel("")

plt.show()

In [None]:
# Breakout: first YTD day whose 1-week volatility exceeds 0.0125
breakout_date = df_au_ytd.index[df_au_ytd["volatility_1w"].gt(0.0125).to_numpy()][0]
df_au_ytd.loc[[breakout_date], ["price", "price_change", "volatility_1w"]].round(decimals=2)

In [None]:
# Mean weekly volatility up to one calendar day before the breakout,
# so the breakout day itself is not counted
day_before_breakout = breakout_date - pd.Timedelta(days=1)
avg_vol_before = df_au_ytd.loc[:day_before_breakout, "volatility_1w"].mean().round(4)
avg_vol_before

In [None]:
# Mean weekly volatility from the breakout day (included) to the last row
avg_vol_after = df_au_ytd.loc[breakout_date:, "volatility_1w"].mean().round(4)
avg_vol_after

In [None]:
# Relative change from the pre-breakout to the post-breakout average volatility
avg_vol_gap = avg_vol_after - avg_vol_before
(avg_vol_gap / avg_vol_before).round(3)

**Key takeaways:**
- At the beginning of the year, weekly volatility averaged 0.6%, indicating relative stability.
- A breakout occurred on March 21st, dramatically altering the landscape.
- Following the breakout, the average volatility surged to 1.3%, more than doubling its previous level.