# Comparison of gold and silver prices

## Setup

In [None]:
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the gold (AU) and silver (AG) tables from their CSV files
au_csv_path = "../data/AU.csv"
ag_csv_path = "../data/AG.csv"
df_au = pd.read_csv(au_csv_path)
df_ag = pd.read_csv(ag_csv_path)

In [None]:
# Parse the "date" column and use it as the index of both dfs.
# The guard keeps this cell re-runnable: after the first run "date" is the index
# and no longer a column, so an unguarded second run would raise a KeyError.
# inplace=True is deliberate here: it mutates the objects bound to df_au / df_ag.
for df in (df_au, df_ag):
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])
        df.set_index("date", inplace=True)

In [None]:
# Charts theme: seaborn dark grid with faint grid lines, then matplotlib's dark background style
grid_rc = {"grid.alpha": 0.33}
sns.set_theme(style="darkgrid", rc=grid_rc)
plt.style.use("dark_background")

## Preparing data into a combined df

In [None]:
# Volatility of each metal: rolling standard deviation of the row-to-row
# percentage price change over a 252-row window
# (252 rows ~ one trading year, assuming one row per trading day - TODO confirm)
for frame, metal in ((df_au, "au"), (df_ag, "ag")):
    pct_moves = frame["price"].pct_change()
    frame[f"{metal}_volatility"] = pct_moves.rolling(window=252).std()

In [None]:
# Prefix each price column with its metal so the names stay unique once the dfs are combined
for frame, metal in ((df_au, "au"), (df_ag, "ag")):
    frame.rename(columns={"price": f"{metal}_price"}, inplace=True)

In [None]:
# Put both metals side by side in one df; the inner join keeps only index values present in both
df_combined = df_au.join(other=df_ag, how="inner")

## Comparison

### Prices

In [None]:
# Decade of each row (e.g. 1987 -> 1980)
row_years = df_combined.index.year
df_combined["decade"] = row_years - row_years % 10

In [None]:
# Scatter of gold price vs silver price, one point per row, coloured by decade
# (the palette lists seven colours, applied to the decade values)
custom_palette = ["yellow", "purple", "lightsteelblue", "darkorange", "limegreen", "darkred", "teal"]

fig, ax = plt.subplots(figsize=(14, 8))
sns.scatterplot(
    data=df_combined,
    x="au_price",
    y="ag_price",
    hue="decade",
    palette=custom_palette,
    alpha=0.7,
    linewidth=0.2,
    ax=ax,
)

ax.set_title("Price of gold vs price of silver")
ax.set_xlabel("Gold price")
ax.set_ylabel("Silver price")
ax.legend(title="Decade")

plt.show()

- As we've seen before, the prices of gold and silver have a strong correlation of 0.9.
- We can see that in the 70s, 80s and 2000s there were times when the price of silver rose more than the price of gold.
- Overall, however, the price of gold rose more than silver's. Let's compare the prices over time.

In [None]:
# Gold and silver prices over time, each on its own y axis (twin axes sharing the x axis)
au_color = "yellow"
ag_color = "lightgrey"

fig, ax1 = plt.subplots(figsize=(14, 8))
ax2 = ax1.twinx()

for price_col, axis, line_color in (("au_price", ax1, au_color), ("ag_price", ax2, ag_color)):
    sns.lineplot(data=df_combined, x=df_combined.index, y=price_col, ax=axis, color=line_color, linewidth=0.4)
    # Log scale compresses the y axis so early price fluctuations stay visible
    axis.set_yscale("log")

plt.title("Prices of a troy ounce of gold and silver across time")
ax1.set_xlabel("Date")
ax1.set_ylabel("Gold price")
ax2.set_ylabel("Silver price")

# The plotted lines are too thin to read in a legend, so build thicker proxy handles by hand
legend_handles = [
    Line2D([0], [0], color=line_color, lw=2, label=metal_label)
    for line_color, metal_label in ((au_color, "Gold"), (ag_color, "Silver"))
]
ax1.legend(handles=legend_handles)

plt.show()

- During the 60s and 70s the prices stayed close, but silver started losing momentum in the mid 80s.
- The prices drifted apart during the 90s and especially since the 2010s.
- We need to analyse the ratio.

#### Price ratio

In [None]:
# Gold-silver price ratio (gold price divided by silver price) and its 252-row moving average
price_ratio = df_combined["au_price"].div(df_combined["ag_price"])
df_combined["price_ratio"] = price_ratio
df_combined["price_ratio_1y_ma"] = price_ratio.rolling(window=252).mean()

In [None]:
# Price ratio over time (thin line) with its moving average drawn on top (thicker line)
fig, ax = plt.subplots(figsize=(14, 8))

ratio_lines = (
    ("price_ratio", "Price ratio", "tan", 0.25),
    ("price_ratio_1y_ma", "Moving average (1-year)", "aqua", 0.75),
)
for column, legend_label, line_color, line_width in ratio_lines:
    sns.lineplot(data=df_combined, x=df_combined.index, y=column, label=legend_label, color=line_color, linewidth=line_width, ax=ax)

ax.set_title("Ratio of the price of gold and silver across time")
ax.set_xlabel("Date")
ax.set_ylabel("Ratio")

plt.show()

In [None]:
# Mean price ratio of each decade, rounded to 2 decimals
df_combined_dec = (
    df_combined.groupby("decade")["price_ratio"]
    .mean()
    .round(2)
    .reset_index()
    .rename(columns={"price_ratio": "average_price_ratio"})
)
# Show it as a single wide row: one column per decade
df_combined_dec.set_index("decade").T

- Before the mid 80s the ratio was at its lowest.
- During the 80s, silver started losing ground to gold, with the ratio peaking in the early 90s.
- During the 90s, silver partly recovered, but then went sideways.
- During the 2010s the ratio rose again.
- This decade the ratio is high.

### Price change year over year

In [None]:
# Yearly returns of both metals.
# Step 1: first and last price of every calendar year (in row order within the year)
df_combined_yearly = df_combined.groupby(df_combined.index.year).agg(
    au_first_price=("au_price", "first"),
    au_last_price=("au_price", "last"),
    ag_first_price=("ag_price", "first"),
    ag_last_price=("ag_price", "last"),
)
# Step 2: relative change from the first to the last price of the same year.
# NOTE: the return is measured within each calendar year, not against the previous year's close.
for metal in ("au", "ag"):
    first_price = df_combined_yearly[f"{metal}_first_price"]
    last_price = df_combined_yearly[f"{metal}_last_price"]
    df_combined_yearly[f"{metal}_price_change"] = (last_price - first_price) / first_price

In [None]:
# melt() needs the year as a regular column to use it as id_vars
df_combined_yearly["year"] = df_combined_yearly.index

# Long format for the bar chart: one row per (year, metal) holding that metal's price change
metal_labels = {"au_price_change": "Gold", "ag_price_change": "Silver"}
df_long = (
    df_combined_yearly.melt(
        id_vars="year",
        value_vars=["au_price_change", "ag_price_change"],
        var_name="metal",
        value_name="price_change",
    )
    .assign(metal=lambda long_df: long_df["metal"].map(metal_labels))
    .set_index("year")
)

In [None]:
# Bar chart of the yearly return of each metal.
# df_long is indexed by "year"; reset_index() turns it back into a regular column so that
# x="year" is a plain column lookup and does not rely on seaborn resolving index level names.
plot_data = df_long.reset_index()

fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(data=plot_data, x="year", y="price_change", hue="metal", palette={"Gold": "gold", "Silver": "silver"}, ax=ax)

# Compress the y axis to see smaller bars
ax.set_yscale("symlog", linthresh=0.75)

ax.set_title("Year over year return of gold and silver across time")
ax.set_xlabel("Year")
ax.set_ylabel("Return")
ax.legend(title="Metal")

# Show only the beginning of each decade on the x-axis.
# seaborn draws numeric categories in ascending order, so pair every tick label with its year
# and hide the years not divisible by 10 (no assumption about which year the data starts in).
years = sorted(plot_data["year"].unique())
for year, label in zip(years, ax.get_xticklabels()):
    if year % 10 != 0:
        label.set_visible(False)

plt.show()

- We can see that the absolute return is usually higher for silver, especially since the 80s, probably due to its higher volatility.
- Let's check the difference.

#### Price change year over year difference

In [None]:
# Signed difference between gold's and silver's yearly price change (positive = gold did better)
gold_minus_silver = df_combined_yearly["au_price_change"].sub(df_combined_yearly["ag_price_change"])
# Store the size of the advantage as an absolute value ...
df_combined_yearly["au_price_change_adv"] = gold_minus_silver.abs()
# ... and which metal it belongs to (anything that is not strictly positive counts as Silver)
df_combined_yearly["best_metal"] = gold_minus_silver.gt(0).map({True: "Gold", False: "Silver"})

In [None]:
# Bar chart of the yearly return advantage, coloured by the metal that performed best that year
fig, ax = plt.subplots(figsize=(14, 8))

# Every year has exactly one bar (its best metal), so dodge=False keeps the bars
# full-width and centred on their tick instead of leaving an empty slot for the other metal.
sns.barplot(
    data=df_combined_yearly,
    x=df_combined_yearly.index,
    y="au_price_change_adv",
    hue="best_metal",
    palette={"Gold": "gold", "Silver": "silver"},
    dodge=False,
    ax=ax,
)

# Compress the y axis to see smaller bars
ax.set_yscale("symlog", linthresh=0.2)

ax.set_title("Percentage points advantage of each metal in year over year return across time")
ax.set_xlabel("Year")
ax.set_ylabel("Return difference")
ax.legend(title="Best performing metal")

# Show only the beginning of each decade on the x-axis.
# seaborn draws numeric categories in ascending order, so pair every tick label with its year
# and hide the years not divisible by 10 (no assumption about which year the data starts in).
years = sorted(df_combined_yearly.index)
for year, label in zip(years, ax.get_xticklabels()):
    if year % 10 != 0:
        label.set_visible(False)

plt.show()

- As we have seen, silver's year-over-year return is higher than gold's.

### Yearly volatility

In [None]:
# Rolling volatility of both metals over time, drawn on a shared axis
fig, ax = plt.subplots(figsize=(14, 8))

volatility_lines = (
    ("au_volatility", "Gold volatility", "yellow"),
    ("ag_volatility", "Silver volatility", "lightgrey"),
)
for column, legend_label, line_color in volatility_lines:
    sns.lineplot(data=df_combined, x=df_combined.index, y=column, label=legend_label, color=line_color, linewidth=0.75, ax=ax)

ax.set_title("Yearly volatility of the price of gold and silver across time")
ax.set_xlabel("Date")
ax.set_ylabel("Volatility")

plt.show()

#### Yearly volatility ratio

In [None]:
# Gold volatility divided by silver volatility (above 1 = gold is the more volatile metal)
df_combined["volatility_ratio"] = df_combined["au_volatility"].div(df_combined["ag_volatility"])

In [None]:
# Volatility ratio over time, with a horizontal reference line at 1 (both metals equally volatile)
fig, ax = plt.subplots(figsize=(14, 8))

sns.lineplot(data=df_combined, x=df_combined.index, y="volatility_ratio", label="Ratio", color="red", linewidth=0.75, ax=ax)
ax.axhline(y=1, label="Equal volatility", color="gold", linewidth=2)

ax.set_title("Ratio of the yearly volatility of the price of gold and silver across time")
ax.set_xlabel("Date")
ax.set_ylabel("Ratio")
ax.legend()

plt.show()

In [None]:
# Percentage of rows (among those with a non-missing ratio) where the ratio was at least 1
vol_ratio = df_combined["volatility_ratio"]
rows_with_ratio = vol_ratio.count()
rows_ratio_ge_1 = vol_ratio[vol_ratio >= 1].count()
(rows_ratio_ge_1 / rows_with_ratio * 100).round(2)

- Gold was very rarely more volatile than silver: less than 6% of the time.
- It only happened during the 70s and early 2000s.