In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
!pip install yfinance
import yfinance as yf
import seaborn as sns
import matplotlib.pyplot as plt

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

In [None]:
# Look up each ticker through yfinance and immediately request its full
# available price history (period="max"). Every name below ends up bound to
# the frame returned by `history`, one frame per commodity.

# Metals
gold = yf.Ticker('GC=F').history(period="max")
silver = yf.Ticker('SI=F').history(period="max")
platinum = yf.Ticker('PL=F').history(period="max")
copper = yf.Ticker('HG=F').history(period="max")
palladium = yf.Ticker('PA=F').history(period="max")

# Energy
crudeOil = yf.Ticker('CL=F').history(period="max")
heatingOil = yf.Ticker('HO=F').history(period="max")
natGas = yf.Ticker('NG=F').history(period="max")
rbobGas = yf.Ticker('RB=F').history(period="max")
brentCrude = yf.Ticker('BZ=F').history(period="max")

# Grains and soy
corn = yf.Ticker('ZC=F').history(period="max")
oat = yf.Ticker('ZO=F').history(period="max")
wheat = yf.Ticker('KE=F').history(period="max")
soyMeal = yf.Ticker('ZM=F').history(period="max")
soyOil = yf.Ticker('ZL=F').history(period="max")
soybean = yf.Ticker('ZS=F').history(period="max")

# Livestock
feederCattle = yf.Ticker('GF=F').history(period="max")
leanHogs = yf.Ticker('HE=F').history(period="max")
liveCattle = yf.Ticker('LE=F').history(period="max")

# Cocoa, coffee, cotton, lumber, sugar
cocoa = yf.Ticker('CC=F').history(period="max")
coffee = yf.Ticker('KC=F').history(period="max")
cotton = yf.Ticker('CT=F').history(period="max")
lumber = yf.Ticker('LBS=F').history(period="max")
sugar = yf.Ticker('SB=F').history(period="max")

In [None]:
# Two parallel lists used when reshaping the per-commodity frames:
# commodityName[k] is the label that belongs to the frame at commodities[k].
commodityName = [
    'gold', 'silver', 'platinum', 'copper', 'palladium',
    'crudeOil', 'heatingOil', 'natGas', 'rbobGas', 'brentCrude',
    'corn', 'oat', 'wheat', 'soyMeal', 'soyOil', 'soybean',
    'feederCattle', 'leanHogs', 'liveCattle',
    'cocoa', 'coffee', 'cotton', 'lumber', 'sugar',
]
commodities = [
    gold, silver, platinum, copper, palladium,
    crudeOil, heatingOil, natGas, rbobGas, brentCrude,
    corn, oat, wheat, soyMeal, soyOil, soybean,
    feederCattle, leanHogs, liveCattle,
    cocoa, coffee, cotton, lumber, sugar,
]

In [None]:
# Reduce every commodity frame to a single column holding its close price and
# label that column with the commodity's name. The date stays as the index.
col2drop = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits']

# zip pairs each frame with its label directly, replacing the manual counter.
for frame, name in zip(commodities, commodityName):
    # The frames are modified in place because `commodities` holds the very
    # objects that are concatenated later. errors='ignore' skips columns that
    # are absent, so this cell can be re-run after the columns have already
    # been dropped instead of raising KeyError.
    frame.drop(columns=col2drop, errors='ignore', inplace=True)
    # Assigning a one-element list fails loudly if anything other than exactly
    # one column is left, rather than carrying an unexpected column forward.
    frame.columns = [name]

In [None]:
# Combine the per-commodity frames side by side into one wide frame, one column
# per commodity. The outer join keeps every index value that appears in any frame.
df = pd.concat(commodities, axis='columns', join='outer')

In [None]:
# Some prices are missing for things like holidays. Forward-fill copies the
# previous price into each gap; dropna then removes the rows that still contain
# a missing value (rows with nothing earlier to fill from), so only dates on
# which every commodity has a price remain.
# Both methods return a new DataFrame and leave `df` untouched, so the result
# must be assigned back; otherwise the correlations computed afterwards would
# still run on the unfilled data.
df = df.ffill().dropna()
df.head()

In [None]:
# Pairwise correlation between all commodity columns, drawn as a heatmap on an
# explicitly created 12x10 figure.
correlations = df.corr()
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlations, ax=ax)

As would be expected, commodities are highly correlated with other commodities from the same category. For example, the agriculture sector (corn, oats, wheat, soy products) makes a distinct lighter box on the graph. Three commodities that seem unusually uncorrelated with the rest of the market are natural gas, palladium, and lumber. If you were trying to diversify your portfolio, it could be beneficial to include these three.

## Looking at correlations during a market downturn

In [None]:
# Fetch the full S&P 500 index history in one step and plot its close over
# time so that periods of market downturn are visible.
market = yf.Ticker('^GSPC').history(period='max')
sns.lineplot(x='Date', y='Close', data=market)

The S&P 500 shows a big drop-off in 2008/09 due to the housing market crash. Let's take a look at commodity correlations during that time.

In [None]:
# Restrict the analysis to the 2008-2009 market downturn.
CRISIS_START, CRISIS_END = '2008-01-01', '2009-12-31'

# Same treatment as before. ffill()/dropna() return new frames, so their result
# has to be assigned for it to take effect. The forward fill runs on the full
# history *before* slicing so that a gap in the first days of 2008 can be
# filled from the last available 2007 price; dropna then removes any date in
# the window that still lacks a price for some commodity.
# NOTE: this rebinds `df` to the two-year window, so the earlier cells must be
# re-run (from the concat cell onward) to get the full-history frame back.
df = df.ffill().loc[CRISIS_START:CRISIS_END].dropna()

correlations = df.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlations)

This analysis uses a much shorter time period, so there is less data to draw from, but it shows that the commodities least correlated with the others during this period were actually gold, sugar, and, to a lesser extent, cocoa. The graph also appears lighter in most sections, suggesting that most commodities become more correlated with each other during market crashes. This could be an issue if you invest in commodities for their diversification benefit, i.e. to help protect your portfolio during market downturns.