In [None]:
!pip install yfinance


In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from scipy.stats import gmean

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
import gc

np.random.seed(42069)
warnings.simplefilter('ignore')
%matplotlib inline

In [None]:

# Default (width, height) applied to every figure that does not set its own size.
params = {'figure.figsize': (10, 5)}
plt.rcParams.update(params)

In [None]:

# Download daily Micro Gold futures (MGC=F) bars from Yahoo Finance - more than
# enough for prototyping.
df = yf.download(
        # tickers list or string as well
        tickers = "MGC=F",
        # use "period" instead of start/end
        # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
        # (optional, default is '1mo')
        period = "5y",
        # fetch data by interval (including intraday if period < 60 days)
        # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
        # (optional, default is '1d')
        interval = "1d",
        # group by ticker (to access via data['SPY'])
        # (optional, default is 'column')
        group_by = 'ticker',
        # adjust all OHLC automatically
        # (optional, default is False)
        auto_adjust = True,
        # download pre/post regular market hours data
        # (optional, default is False)
        prepost = True,
        # use threads for mass downloading? (True/False/Integer)
        # (optional, default is True)
        threads = True,
        # proxy URL scheme to use when downloading
        # (optional, default is None)
        proxy = None
    )

# Fail here with a clear message rather than letting later cells break on an
# empty frame (e.g. after a network error or an unknown ticker).
if df.empty:
    raise RuntimeError("yfinance returned no data for ticker 'MGC=F'")

# Depending on the installed yfinance version, the columns may come back as a
# MultiIndex (ticker level + field level) even for a single ticker. Later cells
# index by plain field names such as df['Close'], so keep only the level that
# holds those names; a flat column index is left untouched.
if isinstance(df.columns, pd.MultiIndex):
    field_level = next(
        (
            level
            for level in range(df.columns.nlevels)
            if 'Close' in df.columns.get_level_values(level)
        ),
        df.columns.nlevels - 1,
    )
    df.columns = df.columns.get_level_values(field_level)

In [None]:
# (rows, columns) of the downloaded frame.
df.shape

In [None]:

# Preview the first rows of the downloaded data.
df.head()

In [None]:

# Row-wise geometric mean of the four OHLC prices, stored as a new column.
ohlc_columns = ['Open', 'High', 'Low', 'Close']
df['avg_price'] = gmean(df[ohlc_columns], axis=1)

In [None]:

# Preview the frame again; it now includes the avg_price column.
df.head()

In [None]:
# Percentage of missing values per column, sorted from most to least missing
# (at most the first 60 columns), displayed with a colour gradient.
missing_counts = df.isna().sum().sort_values(ascending=False)
missing_pct = missing_counts * 100 / df.shape[0]
nullvaluecheck = pd.DataFrame(missing_pct, columns=['missing %']).head(60)
nullvaluecheck.style.background_gradient(cmap='PuBu')

In [None]:

# Daily log returns of the close: log(P_t) - log(P_{t-1}). NaN entries (such as
# the first row, which has no predecessor) are dropped.
returns = np.log(df['Close']).diff().dropna()

In [None]:
# Time series of the daily log returns.
fig_series, ax_series = plt.subplots()
ax_series.plot(returns)
ax_series.set_title('Micro Gold Futures Daily Log Returns')
ax_series.set_xlabel('Timestamp')
# The returns are raw log differences (never multiplied by 100), so the axis
# is labelled without a "%" unit.
ax_series.set_ylabel('Log Return')
fig_series.tight_layout()

# Histogram of the same returns; bins='auto' delegates the bin count to NumPy.
fig_hist, ax_hist = plt.subplots()
ax_hist.hist(returns, bins='auto')
ax_hist.set_title('Micro Gold Futures Daily Log Returns Histogram')
ax_hist.set_ylabel('Frequency')
ax_hist.set_xlabel('Log Return')
fig_hist.tight_layout()

In [None]:
# 70/30 train/test split by position: the last 30% of rows form the test set,
# everything before them is the training set.
split = int(len(returns) * 0.3)
X = returns.iloc[:-split]
X_test = returns.iloc[-split:]

In [None]:
# Overlay the two segments to show where the train/test boundary falls.
fig, ax = plt.subplots()
ax.plot(X, label='Training Set')
ax.plot(X_test, label='Testing Set')
# The figure shows both segments, so the title names both.
ax.set_title('GOLD Training and Testing Sets')
ax.set_xlabel('Timestamp')
# Raw log returns are plotted (not scaled by 100), hence no "%" unit.
ax.set_ylabel('Log Return')
ax.legend()
plt.show()

In [None]:
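# hmmlearn is required by the import in the next cell. If it is not installed,
# uncomment the following line to install it into the kernel's environment:
# %pip install hmmlearn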

In [None]:
from hmmlearn import hmm

In [None]:
# Convert both sets to single-feature column arrays of shape (n_samples, 1).
# np.asarray accepts a pandas Series as well as an ndarray, so re-running this
# cell is harmless (calling .to_numpy() on an already-converted array would fail).
X = np.asarray(X).reshape(-1, 1)
X_test = np.asarray(X_test).reshape(-1, 1)

In [None]:

# Two-state Gaussian HMM with diagonal covariances.
model = hmm.GaussianHMM(
    # 2 States
    n_components=2,
    covariance_type="diag",
    # Seed the model itself so the fit is repeatable even when this cell is
    # re-run on its own, independent of the state of NumPy's global RNG.
    random_state=42069,
    # Print per-iteration convergence information during fit.
    verbose=True
)

In [None]:
%%time
# Fit the HMM on the training set; the %%time cell magic above reports the run time.
model.fit(X)

In [None]:
# Replace the model's transition matrix with a fixed symmetric one: each state
# is kept with probability 0.9 and left with probability 0.1.
stay_prob, switch_prob = 0.9, 0.1
model.transmat_ = np.array([
    [stay_prob, switch_prob],
    [switch_prob, stay_prob],
])

In [None]:
# Predicted hidden-state label for every observation:
# Z_train for the training set, Z for the test set.
Z_train = model.predict(X)
Z = model.predict(X_test)

In [None]:

# Compute State Changes
returns_train0 = np.empty(len(Z_train))
returns_train1 = np.empty(len(Z_train))
returns_train0[:] = np.nan
returns_train1[:] = np.nan

# Create series for each state change
returns_train0[Z_train == 0] = returns[:-split][Z_train == 0]
returns_train1[Z_train == 1] = returns[:-split][Z_train == 1]


fig, ax = plt.subplots(figsize=(12,8))

# Plot the Volatility Regime and the states
plt.subplot(211)
plt.plot(Z)
plt.title(label='GOLD Training Volatility Regime')

plt.subplot(212)
plt.plot(returns_train0, label='State_0 (Low Volatility)', color='b')
plt.plot(returns_train1, label='State_1 (High Volatility)', color='r', )
plt.title(label='GOLD Training Volatility Clusters')
plt.legend()
plt.tight_layout()

In [None]:

# Compute State Changes
returns0 = np.empty(len(Z))
returns1 = np.empty(len(Z))
returns0[:] = np.nan
returns1[:] = np.nan

# Create series for each state change
returns0[Z == 0] = returns[-split:][Z == 0]
returns1[Z == 1] = returns[-split:][Z == 1]

# Plot the Volatility Regime and the states
fig, ax = plt.subplots(figsize=(12,8))

plt.subplot(211)
plt.plot(Z)
plt.title(label='GOLD Volatility Regime')

plt.subplot(212)
plt.plot(returns0, label='State_0 (Low Volatility)', color='b')
plt.plot(returns1, label='State_1 (High Volatility)', color='r')
plt.title(label='GOLD Volatility Clusters')

plt.legend()
plt.tight_layout()