# HMM Regressor

Notebook applying HMM regressor to make predictions on player performance.

1. ctrl+f TODO and change de dates to keep the last 3 years

2. Change save path and name with correct year

3. Run notebook


## 1. Preparation
---

In [3]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import re
import math
import pickle
import glob
import xgboost
import tsfresh
import os
import glob
from scipy import stats
from hmmlearn import hmm
from datetime import datetime
from pprint import pprint
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

sns.set_context('notebook')
sns.set_color_codes()
plot_kwds = {'alpha' : 0.5, 's' : 100, 'linewidths':0}
large = 22; med = 16; small = 12
params = {'axes.titlesize': large,
          'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.style.use('seaborn-whitegrid')
sns.set_style("white")

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

pd.options.mode.chained_assignment = None  # default='warn'
np.random.seed(42)

# Variables
scrape = False

### 1.1 Example

In [4]:
model = hmm.GaussianHMM(n_components=3, covariance_type="full")
model.startprob_ = np.array([0.6, 0.3, 0.1])
model.transmat_ = np.array([[0.7, 0.2, 0.1],
                            [0.3, 0.5, 0.2],
                            [0.3, 0.3, 0.4]])
model.means_ = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, 10.0]])
model.covars_ = np.tile(np.identity(2), (3, 1, 1))
X, Z = model.sample(100)

## 2. Get Predictions
---
### 2.1 Real Data

In [4]:
all_stats = {}
filenames = glob.glob(os.path.join('../LSTM-Neural-Network-for-Time-Series-Prediction/data', '*.csv'))
for f in tqdm(filenames):
    # print(f.split('/')[-1])
    if not os.path.basename(f) in all_stats.keys():
        data = pd.read_csv(f)
        data['Date'] = pd.to_datetime(data['Date'])
        # TODO: Change dates to be 2018-2021
        data = data[data['Date'] < pd.datetime(2021, 10, 1)]
        data = data[data['Date'] > pd.datetime(2018, 10, 1)]
        if data.shape[0] < 25:
            all_stats[f.split('/')[-1]] = 0
            continue
        X = np.array(data['Total_Points']).reshape(-1, 1)
        # X = np.array(pd.read_csv('../LSTM-Neural-Network-for-Time-Series-Prediction/data/NATHAN-MACKINNON-8477492.csv')['Total_Points']).reshape(-1, 1)
        if np.unique(X).shape == 1:
            all_stats[f.split('/')[-1]] = 0
            continue
        if X.shape[0] == 0:
            all_stats[f.split('/')[-1]] = 0
            continue
        n_states = int(X.max() + 1)
        model = hmm.GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=1000)
        model = model.fit(X)
        model.monitor_.converged

        points = np.array([])
        for i in np.arange(0,500):
            tmp = model.sample(82)[0].sum()
            points = np.append(points, tmp)

        statistics = stats.describe(points)
        all_stats[f.split('/')[-1]] = statistics

  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
100%|██████████| 2042/2042 [56:30<00:00,  1.66s/it]


### 2.2 Save Results

In [5]:
all_stats_df = pd.DataFrame.from_dict(all_stats, orient='columns').T
all_stats_df.columns = ['nobs', 'minmax', 'mean', 'variance', 'skewness', 'kurtosis']
# TODO change filename
all_stats_df.to_pickle('predictions_hmm_3years_for_2022.pkl')
all_stats_df.head(10)

Unnamed: 0,nobs,minmax,mean,variance,skewness,kurtosis
A.J.-GREER-8478421.csv,0,0,0.0,0.0,0.0,0.0
AARON-DELL-8477180.csv,500,"(-0.34931946098237093, 0.29937947793483266)",0.00585918,0.0129983,0.0221002,-0.162712
AARON-EKBLAD-8477932.csv,500,"(22.820042268996314, 68.42109721139792)",43.2077,41.5721,0.104678,0.397996
AARON-NESS-8474604.csv,500,"(-0.24174847636782384, 10.08309836389127)",3.35169,2.99911,0.483191,0.337286
AARON-PALUSHAJ-8474030.csv,0,0,0.0,0.0,0.0,0.0
AARON-ROME-8470310.csv,0,0,0.0,0.0,0.0,0.0
AARON-VOLPATTI-8475619.csv,0,0,0.0,0.0,0.0,0.0
ADAM-ALMQUIST-8475332.csv,0,0,0.0,0.0,0.0,0.0
ADAM-BOQVIST-8480871.csv,500,"(14.051905069655604, 51.82690277654217)",31.4421,39.1555,0.226293,-0.142658
ADAM-BROOKS-8478996.csv,0,0,0.0,0.0,0.0,0.0
