# HMM Regressor

Notebook applying HMM regressor to make predictions on player performance.

## Note

* CTRL-F and search for `TODO` to modify parts dependant of the nhl season.

## 1. Preparation
---

In [6]:
import pandas as pd
import numpy as np
import re
import math
import pickle
import glob
import xgboost
import tsfresh
import os
import glob
from scipy import stats
from hmmlearn import hmm
from datetime import datetime
from pprint import pprint
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

sns.set_context('notebook')
sns.set_color_codes()
plot_kwds = {'alpha' : 0.5, 's' : 100, 'linewidths':0}
large = 22; med = 16; small = 12
params = {'axes.titlesize': large,
          'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.style.use('seaborn-whitegrid')
sns.set_style("white")

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

pd.options.mode.chained_assignment = None  # default='warn'
np.random.seed(42)

# Variables
scrape = False

### 1.1 Example

In [4]:
model = hmm.GaussianHMM(n_components=3, covariance_type="full")
model.startprob_ = np.array([0.6, 0.3, 0.1])
model.transmat_ = np.array([[0.7, 0.2, 0.1],
                            [0.3, 0.5, 0.2],
                            [0.3, 0.3, 0.4]])
model.means_ = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, 10.0]])
model.covars_ = np.tile(np.identity(2), (3, 1, 1))
X, Z = model.sample(100)

## 2. Get Predictions
---
### 2.1 Real Data

In [7]:
all_stats = {}
filenames = glob.glob(os.path.join('../LSTM-Neural-Network-for-Time-Series-Prediction/data', '*.csv'))
for f in filenames:
    print(f.split('/')[-1])
    if not os.path.basename(f) in all_stats.keys():
        data = pd.read_csv(f)
        data['Date'] = pd.to_datetime(data['Date'])
        # TODO: Change dates to be 2017-2020
        data = data[data['Date'] < pd.datetime(2020, 10, 1)]
        data = data[data['Date'] > pd.datetime(2017, 10, 1)]
        if data.shape[0] < 25:
            all_stats[f.split('/')[-1]] = 0
            continue
        X = np.array(data['Total_Points']).reshape(-1, 1)
        # X = np.array(pd.read_csv('../LSTM-Neural-Network-for-Time-Series-Prediction/data/NATHAN-MACKINNON-8477492.csv')['Total_Points']).reshape(-1, 1)
        if np.unique(X).shape == 1:
            all_stats[f.split('/')[-1]] = 0
            continue
        if X.shape[0] == 0:
            all_stats[f.split('/')[-1]] = 0
            continue
        n_states = int(X.max() + 1)
        model = hmm.GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=1000)
        model = model.fit(X)
        model.monitor_.converged

        points = np.array([])
        for i in np.arange(0,500):
            tmp = model.sample(82)[0].sum()
            points = np.append(points, tmp)

        statistics = stats.describe(points)
        all_stats[f.split('/')[-1]] = statistics

A.J.-GREER-8478421.csv
AARON-DELL-8477180.csv
AARON-EKBLAD-8477932.csv
AARON-NESS-8474604.csv
AARON-PALUSHAJ-8474030.csv
AARON-ROME-8470310.csv
AARON-VOLPATTI-8475619.csv
ADAM-ALMQUIST-8475332.csv
ADAM-BOQVIST-8480871.csv
ADAM-BROOKS-8478996.csv
ADAM-BURISH-8470063.csv
ADAM-CLENDENING-8476431.csv
ADAM-CRACKNELL-8471490.csv
ADAM-ERNE-8477454.csv
ADAM-FOX-8479323.csv
ADAM-GAUDETTE-8478874.csv
ADAM-HALL-8467925.csv
ADAM-HENRIQUE-8474641.csv
ADAM-JOHNSON-8480341.csv
ADAM-LARSSON-8476457.csv
ADAM-LOWRY-8476392.csv
ADAM-MCQUAID-8471717.csv
ADAM-PARDY-8471385.csv
ADAM-PAYERL-8476522.csv
ADAM-PELECH-8476917.csv
ADAM-WERNER-8479530.csv
ADAM-WILCOX-8476330.csv
ADIN-HILL-8478499.csv
ADRIAN-KEMPE-8477960.csv
AL-MONTOYA-8471219.csv
ALAN-QUINE-8476409.csv
ALEC-MARTINEZ-8474166.csv
ALEKSANDER-BARKOV-8477493.csv
ALEKSI-SAARELA-8478839.csv
ALES-HEMSKY-8469466.csv
ALEX-BIEGA-8473415.csv
ALEX-BROADHURST-8476283.csv
ALEX-BURROWS-8470358.csv
ALEX-CHIASSON-8475163.csv
ALEX-DEBRINCAT-8479337.csv
ALEX-FORMENT

[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

ALEXANDER-YELESIN-8481630.csv
ALEXANDRE-BOLDUC-8470719.csv
ALEXANDRE-CARRIER-8478851.csv
ALEXANDRE-FORTIN-8479657.csv
ALEXANDRE-GRENIER-8476414.csv
ALEXANDRE-TEXIER-8480074.csv
ALEXEI-EMELIN-8471296.csv
ALEXEY-MARCHENKO-8476289.csv
ANDERS-BJORK-8478075.csv
ANDERS-LEE-8475314.csv
ANDERS-LINDBACK-8474765.csv
ANDERS-NILSSON-8475195.csv
ANDRE-BENOIT-8470378.csv
ANDRE-BURAKOVSKY-8477444.csv
ANDREAS-ATHANASIOU-8476960.csv
ANDREAS-BORGMAN-8480158.csv
ANDREAS-ENGLUND-8477971.csv
ANDREAS-JOHNSSON-8477341.csv
ANDREAS-MARTINSEN-8478561.csv
ANDREI-CHIBISOV-8481642.csv
ANDREI-LOKTIONOV-8474681.csv
ANDREI-MARKOV-8467496.csv
ANDREI-MIRONOV-8478842.csv
ANDREI-SVECHNIKOV-8480830.csv
ANDREI-VASILEVSKIY-8476883.csv
ANDREJ-MESZAROS-8471236.csv
ANDREJ-NESTRASIL-8475250.csv
ANDREJ-SEKERA-8471284.csv
ANDREJ-SUSTR-8477205.csv
ANDREW-AGOZZINO-8475461.csv
ANDREW-ALBERTS-8469626.csv
ANDREW-BODNARCHUK-8473550.csv
ANDREW-CAMPBELL-8474638.csv
ANDREW-COGLIANO-8471699.csv
ANDREW-COPP-8477429.csv
ANDREW-CRESCENZI-8476

[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

ANTHONY-DUCLAIR-8477407.csv
ANTHONY-GRECO-8479447.csv
ANTHONY-MANTHA-8477511.csv
ANTHONY-PELUSO-8474061.csv
ANTHONY-RICHARD-8478409.csv
ANTHONY-STOLARZ-8476932.csv
ANTOINE-BIBEAU-8477312.csv
ANTOINE-ROUSSEL-8474849.csv
ANTOINE-VERMETTE-8468535.csv
ANTON-BELOV-8477289.csv
ANTON-BLIDH-8477320.csv
ANTON-FORSBERG-8476341.csv
ANTON-KHUDOBIN-8471418.csv
ANTON-LANDER-8475165.csv
ANTON-LINDHOLM-8478073.csv
ANTON-RODIN-8475210.csv
ANTON-SLEPYSHEV-8477415.csv
ANTON-STRALMAN-8471873.csv
ANTON-VOLCHENKOV-8468501.csv
ANTON-WEDIN-8481637.csv
ANTTI-NIEMI-8474550.csv
ANTTI-RAANTA-8477293.csv
ANTTI-SUOMELA-8480965.csv
ANZE-KOPITAR-8471685.csv
ARRON-ASHAM-8465025.csv
ARTEM-ANISIMOV-8473573.csv
ARTEMI-PANARIN-8478550.csv
ARTTURI-LEHKONEN-8477476.csv
ASHTON-SAUTNER-8477085.csv
AUSTIN-CZARNIK-8478512.csv
AUSTIN-POGANSKI-8478040.csv
AUSTIN-WAGNER-8478455.csv
AUSTIN-WATSON-8475766.csv
AUSTON-MATTHEWS-8479318.csv
B.J.-CROMBEEN-8470647.csv
BARCLAY-GOODROW-8476624.csv
BARRET-JACKMAN-8467890.csv
BARRETT-HAYTON-8

[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

BRAD-STUART-8467331.csv
BRADEN-HOLTBY-8474651.csv
BRADY-AUSTIN-8477041.csv
BRADY-KEEPER-8481442.csv
BRADY-SKJEI-8476869.csv
BRADY-TKACHUK-8480801.csv
BRANDON-BOLLIG-8475650.csv
BRANDON-CARLO-8478443.csv
BRANDON-DAVIDSON-8475869.csv
BRANDON-DEFAZIO-8476214.csv
BRANDON-DUBINSKY-8471273.csv
BRANDON-GIGNAC-8479330.csv
BRANDON-GORMLEY-8475759.csv
BRANDON-HAGEL-8479542.csv
BRANDON-HALVERSON-8477990.csv
BRANDON-KOZUN-8475295.csv
BRANDON-MANNING-8475430.csv
BRANDON-MASHINTER-8474850.csv
BRANDON-MCMILLAN-8474646.csv
BRANDON-MONTOUR-8477986.csv
BRANDON-PIRRI-8475204.csv
BRANDON-PRUST-8471283.csv
BRANDON-SAAD-8476438.csv
BRANDON-SUTTER-8474091.csv
BRANDON-TANEV-8479293.csv
BRANDON-YIP-8471450.csv
BRAYDEN-MCNABB-8475188.csv
BRAYDEN-POINT-8478010.csv
BRAYDEN-SCHENN-8475170.csv
BRAYDON-COBURN-8470601.csv
BRENDAN-GALLAGHER-8475848.csv
BRENDAN-GAUNCE-8476867.csv
BRENDAN-GUHLE-8478425.csv
BRENDAN-LEIPSIC-8476894.csv
BRENDAN-LEMIEUX-8477962.csv
BRENDAN-PERLINI-8477943.csv
BRENDAN-RANFORD-8475896.csv
BRE

[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

CASEY-BAILEY-8478376.csv
CASEY-CIZIKAS-8475231.csv
CASEY-DESMITH-8479193.csv
CASEY-MITTELSTADT-8479999.csv
CASEY-NELSON-8479268.csv
CASEY-WELLMAN-8475602.csv
CAYDEN-PRIMEAU-8480051.csv
CEDRIC-PAQUETTE-8476975.csv
CEDRICK-DESJARDINS-8473638.csv
CHAD-BILLINS-8476827.csv
CHAD-JOHNSON-8473434.csv
CHAD-RUHWEDEL-8477244.csv
CHANDLER-STEPHENSON-8476905.csv
CHARLES-HUDON-8476948.csv
CHARLIE-COYLE-8475745.csv
CHARLIE-LINDGREN-8479292.csv
CHARLIE-MCAVOY-8479325.csv
CHASE-BALISY-8476322.csv
CHASE-DE-LEO-8478029.csv
CHRIS-BIGRAS-8477453.csv
CHRIS-BREEN-8475648.csv
CHRIS-BROWN-8475161.csv
CHRIS-BUTLER-8471761.csv
CHRIS-CONNER-8472368.csv
CHRIS-DRIEDGER-8476904.csv
CHRIS-HIGGINS-8470274.csv
CHRIS-KELLY-8467967.csv
CHRIS-KREIDER-8475184.csv
CHRIS-KUNITZ-8470543.csv
CHRIS-MUELLER-8474535.csv
CHRIS-NEIL-8467493.csv
CHRIS-PHILLIPS-8464956.csv
CHRIS-PORTER-8470871.csv
CHRIS-STEWART-8473485.csv
CHRIS-SUMMERS-8473589.csv
CHRIS-TERRY-8474052.csv
CHRIS-THORBURN-8469501.csv
CHRIS-TIERNEY-8476919.csv
CHRIS-VAN

[0;31mError: Number of distinct clusters (2) found smaller than n_clusters (3). Possibly due to duplicate points in X.
[0m

ERIC-NYSTROM-8470180.csv
ERIC-O'DELL-8474603.csv
ERIC-ROBINSON-8480762.csv
ERIC-SELLECK-8474554.csv
ERIC-STAAL-8470595.csv
ERIC-TANGRADI-8474025.csv
ERIK-BRANNSTROM-8480073.csv
ERIK-BURGDOERFER-8475982.csv
ERIK-CERNAK-8478416.csv
ERIK-COLE-8467396.csv
ERIK-CONDRA-8473588.csv
ERIK-GUDBRANSON-8475790.csv
ERIK-GUSTAFSSON-8475640.csv
ERIK-GUSTAFSSON-8476979.csv
ERIK-HAULA-8475287.csv
ERIK-JOHNSON-8473446.csv
ERIK-KARLSSON-8474578.csv
ESA-LINDELL-8476902.csv
ETHAN-BEAR-8478451.csv
EVAN-BOUCHARD-8480803.csv
EVAN-MCENENY-8477091.csv
EVAN-RODRIGUES-8478542.csv
EVANDER-KANE-8475169.csv
EVGENI-MALKIN-8471215.csv
EVGENI-NABOKOV-8460705.csv
EVGENY-DADONOV-8474149.csv
EVGENY-KUZNETSOV-8475744.csv
EVGENY-MEDVEDEV-8478563.csv
EVGENY-SVECHNIKOV-8478431.csv
FEDOR-TYUTIN-8469492.csv
FILIP-CHLAPIK-8478488.csv
FILIP-CHYTIL-8480078.csv
FILIP-FORSBERG-8476887.csv
FILIP-HRONEK-8479425.csv
FILIP-ZADINA-8480821.csv
FRANCIS-BOUILLON-8465914.csv
FRANCOIS-BEAUCHEMIN-8467400.csv
FRANK-CORRADO-8476302.csv
FRANK-VAT

[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

FRANS-NIELSEN-8470144.csv
FRAZER-MCLAREN-8473712.csv
FREDDIE-HAMILTON-8475878.csv
FREDERIC-ST-DENIS-8473647.csv
FREDERICK-GAUDREAU-8477919.csv
FREDERIK-ANDERSEN-8475883.csv
FREDERIK-GAUTHIER-8477512.csv
FREDRIK-CLAESSON-8476368.csv
GABRIEL-BOURQUE-8475268.csv
GABRIEL-CARLSSON-8478506.csv
GABRIEL-DUMONT-8475254.csv
GABRIEL-LANDESKOG-8476455.csv
GABRIEL-VILARDI-8480014.csv
GAETAN-HAAS-8481813.csv
GAGE-QUINNEY-8479748.csv
GARNET-HATHAWAY-8477903.csv
GARRET-SPARKS-8476343.csv
GARRETT-MITCHELL-8475291.csv
GARRETT-WILSON-8475253.csv
GAVIN-BAYREUTHER-8479945.csv
GEMEL-SMITH-8476966.csv
GEORGE-PARROS-8468095.csv
GERALD-MAYHEW-8479933.csv
GERMAN-RUBTSOV-8479424.csv
GILBERT-BRULE-8471680.csv
GILLES-SENN-8480213.csv
GIOVANNI-FIORE-8479613.csv
GIVANI-SMITH-8479379.csv
GRANT-CLITSOME-8471482.csv
GREG-MCKEGG-8475735.csv
GREG-PATERYN-8474688.csv
GREGORY-CAMPBELL-8470169.csv
GRIFFEN-MOLINO-8479969.csv
GRIFFIN-REINHART-8476852.csv
GUILLAUME-BRISEBOIS-8478465.csv
GUSTAV-FORSLING-8478055.csv
GUSTAV-LINDS

[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

HENRIK-HAAPALA-8480164.csv
HENRIK-LUNDQVIST-8468685.csv
HENRIK-SAMUELSSON-8476868.csv
HENRIK-SEDIN-8467876.csv
HENRIK-TALLINDER-8466182.csv
HENRIK-ZETTERBERG-8468083.csv
HUDSON-FASCHING-8477392.csv
HUNTER-MISKA-8480112.csv
HUNTER-SHINKARUK-8477445.csv
IAN-COLE-8474013.csv
IAN-MCCOSHEN-8477452.csv
IGOR-OZHIGANOV-8480943.csv
IGOR-SHESTERKIN-8478048.csv
IIRO-PAKARINEN-8476336.csv
ILYA-BRYZGALOV-8468524.csv
ILYA-KOVALCHUK-8469454.csv
ILYA-LYUBUSHKIN-8480950.csv
ILYA-MIKHEYEV-8481624.csv
ILYA-SAMSONOV-8478492.csv
ISAC-LUNDESTROM-8480806.csv
IVAN-BARBASHEV-8477964.csv
IVAN-PROVOROV-8478500.csv
J.C.-BEAUDIN-8478833.csv
J.T.-COMPHER-8477456.csv
JACCOB-SLAVIN-8476958.csv
JACK-CAMPBELL-8475789.csv
JACK-EICHEL-8478403.csv
JACK-HILLEN-8474519.csv
JACK-HUGHES-8481559.csv
JACK-JOHNSON-8471677.csv
JACK-RODEWALD-8478268.csv
JACK-ROSLOVIC-8478458.csv
JACK-SKILLE-8471681.csv
JACK-STUDNICKA-8480021.csv
JACOB-DE-LA-ROSE-8477455.csv
JACOB-JOSEFSON-8475185.csv
JACOB-LARSSON-8478491.csv
JACOB-MACDONALD-84794

[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

JAMES-REIMER-8473503.csv
JAMES-SHEPPARD-8473536.csv
JAMES-VAN-RIEMSDYK-8474037.csv
JAMES-WISNIEWSKI-8470222.csv
JAMES-WRIGHT-8474674.csv
JAMIE-BENN-8473994.csv
JAMIE-DEVANE-8475190.csv
JAMIE-MCBAIN-8473571.csv
JAMIE-MCGINN-8473465.csv
JAMIE-OLEKSIAK-8476467.csv
JAN-HEJDA-8470699.csv
JAN-RUTTA-8480172.csv
JANI-HAKANPAA-8475825.csv
JANNE-KUOKKANEN-8479511.csv
JANNIK-HANSEN-8471498.csv
JANSEN-HARKINS-8478424.csv
JARED-BOLL-8471766.csv
JARED-COREAU-8477225.csv
JARED-COWEN-8475174.csv
JARED-MCCANN-8477955.csv
JARED-SPURGEON-8474716.csv
JARET-ANDERSON-DOLAN-8479994.csv
JAROME-IGINLA-8462042.csv
JAROMIR-JAGR-8448208.csv
JAROSLAV-HALAK-8470860.csv
JARRED-TINORDI-8475797.csv
JARRET-STOLL-8468526.csv
JASON-AKESON-8475917.csv
JASON-CHIMERA-8466251.csv
JASON-DEMERS-8474218.csv
JASON-DICKINSON-8477450.csv
JASON-GARRISON-8474520.csv
JASON-KASDORF-8476309.csv
JASON-LABARBERA-8467391.csv
JASON-POMINVILLE-8469506.csv
JASON-ROBERTSON-8480027.csv
JASON-SPEZZA-8469455.csv
JASON-ZUCKER-8475722.csv


[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

JAY-BEAGLE-8474291.csv
JAY-BOUWMEESTER-8470151.csv
JAY-HARRISON-8469531.csv
JAY-MCCLEMENT-8469508.csv
JAY-ROSEHILL-8470816.csv
JAYCE-HAWRYLUK-8477963.csv
JAYCOB-MEGNA-8477034.csv
JAYSON-MEGNA-8477126.csv
JC-LIPON-8477076.csv
JEAN-FRANCOIS-BERUBE-8475234.csv
JEAN-GABRIEL-PAGEAU-8476419.csv
JEAN-PHILIPPE-COTE-8468745.csv
JEAN-SEBASTIEN-DEA-8477520.csv
JEAN-SEBASTIEN-GIGUERE-8462044.csv
JEFF-CARTER-8470604.csv
JEFF-GLASS-8471301.csv
JEFF-HALPERN-8467831.csv
JEFF-PETRY-8473507.csv
JEFF-SCHULTZ-8471240.csv
JEFF-SKINNER-8475784.csv
JEFF-ZATKOFF-8473553.csv
JEREMY-LAUZON-8478468.csv
JEREMY-MORIN-8475214.csv
JEREMY-SMITH-8474092.csv
JEREMY-WELSH-8476819.csv
JERRED-SMITHSON-8468162.csv
JERRY-D'AMIGO-8475309.csv
JESPER-BOQVIST-8480003.csv
JESPER-BRATT-8479407.csv
JESPER-FAST-8475855.csv
JESPERI-KOTKANIEMI-8480829.csv
JESSE-BLACKER-8475203.csv
JESSE-JOENSUU-8473514.csv
JESSE-PULJUJARVI-8479344.csv
JESSE-WINCHESTER-8474498.csv
JHONAS-ENROTH-8473523.csv
JIM-O'BRIEN-8474024.csv
JIM-SLATER-8470289.cs

[0;31mError: Number of distinct clusters (6) found smaller than n_clusters (7). Possibly due to duplicate points in X.
[0m

JOHNNY-ODUYA-8469665.csv
JON-GILLIES-8476903.csv
JON-MERRILL-8475750.csv
JONAS-BRODIN-8476463.csv
JONAS-GUSTAVSSON-8475361.csv
JONAS-HILLER-8473972.csv
JONAS-JOHANSSON-8477992.csv
JONAS-SIEGENTHALER-8478399.csv
JONATHAN-BERNIER-8473541.csv
JONATHAN-DAVIDSSON-8480247.csv
JONATHAN-DROUIN-8477494.csv
JONATHAN-ERICSSON-8470318.csv
JONATHAN-HUBERDEAU-8476456.csv
JONATHAN-MARCHESSAULT-8476539.csv
JONATHAN-QUICK-8471734.csv
JONATHAN-RACINE-8476411.csv
JONATHAN-TOEWS-8473604.csv
JONATHON-BLUM-8474164.csv
JONI-ORTIO-8475299.csv
JONNY-BRODZINSKI-8477380.csv
JOONA-LUOTO-8481649.csv
JOONAS-DONSKOI-8475820.csv
JOONAS-KEMPPAINEN-8478564.csv
JOONAS-KORPISALO-8476914.csv
JOONAS-NATTINEN-8475198.csv
JORDAN-BINNINGTON-8476412.csv
JORDAN-CARON-8475150.csv
JORDAN-EBERLE-8474586.csv
JORDAN-GREENWAY-8478413.csv
JORDAN-GROSS-8480913.csv
JORDAN-KYROU-8479385.csv
JORDAN-LEOPOLD-8467917.csv
JORDAN-MARTINOOK-8476921.csv
JORDAN-NOLAN-8475325.csv
JORDAN-OESTERLE-8477851.csv
JORDAN-SCHMALTZ-8476877.csv
JORDAN-SCHRO

[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

JOSH-MANSON-8476312.csv
JOSH-MORRISSEY-8477504.csv
JOSH-NORRIS-8480064.csv
JOSH-TEVES-8481425.csv
JOSHUA-HO-SANG-8477959.csv
JT-BROWN-8476806.csv
JT-MILLER-8476468.csv
JUHO-LAMMIKKO-8477996.csv


[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

JUJHAR-KHAIRA-8476915.csv
JULIAN-MELCHIORI-8475815.csv
JULIEN-BROUILLETTE-8475567.csv
JULIEN-GAUTHIER-8479328.csv
JULIUS-HONKA-8477945.csv
JUSSI-JOKINEN-8469638.csv
JUSSI-RYNNAS-8475680.csv
JUSTIN-ABDELKADER-8471716.csv
JUSTIN-AUGER-8477428.csv
JUSTIN-BAILEY-8477473.csv
JUSTIN-BRAUN-8474027.csv
JUSTIN-DOWLING-8475413.csv
JUSTIN-FALK-8473992.csv
JUSTIN-FAULK-8475753.csv
JUSTIN-FLOREK-8475845.csv
JUSTIN-FONTAINE-8476235.csv
JUSTIN-HODGMAN-8474793.csv
JUSTIN-HOLL-8475718.csv
JUSTIN-JOHNSON-8476086.csv
JUSTIN-KLOOS-8480082.csv
JUSTIN-PETERS-8471251.csv
JUSTIN-SCHULTZ-8474602.csv
JUSTIN-SHUGG-8475826.csv
JUSTIN-WILLIAMS-8468508.csv
JUUSE-SAROS-8477424.csv
JUUSO-RIIKOLA-8480945.csv
JUUSO-VALIMAKI-8479976.csv
JYRKI-JOKIPAKKA-8476279.csv
KAAPO-KAHKONEN-8478039.csv
KAAPO-KAKKO-8481554.csv
KADEN-FULCHER-8480363.csv
KAEL-MOUILLIERAT-8476147.csv
KAILER-YAMAMOTO-8479977.csv
KALE-CLAGUE-8479348.csv
KALLE-KOSSILA-8479290.csv
KARI-LEHTONEN-8470140.csv
KARL-ALZNER-8473991.csv
KARL-STOLLERY-8476798.csv


[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

MARTIN-ST.-LOUIS-8466378.csv
MARTIN-ST.-PIERRE-8470366.csv
MASON-APPLETON-8478891.csv
MASON-MARCHMENT-8478975.csv
MASON-RAYMOND-8471664.csv
MAT-CLARK-8475162.csv
MATHEW-BARZAL-8478445.csv


[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

MATHIEU-JOSEPH-8478472.csv
MATHIEU-OLIVIER-8479671.csv
MATHIEU-PERREAULT-8473618.csv
MATISS-KIVLENIEKS-8480162.csv
MATS-ZUCCARELLO-8475692.csv
MATT-BARTKOWSKI-8474749.csv
MATT-BELESKEY-8473492.csv
MATT-BENNING-8476988.csv
MATT-CALVERT-8474685.csv
MATT-CAREY-8477836.csv
MATT-CARKNER-8467931.csv
MATT-CARLE-8470640.csv
MATT-COOKE-8465951.csv
MATT-CULLEN-8464989.csv
MATT-D'AGOSTINI-8471851.csv
MATT-DONOVAN-8474659.csv
MATT-DUCHENE-8475168.csv
MATT-DUMBA-8476856.csv
MATT-ELLIS-8470018.csv
MATT-FRASER-8475671.csv
MATT-FRATTIN-8474192.csv
MATT-GILROY-8475118.csv
MATT-GREENE-8470121.csv
MATT-GRZELCYK-8476891.csv
MATT-HACKETT-8475252.csv
MATT-HALISCHUK-8474094.csv
MATT-HENDRICKS-8468611.csv
MATT-HUNWICK-8471436.csv
MATT-IRWIN-8475625.csv
MATT-KASSIAN-8471719.csv
MATT-LINDBLAD-8477228.csv
MATT-LORITO-8478371.csv
MATT-LUFF-8479644.csv
MATT-MARTIN-8474709.csv
MATT-MOULSON-8470852.csv
MATT-MURRAY-8476899.csv
MATT-NIETO-8476442.csv
MATT-NISKANEN-8471702.csv
MATT-PELECH-8471700.csv
MATT-PUEMPEL-84764

[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

MAX-FRIBERG-8476384.csv
MAX-JONES-8479368.csv
MAX-MCCORMICK-8476323.csv
MAX-PACIORETTY-8474157.csv
MAX-REINHART-8475733.csv
MAX-TALBOT-8470171.csv
MAX-VERONNEAU-8480314.csv
MAXIM-LAPIERRE-8470654.csv
MAXIM-LETUNOV-8477983.csv
MAXIM-MAMIN-8479553.csv
MAXIME-FORTUNUS-8469798.csv
MAXIME-LAGACE-8476509.csv
MAXIME-LAJOIE-8479320.csv
MELKER-KARLSSON-8477922.csv
MICHAEL-AMADIO-8478020.csv
MICHAEL-BOURNIVAL-8475739.csv
MICHAEL-BUNTING-8478047.csv
MICHAEL-CHAPUT-8475808.csv
MICHAEL-DAL-COLLE-8477936.csv
MICHAEL-DEL-ZOTTO-8474584.csv
MICHAEL-DIPIETRO-8480022.csv
MICHAEL-FROLIK-8473564.csv
MICHAEL-GRABNER-8473546.csv
MICHAEL-HUTCHINSON-8474636.csv
MICHAEL-KAPLA-8480081.csv
MICHAEL-KERANEN-8477923.csv
MICHAEL-KOSTKA-8474497.csv
MICHAEL-LATTA-8475247.csv
MICHAEL-LEIGHTON-8468038.csv
MICHAEL-MCCARRON-8477446.csv
MICHAEL-MCLEOD-8479415.csv
MICHAEL-MERSCH-8476352.csv
MICHAEL-PALIOTTA-8476394.csv
MICHAEL-RAFFL-8477290.csv
MICHAEL-RASMUSSEN-8479992.csv
MICHAEL-RYDER-8467545.csv
MICHAEL-SGARBOSSA-8475958

[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

MIRCO-MUELLER-8477509.csv
MIRO-HEISKANEN-8480036.csv
MITCH-CALLAHAN-8475296.csv
MITCH-REINKE-8480761.csv
MITCHELL-MARNER-8478483.csv
MITCHELL-STEPHENS-8478477.csv
MORGAN-ELLIS-8475782.csv
MORGAN-FROST-8480028.csv
MORGAN-GEEKIE-8479987.csv
MORGAN-KLIMCHUK-8477449.csv
MORGAN-RIELLY-8476853.csv
NAIL-YAKUPOV-8476855.csv
NATE-GUENIN-8470317.csv
NATE-PROSSER-8475613.csv
NATE-SCHMIDT-8477220.csv
NATE-THOMPSON-8470775.csv
NATHAN-BASTIAN-8479414.csv
NATHAN-BEAULIEU-8476470.csv
NATHAN-GERBE-8471804.csv
NATHAN-HORTON-8470596.csv
NATHAN-LAWSON-8474855.csv
NATHAN-LIEUWEN-8476319.csv
NATHAN-MACKINNON-8477492.csv
NATHAN-WALKER-8477573.csv
NAZEM-KADRI-8475172.csv
NEAL-PIONK-8480145.csv
NELSON-NOGIER-8478031.csv
NIC-DOWD-8475343.csv
NIC-PETAN-8477464.csv
NICHOLAS-BAPTISTE-8477491.csv
NICHOLAS-MERKLEY-8478447.csv
NICK-BJUGSTAD-8475760.csv
NICK-BONINO-8474009.csv
NICK-CAAMANO-8479381.csv
NICK-COUSINS-8476393.csv
NICK-DRAZENOVIC-8471832.csv
NICK-FOLIGNO-8473422.csv
NICK-HOLDEN-8474207.csv
NICK-JENSEN-8475

[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

NICK-SHORE-8476406.csv
NICK-SORENSEN-8477466.csv
NICK-SPALING-8474096.csv
NICK-SUZUKI-8480018.csv
NICKLAS-BACKSTROM-8473563.csv
NICKLAS-GROSSMANN-8471269.csv
NICKLAS-JENSEN-8476482.csv
NICO-HISCHIER-8480002.csv
NICO-STURM-8481477.csv
NICOLAS-AUBE-KUBEL-8477979.csv
NICOLAS-BEAUDIN-8480814.csv
NICOLAS-DESCHAMPS-8474598.csv
NICOLAS-DESLAURIERS-8475235.csv
NICOLAS-HAGUE-8479980.csv
NICOLAS-KERDILES-8476866.csv
NICOLAS-ROY-8478462.csv
NIKITA-GUSEV-8477038.csv
NIKITA-KUCHEROV-8476453.csv
NIKITA-NESTEROV-8476300.csv
NIKITA-NIKITIN-8471348.csv
NIKITA-SCHERBAK-8477957.csv
NIKITA-SOSHNIKOV-8478373.csv
NIKITA-TRYAMKIN-8477997.csv
NIKITA-ZADOROV-8477507.csv
NIKITA-ZAITSEV-8479458.csv
NIKLAS-BACKSTROM-8473404.csv
NIKLAS-HJALMARSSON-8471769.csv
NIKLAS-KRONWALL-8468509.csv
NIKLAS-SVEDBERG-8476844.csv
NIKLAS-TREUTLE-8478999.csv
NIKO-MIKKOLA-8478859.csv
NIKOLAI-KHABIBULIN-8459140.csv
NIKOLAI-KNYZHOV-8481812.csv
NIKOLAI-KULEMIN-8473579.csv
NIKOLAI-PROKHORKIN-8476947.csv
NIKOLAJ-EHLERS-8477940.csv
NIKOLA

[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

PHILLIP-DANAULT-8476479.csv
PHILLIP-DI-GIUSEPPE-8476858.csv
PIERRE-CEDRIC-LABRIE-8474201.csv
PIERRE-EDOUARD-BELLEMARE-8477930.csv
PIERRE-ENGVALL-8478115.csv
PIERRE-LEBLOND-8471428.csv
PIERRE-LUC-DUBOIS-8479400.csv
PIERRE-MARC-BOUCHARD-8470153.csv
PK-SUBBAN-8474056.csv
PONTUS-ABERG-8476857.csv
QUINN-HUGHES-8480800.csv
QUINTON-HOWDEN-8475769.csv
RADEK-DVORAK-8462041.csv
RADEK-FAKSA-8476889.csv
RADEK-MARTINEK-8468101.csv
RADIM-SIMEK-8480160.csv
RADIM-VRBATA-8468085.csv


[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

RADKO-GUDAS-8475462.csv
RAFFI-TORRES-8468485.csv
RAMAN-HRABARENKA-8477127.csv
RAPHAEL-DIAZ-8476244.csv
RASMUS-ANDERSSON-8478397.csv
RASMUS-ASPLUND-8479335.csv
RASMUS-DAHLIN-8480839.csv
RASMUS-RISSANEN-8475294.csv
RASMUS-RISTOLAINEN-8477499.csv
RASMUS-SANDIN-8480873.csv
RAY-EMERY-8469548.csv
RAY-WHITNEY-8458537.csv
REID-BOUCHER-8476423.csv
REILLY-SMITH-8475191.csv
REM-PITLICK-8479514.csv
REMI-ELIE-8477461.csv
RENE-BOURQUE-8471504.csv
RETO-BERRA-8473499.csv
RHETT-GARDNER-8479587.csv
RICH-CLUNE-8471733.csv
RICH-PEVERLEY-8471514.csv
RICHARD-BACHMAN-8473614.csv
RICHARD-PANIK-8475209.csv
RICK-NASH-8470041.csv
RICKARD-RAKELL-8476483.csv
RILEY-BARBER-8477003.csv
RILEY-NASH-8474062.csv
RILEY-SHEAHAN-8475772.csv


[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

RILEY-STILLMAN-8479388.csv
RINAT-VALIEV-8477642.csv
RJ-UMBERGER-8469469.csv
ROB-KLINKHAMMER-8473970.csv
ROB-O'GARA-8476303.csv
ROB-SCUDERI-8467452.csv
ROB-ZEPP-8467972.csv
ROBBIE-RUSSO-8476418.csv
ROBBY-FABBRI-8477952.csv
ROBERT-BORTUZZO-8474145.csv
ROBERT-HAGG-8477462.csv
ROBERT-THOMAS-8480023.csv
ROBERTO-LUONGO-8466141.csv
ROBIN-LEHNER-8475215.csv
ROBYN-REGEHR-8467344.csv
ROCCO-GRIMALDI-8476428.csv
ROLAND-MCKEOWN-8477981.csv
ROMAN-HORAK-8475264.csv
ROMAN-JOSI-8474600.csv


[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

ROMAN-LYUBIMOV-8479648.csv
ROMAN-POLAK-8471392.csv
ROMAN-WILL-8477914.csv
RON-HAINSEY-8468493.csv
RONALDS-KENINS-8477589.csv
ROOPE-HINTZ-8478449.csv
ROSS-JOHNSTON-8477527.csv
ROSTISLAV-KLESLA-8468484.csv
ROSTISLAV-OLESZ-8471220.csv
ROURKE-CHARTIER-8478078.csv
RUDOLFS-BALCERS-8478870.csv
RYAN-BOURQUE-8475243.csv
RYAN-CALLAHAN-8471339.csv
RYAN-CARPENTER-8477846.csv
RYAN-CARTER-8473646.csv
RYAN-CRAIG-8470065.csv
RYAN-DONATO-8477987.csv
RYAN-DZINGEL-8476288.csv
RYAN-ELLIS-8475176.csv
RYAN-GARBUTT-8476116.csv
RYAN-GETZLAF-8470612.csv
RYAN-GRAVES-8477435.csv
RYAN-HAMILTON-8472424.csv
RYAN-HARTMAN-8477451.csv


[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

RYAN-JOHANSEN-8475793.csv
RYAN-JOHNSTON-8478963.csv
RYAN-JONES-8471323.csv
RYAN-KESLER-8470616.csv
RYAN-KUFFNER-8481426.csv
RYAN-LINDGREN-8479324.csv
RYAN-LOMBERG-8479066.csv
RYAN-MACINNIS-8477974.csv
RYAN-MALONE-8467988.csv
RYAN-MCDONAGH-8474151.csv
RYAN-MILLER-8468011.csv
RYAN-MURPHY-8476465.csv
RYAN-MURRAY-8476850.csv
RYAN-NUGENT-HOPKINS-8476454.csv
RYAN-O'REILLY-8475158.csv


Fitting a model with 34 free scalar parameters with only 28 data points will result in a degenerate solution.


RYAN-POEHLING-8480068.csv


[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

RYAN-PULOCK-8477506.csv


[0;31mError: Number of distinct clusters (5) found smaller than n_clusters (6). Possibly due to duplicate points in X.
[0m

RYAN-REAVES-8471817.csv
RYAN-SMYTH-8460496.csv
RYAN-SPOONER-8475727.csv
RYAN-SPROUL-8476450.csv
RYAN-STANTON-8475598.csv
RYAN-STOA-8471708.csv
RYAN-STROME-8476458.csv
RYAN-SUTER-8470600.csv
RYAN-WHITE-8473466.csv
RYAN-WHITNEY-8470309.csv
RYAN-WILSON-8473700.csv
RYANE-CLOWE-8469622.csv
SAKU-KOIVU-8459442.csv
SAKU-MAENALANEN-8477357.csv
SAM-BENNETT-8477935.csv
SAM-CARRICK-8475842.csv
SAM-GAGNER-8474040.csv
SAM-LAFFERTY-8478043.csv
SAM-MONTEMBEAULT-8478470.csv
SAM-REINHART-8477933.csv
SAM-STEEL-8479351.csv
SAMI-AITTOKALLIO-8475824.csv
SAMI-NIKU-8478915.csv
SAMI-SALO-8465202.csv
SAMI-VATANEN-8475222.csv
SAMMY-BLAIS-8478104.csv
SAMUEL-GIRARD-8479398.csv


[0;31mError: Number of distinct clusters (4) found smaller than n_clusters (5). Possibly due to duplicate points in X.
[0m

SAMUEL-HENLEY-8476511.csv
SAMUEL-MORIN-8477502.csv
SCOTT-CLEMMENSEN-8466339.csv
SCOTT-DARLING-8474152.csv
SCOTT-FOSTER-8479138.csv
SCOTT-GOMEZ-8467351.csv
SCOTT-HANNAN-8466158.csv
SCOTT-HARRINGTON-8476449.csv
SCOTT-HARTNELL-8468486.csv
SCOTT-KOSMACHUK-8476910.csv
SCOTT-LAUGHTON-8476872.csv
SCOTT-MAYFIELD-8476429.csv
SCOTT-SABOURIN-8477149.csv
SCOTT-WEDGEWOOD-8475809.csv
SCOTT-WILSON-8476293.csv
SCOTTIE-UPSHALL-8470105.csv


[0;31mError: Number of distinct clusters (3) found smaller than n_clusters (4). Possibly due to duplicate points in X.
[0m

SEAN-BERGENHEIM-8470176.csv
SEAN-COLLINS-8474744.csv
SEAN-COUTURIER-8476461.csv
SEAN-KURALY-8476374.csv
SEAN-MALONE-8477391.csv
SEAN-MONAHAN-8477497.csv
SEAN-WALKER-8480336.csv
SEBASTIAN-AHO-8478427.csv
SEBASTIAN-AHO-8480222.csv
SEMYON-VARLAMOV-8473575.csv
SERGEI-BOBROVSKY-8475683.csv
SERGEI-GONCHAR-8458951.csv
SERGEI-PLOTNIKOV-8478928.csv
SERGEY-KALININ-8478566.csv
SERGEY-TOLCHINSKY-8477566.csv
SETH-GRIFFITH-8476495.csv
SETH-HELGESON-8475274.csv
SETH-JONES-8477495.csv
SHANE-DOAN-8462038.csv
SHANE-GERSICH-8478063.csv
SHANE-HARPER-8475597.csv
SHANE-O'BRIEN-8470839.csv
SHANE-PRINCE-8476386.csv
SHAWN-HORCOFF-8467423.csv
SHAWN-MATTHIAS-8473574.csv
SHAWN-THORNTON-8465978.csv
SHAYNE-GOSTISBEHERE-8476906.csv
SHEA-THEODORE-8477447.csv
SHEA-WEBER-8470642.csv
SHELDON-BROOKBANK-8469992.csv
SHELDON-DRIES-8480326.csv
SHELDON-REMPAL-8480776.csv
SIDNEY-CROSBY-8471675.csv
SIMON-DESPRES-8475155.csv
SIMON-GAGNE-8467346.csv
SIMON-MOSER-8477666.csv
SLATER-KOEKKOEK-8476886.csv
SLAVA-VOYNOV-8474594.csv
SONN

[0;31mError: Number of distinct clusters (6) found smaller than n_clusters (7). Possibly due to duplicate points in X.
[0m

STU-BICKEL-8474772.csv
STUART-PERCY-8476478.csv
SVEN-ANDRIGHETTO-8477413.csv
SVEN-BAERTSCHI-8476466.csv
TAGE-THOMPSON-8479420.csv
TANNER-FRITZ-8479206.csv
TANNER-GLASS-8470854.csv
TANNER-KERO-8478528.csv
TANNER-PEARSON-8476871.csv
TANNER-RICHARD-8476911.csv
TARO-HIROSE-8481433.csv
TAYLOR-BECK-8475192.csv
TAYLOR-CHORNEY-8471710.csv
TAYLOR-FEDUN-8476166.csv
TAYLOR-HALL-8475791.csv
TAYLOR-LEIER-8476955.csv
TAYLOR-PYATT-8467881.csv
TEDDY-BLUEGER-8476927.csv
TEDDY-PURCELL-8473962.csv
TEEMU-PULKKINEN-8475800.csv
TEEMU-SELANNE-8457981.csv
TEUVO-TERAVAINEN-8476882.csv
THATCHER-DEMKO-8477967.csv
THOMAS-CHABOT-8478469.csv
THOMAS-DI-PAULI-8476974.csv
THOMAS-GREISS-8471306.csv
THOMAS-HICKEY-8474066.csv
THOMAS-VANEK-8470598.csv
TIM-ERIXON-8475148.csv
TIM-GLEASON-8469476.csv
TIM-HEED-8475841.csv
TIM-JACKMAN-8469490.csv
TIM-KENNEDY-8471842.csv
TIM-SCHALLER-8477213.csv
TIM-SESTITO-8471636.csv
TIM-THOMAS-8460703.csv
TIMO-MEIER-8478414.csv
TIMOTHY-GETTINGER-8479364.csv
TIMOTHY-LILJEGREN-8480043.csv
TJ-B

### 2.2 Save Results

In [8]:
all_stats_df = pd.DataFrame.from_dict(all_stats, orient='columns').T
all_stats_df.columns = ['nobs', 'minmax', 'mean', 'variance', 'skewness', 'kurtosis']
all_stats_df.to_pickle('predictions_hmm_3years_for_2021.pkl')
all_stats_df.head(10)

Unnamed: 0,nobs,minmax,mean,variance,skewness,kurtosis
A.J.-GREER-8478421.csv,500,"(5.709672724683751, 25.534055004993228)",13.1009,7.60412,0.227929,0.522997
AARON-DELL-8477180.csv,500,"(-0.23544148291288167, 4.213466197174426)",0.925224,0.929644,0.854183,0.174074
AARON-EKBLAD-8477932.csv,500,"(18.98625378121845, 58.96552562304788)",40.52,48.4988,0.0443088,-0.160787
AARON-NESS-8474604.csv,500,"(-0.0400711438583026, 12.910477808168023)",5.11348,4.6331,0.482452,0.312154
AARON-PALUSHAJ-8474030.csv,0,0,0.0,0.0,0.0,0.0
AARON-ROME-8470310.csv,0,0,0.0,0.0,0.0,0.0
AARON-VOLPATTI-8475619.csv,0,0,0.0,0.0,0.0,0.0
ADAM-ALMQUIST-8475332.csv,0,0,0.0,0.0,0.0,0.0
ADAM-BOQVIST-8480871.csv,500,"(10.085345063158206, 54.673079579186656)",26.9481,58.5617,0.53842,0.198286
ADAM-BROOKS-8478996.csv,0,0,0.0,0.0,0.0,0.0
