In [58]:
import lightgbm as lgb
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import pathlib
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from scipy.stats import uniform as sp_uniform
import sklearn.metrics
from lightgbm import LGBMClassifier

In [59]:
# Parse results data
results = pd.DataFrame()

for path in pathlib.Path('/Users/sam/Documents/projects/premier_league_predictions/data').rglob("*.csv"):
    data = pd.read_csv(path)
    results = pd.concat([results, data])
    
# Remove missing values    
results = results[results.result.values != 'pp']
results = results[~pd.isnull(results.result.values)]
results = results.drop(['Unnamed: 0'], axis=1)

In [63]:
X = results.loc[:, results.columns != 'result'].values
y = results.iloc[:, -1].values

In [67]:
y_values = []

for value in y:
    if value == "home":
        y_values.append(0)
    elif value == "away":
        y_values.append(1)
    else:
        y_values.append(2)
        
y = y_values

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=True)

In [70]:
parameters={"early_stopping_rounds":20, 
            "eval_metric" : 'multi_logloss', 
            "eval_set" : [(X_test,y_test)],
            'eval_names': ['valid'],
            'verbose': 100,
            'categorical_feature': 'auto'}

In [71]:
parameter_tuning ={
             'max_depth': sp_randint(1,3),
             'num_leaves': sp_randint(1, 4), 
             'learning_rate ': [0.1,0.01,0.001],
             'min_child_samples': sp_randint(20, 50), 
             'min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],
             'subsample': sp_uniform(loc=0.2, scale=0.8), 
             'colsample_bytree': sp_uniform(loc=0.4, scale=0.6),
             'reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
             'reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100]}

In [72]:
scorer = sklearn.metrics.make_scorer(sklearn.metrics.f1_score, average = 'weighted')

In [73]:
classifier = LGBMClassifier(random_state=300, silent=True, metric='None', n_jobs=4, n_estimators=5000)

find_parameters = RandomizedSearchCV(
    estimator=classifier, param_distributions=parameter_tuning, 
    n_iter=100,
    scoring=scorer,
    cv=5,
    refit=True,
    random_state=300,
    verbose=False)

In [74]:
find_parameters.fit(X_train, y_train, **parameters)
print('Best score : {} with parameters: {} '.format(find_parameters.best_score_, find_parameters.best_params_))

Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.12763
[200]	valid's multi_logloss: 1.12759
[300]	valid's multi_logloss: 1.12759
[400]	valid's multi_logloss: 1.12759
[500]	valid's multi_logloss: 1.12759
[600]	valid's multi_logloss: 1.12759
Early stopping, best iteration is:
[613]	valid's multi_logloss: 1.12759
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.10862
[200]	valid's multi_logloss: 1.10841
[300]	valid's multi_logloss: 1.1084
[400]	valid's multi_logloss: 1.1084
[500]	valid's multi_logloss: 1.1084
Early stopping, best iteration is:
[528]	valid's multi_logloss: 1.1084
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.1093
[200]	valid's multi_logloss: 1.1091
[300]	valid's multi_logloss: 1.1091
[400]	valid's multi_logloss: 1.1091
[500]	valid's multi_logloss: 1.1091
Early stopping, best iteration is:
[517]	valid's multi_logloss: 1.1091
Training until vali

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

[900]	valid's multi_logloss: 1.07686
[1000]	valid's multi_logloss: 1.07686
[1100]	valid's multi_logloss: 1.07686
Early stopping, best iteration is:
[1147]	valid's multi_logloss: 1.07686
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.14725
[200]	valid's multi_logloss: 1.08305
[300]	valid's multi_logloss: 1.0753
[400]	valid's multi_logloss: 1.07465
[500]	valid's multi_logloss: 1.07459
[600]	valid's multi_logloss: 1.07459
[700]	valid's multi_logloss: 1.07459
[800]	valid's multi_logloss: 1.07459
[900]	valid's multi_logloss: 1.07459
Early stopping, best iteration is:
[959]	valid's multi_logloss: 1.07459
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.15053
[200]	valid's multi_logloss: 1.13246
[300]	valid's multi_logloss: 1.13141
[400]	valid's multi_logloss: 1.1313
[500]	valid's multi_logloss: 1.13128
[600]	valid's multi_logloss: 1.13128
[700]	valid's multi_logloss: 1.13128
[800]	valid's multi_logloss: 1

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[7]	valid's multi_logloss: 1.09039
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[75]	valid's multi_logloss: 1.06442
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[32]	valid's multi_logloss: 1.06965
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[77]	valid's multi_logloss: 1.0704
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.06254
Early stopping, best iteration is:
[158]	valid's multi_logloss: 1.05748
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't imp

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

[300]	valid's multi_logloss: 1.04108
[400]	valid's multi_logloss: 1.03894
Early stopping, best iteration is:
[440]	valid's multi_logloss: 1.03824
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[29]	valid's multi_logloss: 1.06554
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[63]	valid's multi_logloss: 1.06807
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[45]	valid's multi_logloss: 1.06446
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[6]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Earl

[1000]	valid's multi_logloss: 1.1097
[1100]	valid's multi_logloss: 1.1097
[1200]	valid's multi_logloss: 1.1097
[1300]	valid's multi_logloss: 1.1097
[1400]	valid's multi_logloss: 1.1097
[1500]	valid's multi_logloss: 1.1097
[1600]	valid's multi_logloss: 1.1097
[1700]	valid's multi_logloss: 1.1097
[1800]	valid's multi_logloss: 1.1097
[1900]	valid's multi_logloss: 1.1097
[2000]	valid's multi_logloss: 1.1097
[2100]	valid's multi_logloss: 1.1097
[2200]	valid's multi_logloss: 1.1097
[2300]	valid's multi_logloss: 1.1097
Early stopping, best iteration is:
[2306]	valid's multi_logloss: 1.1097
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.11805
[200]	valid's multi_logloss: 1.11742
[300]	valid's multi_logloss: 1.11722
[400]	valid's multi_logloss: 1.11716
[500]	valid's multi_logloss: 1.11715
[600]	valid's multi_logloss: 1.11714
[700]	valid's multi_logloss: 1.11713
[800]	valid's multi_logloss: 1.11713
[900]	valid's multi_logloss: 1.11713
[1000]	valid's m

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

[200]	valid's multi_logloss: 1.105
[300]	valid's multi_logloss: 1.10463
[400]	valid's multi_logloss: 1.10457
[500]	valid's multi_logloss: 1.10457
[600]	valid's multi_logloss: 1.10457
[700]	valid's multi_logloss: 1.10457
[800]	valid's multi_logloss: 1.10457
[900]	valid's multi_logloss: 1.10457
[1000]	valid's multi_logloss: 1.10457
[1100]	valid's multi_logloss: 1.10457
[1200]	valid's multi_logloss: 1.10457
Early stopping, best iteration is:
[1268]	valid's multi_logloss: 1.10457
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.09944
[200]	valid's multi_logloss: 1.09861
[300]	valid's multi_logloss: 1.09855
[400]	valid's multi_logloss: 1.09855
[500]	valid's multi_logloss: 1.09855
[600]	valid's multi_logloss: 1.09855
Early stopping, best iteration is:
[611]	valid's multi_logloss: 1.09855
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.21619
[200]	valid's multi_logloss: 1.21253
[300]	valid's multi_logloss: 

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.16331
[200]	valid's multi_logloss: 1.10925
[300]	valid's multi_logloss: 1.08349
[400]	valid's multi_logloss: 1.07808
[500]	valid's multi_logloss: 1.07674
[600]	valid's multi_logloss: 1.07641
[700]	valid's multi_logloss: 1.07633
[800]	valid's multi_logloss: 1.07631
[900]	valid's multi_logloss: 1.0763
[1000]	valid's multi_logloss: 1.0763
[1100]	valid's multi_logloss: 1.0763
[1200]	valid's multi_logloss: 1.0763
[1300]	valid's multi_logloss: 1.0763
[1400]	valid's multi_logloss: 1.0763
[1500]	valid's multi_logloss: 1.0763
[1600]	valid's multi_logloss: 1.0763
[1700]	valid's multi_logloss: 1.0763
Early stopping, best iteration is:
[1732]	valid's multi_logloss: 1.0763
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.17034
[200]	valid's multi_logloss: 1.15303
[300]	valid's multi_logloss: 1.15168
[400]	valid's multi_logloss: 1.1515
[500]	valid's multi_logloss: 1.

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12118
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[2]	valid's multi_logloss: 1.09217
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.03338
Early stopping, best iteration is:
[113]	valid's multi_logloss: 1.03271
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[28]	valid's multi_logloss: 1.06202
Training until validation scores don't impr

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.13886
[200]	valid's multi_logloss: 1.08504
[300]	valid's multi_logloss: 1.06935
[400]	valid's multi_logloss: 1.06796
[500]	valid's multi_logloss: 1.06789
[600]	valid's multi_logloss: 1.06789
[700]	valid's multi_logloss: 1.06789
[800]	valid's multi_logloss: 1.06789
[900]	valid's multi_logloss: 1.06789
Early stopping, best iteration is:
[882]	valid's multi_logloss: 1.06789
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.07861
[200]	valid's multi_logloss: 1.06738
[300]	valid's multi_logloss: 1.06726
[400]	valid's multi_logloss: 1.06726
[500]	valid's multi_logloss: 1.06726
[600]	valid's multi_logloss: 1.06726
Early stopping, best iteration is:
[647]	valid's multi_logloss: 1.06726
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.08693
[200]	valid's multi_logloss: 1.06735
[300]	valid's multi_logloss: 1.0672
[400]	va

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[52]	valid's multi_logloss: 1.06746
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.06948
Early stopping, best iteration is:
[91]	valid's multi_logloss: 1.0691
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[51]	valid's multi_logloss: 1.06531
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't impro

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[3]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12118
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.18805
[200]	valid's multi_logloss: 1.1859
[300]	valid's multi_logloss: 1.18585
[400]	valid's multi_logloss: 1.18585
[500]	valid's multi_logloss: 1.18585
[600]	valid's multi_logloss: 1.18585
[700]	valid's multi_logloss: 1.18585
[800]	valid's multi_loglos

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

[200]	valid's multi_logloss: 1.10808
[300]	valid's multi_logloss: 1.10808
[400]	valid's multi_logloss: 1.10808
Early stopping, best iteration is:
[419]	valid's multi_logloss: 1.10808
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.10906
[200]	valid's multi_logloss: 1.10897
[300]	valid's multi_logloss: 1.10897
[400]	valid's multi_logloss: 1.10897
Early stopping, best iteration is:
[390]	valid's multi_logloss: 1.10897
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.1095
[200]	valid's multi_logloss: 1.10942
[300]	valid's multi_logloss: 1.10942
Early stopping, best iteration is:
[359]	valid's multi_logloss: 1.10942
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.11745
[200]	valid's multi_logloss: 1.11741
[300]	valid's multi_logloss: 1.11741
Early stopping, best iteration is:
[369]	valid's multi_logloss: 1.11741
Training until validation scores don't improve fo

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

[100]	valid's multi_logloss: 1.10762
[200]	valid's multi_logloss: 1.10584
[300]	valid's multi_logloss: 1.10563
[400]	valid's multi_logloss: 1.10561
[500]	valid's multi_logloss: 1.10561
[600]	valid's multi_logloss: 1.10561
[700]	valid's multi_logloss: 1.10561
[800]	valid's multi_logloss: 1.10561
[900]	valid's multi_logloss: 1.10561
[1000]	valid's multi_logloss: 1.10561
[1100]	valid's multi_logloss: 1.10561
Early stopping, best iteration is:
[1081]	valid's multi_logloss: 1.10561
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.09933
[200]	valid's multi_logloss: 1.09889
[300]	valid's multi_logloss: 1.09887
[400]	valid's multi_logloss: 1.09887
[500]	valid's multi_logloss: 1.09887
Early stopping, best iteration is:
[518]	valid's multi_logloss: 1.09887
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.19163
[200]	valid's multi_logloss: 1.18754
[300]	valid's multi_logloss: 1.18746
[400]	valid's multi_logloss:

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12118
Training until validation scores don't improve for 20 rounds
[100]	valid's multi_logloss: 1.12703
[200]	valid's multi_logloss: 1.12697
[300]	valid's multi_logloss: 1.12697
[400]	valid's multi_logloss: 1.12697
Early stopping, best iteration is:
[472]	valid's multi_logloss: 1.12697
Training until validation scores don't improve for 20 rounds
[10

Traceback (most recent call last):
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py", line 228, in train
    booster = Booster(params=params, train_set=train_set)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1714, in __init__
    train_set.construct().handle,
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 1085, in construct
    categorical_feature=self.categorical_feature, params=self.params)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 887, in _lazy_init
    self.__init_from_np2d(data, params_str, ref_dataset)
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 937, in __init_from_np2d
    ctypes.byref(self.handle)))
  File "/Users/sam/opt/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py", line 45, in _safe_call
    raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
lightgbm.basic.Ligh

Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.12118
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[9]	valid's multi_logloss: 1.12924
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best iteration is:
[1]	valid's multi_logloss: 1.11611
Training until validation scores don't improve for 20 rounds
Early stopping, best i

        nan 0.23218409        nan        nan 0.29387624        nan
 0.32088103 0.23218409        nan        nan        nan 0.23218409
 0.32357024 0.23218409 0.28753914 0.23218409 0.3052926  0.23218409
 0.32210091 0.24970132 0.32225827 0.23218409        nan 0.23218409
 0.30834093        nan        nan 0.31574527        nan 0.23380338
        nan 0.33082547        nan 0.23218409        nan 0.33348602
        nan 0.16914856 0.23218409 0.23218409 0.24970132 0.32201116
        nan 0.32204329        nan        nan 0.23218409 0.23218409
 0.23218409 0.32715584 0.24970132        nan 0.32325652 0.23218409
        nan        nan        nan        nan        nan 0.23218409
        nan 0.28773348 0.27470052 0.23218409        nan        nan
 0.23218409        nan        nan 0.32794693 0.23218409 0.33559136
        nan        nan 0.32593943        nan        nan 0.32046785
        nan        nan 0.23218409 0.24970132 0.32510632        nan
 0.23218409 0.23218409 0.23218409 0.23218409 0.23218409       

In [76]:
best_parameters = find_parameters.best_params_
best_parameters

{'colsample_bytree': 0.5834803063086598,
 'learning_rate ': 0.001,
 'max_depth': 2,
 'min_child_samples': 44,
 'min_child_weight': 0.1,
 'num_leaves': 2,
 'reg_alpha': 7,
 'reg_lambda': 0.1,
 'subsample': 0.5937222974453096}

In [77]:
best_parameters_model = lgb.LGBMClassifier(**best_parameters)
best_parameters_model.set_params(**best_parameters)

LGBMClassifier(colsample_bytree=0.5834803063086598, learning_rate =0.001,
               max_depth=2, min_child_samples=44, min_child_weight=0.1,
               num_leaves=2, reg_alpha=7, reg_lambda=0.1,
               subsample=0.5937222974453096)

In [78]:
clf = best_parameters_model.fit(X_train, y_train)

In [79]:
y_preds = clf.predict(X_test)

In [80]:
np.mean(np.asarray(y_test) == y_preds)

0.4878048780487805

In [81]:
np.mean(np.asarray(y_test) == 0)

0.4634146341463415

In [82]:
this_week = pd.read_csv("/Users/sam/Documents/projects/premier_league_predictions/current_gameweek.csv", index_col=0)

In [83]:
preds = []

for result in clf.predict(this_week.values):
    if result == 0:
        preds.append("home")
    elif result == 1:
        preds.append("away")
    else:
        preds.append("draw")
    
preds

['home',
 'home',
 'draw',
 'draw',
 'home',
 'draw',
 'draw',
 'draw',
 'draw',
 'home']

In [62]:
X = dataset.loc[:, dataset.columns != 'result'].values
y = dataset.iloc[:, -1].values