In [1]:
import pandas as pd
import numpy as np

In [12]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [4]:
from goalguru.statsbombs_package.data_processor import load_all_seasons_past_info

In [5]:
all_data = load_all_seasons_past_info(save_concat=False)
all_data.shape

9_27.csv loaded locally
37_90.csv loaded locally
37_42.csv loaded locally
37_4.csv loaded locally
43_106.csv loaded locally
43_3.csv loaded locally
1238_108.csv loaded locally
11_27.csv loaded locally
7_27.csv loaded locally
2_27.csv loaded locally
12_27.csv loaded locally
55_43.csv loaded locally
72_107.csv loaded locally
72_30.csv loaded locally
53_106.csv loaded locally


(2590, 38)

In [6]:
all_data.columns

Index(['match_id', 'target', 'pass_completed_home_all_past',
       'pass_total_home_all_past', 'pass_precision_home_all_past',
       'shot_on_target_home_all_past', 'shot_goal_home_all_past',
       'shot_xg_home_all_past', 'shot_total_home_all_past',
       'shot_precision_home_all_past', 'shot_conversion_home_all_past',
       'pass_completed_home_last_10', 'pass_total_home_last_10',
       'pass_precision_home_last_10', 'shot_on_target_home_last_10',
       'shot_goal_home_last_10', 'shot_xg_home_last_10',
       'shot_total_home_last_10', 'shot_precision_home_last_10',
       'shot_conversion_home_last_10', 'pass_completed_away_all_past',
       'pass_total_away_all_past', 'pass_precision_away_all_past',
       'shot_on_target_away_all_past', 'shot_goal_away_all_past',
       'shot_xg_away_all_past', 'shot_total_away_all_past',
       'shot_precision_away_all_past', 'shot_conversion_away_all_past',
       'pass_completed_away_last_10', 'pass_total_away_last_10',
       'pass_prec

In [10]:
all_data_not_na = all_data.dropna(axis=0, how='any')
X = all_data_not_na.drop(columns=['match_id', 'target'])
y = all_data_not_na['target']
X.shape, y.shape

((2434, 36), (2434,))

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=79)

In [14]:
params = {
    "loss" : ["hinge", "squared_hinge", "modified_huber"],
    "alpha" : [0.0001, 0.001, 0.01, 0.1],
    "penalty" : ["l2", "l1", "elasticnet", None],
}
clf = SGDClassifier(max_iter=10000)
grid = GridSearchCV(clf, param_grid=params, cv=5, n_jobs = -1, scoring='accuracy', verbose=3)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 2/5] END alpha=0.0001, loss=hinge, penalty=l2;, score=0.399 total time=   0.3s[CV 3/5] END alpha=0.0001, loss=hinge, penalty=l2;, score=0.328 total time=   0.3s

[CV 1/5] END alpha=0.0001, loss=hinge, penalty=l2;, score=0.276 total time=   0.2s
[CV 5/5] END alpha=0.0001, loss=hinge, penalty=l2;, score=0.444 total time=   0.3s
[CV 4/5] END alpha=0.0001, loss=hinge, penalty=l1;, score=0.429 total time=   0.5s
[CV 1/5] END alpha=0.0001, loss=hinge, penalty=elasticnet;, score=0.487 total time=   0.5s
[CV 5/5] END alpha=0.0001, loss=hinge, penalty=l1;, score=0.376 total time=   0.5s
[CV 1/5] END alpha=0.0001, loss=hinge, penalty=l1;, score=0.431 total time=   0.4s
[CV 3/5] END alpha=0.0001, loss=hinge, penalty=elasticnet;, score=0.440 total time=   0.3s
[CV 2/5] END alpha=0.0001, loss=hinge, penalty=elasticnet;, score=0.481 total time=   0.4s
[CV 4/5] END alpha=0.0001, loss=hinge, penalty=elasticnet;, score=0.291 total time= 

In [17]:
grid.best_params_, grid.best_score_

({'alpha': 0.1, 'loss': 'squared_hinge', 'penalty': 'l1'}, 0.4785837502156288)

In [25]:
y_pred = grid.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(grid.best_params_)

0.3283173734610123
{'alpha': 0.1, 'loss': 'squared_hinge', 'penalty': 'l1'}


In [28]:
grid.predict(np.zeros((1,36)))



array([-1])

In [29]:
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [44]:
rs = RobustScaler()
X_train_rob = rs.fit_transform(X_train)
X_test_rob = rs.transform(X_test)

mms = MinMaxScaler()
X_train_mm = mms.fit_transform(X_train)
X_test_mm = mms.transform(X_test)

In [31]:
grid_rob = GridSearchCV(clf, param_grid=params, cv=5, n_jobs = -1, scoring='accuracy', verbose=2)
grid_rob.fit(X_train_rob, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END ...............alpha=0.0001, loss=hinge, penalty=l2; total time=   0.1s
[CV] END ...............alpha=0.0001, loss=hinge, penalty=l2; total time=   0.1s
[CV] END ...............alpha=0.0001, loss=hinge, penalty=l2; total time=   0.2s
[CV] END ...............alpha=0.0001, loss=hinge, penalty=l1; total time=   0.2s
[CV] END ...............alpha=0.0001, loss=hinge, penalty=l1; total time=   0.2s
[CV] END .......alpha=0.0001, loss=hinge, penalty=elasticnet; total time=   0.5s
[CV] END .......alpha=0.0001, loss=hinge, penalty=elasticnet; total time=   0.4s
[CV] END .......alpha=0.0001, loss=hinge, penalty=elasticnet; total time=   0.3s
[CV] END .............alpha=0.0001, loss=hinge, penalty=None; total time=   0.1s
[CV] END .......alpha=0.0001, loss=hinge, penalty=elasticnet; total time=   0.3s
[CV] END .............alpha=0.0001, loss=hinge, penalty=None; total time=   0.2s
[CV] END .......alpha=0.0001, loss=hinge, penal



[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l1; total time=  10.1s




[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l2; total time=  12.7s
[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l2; total time=  12.7s




[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l2; total time=  17.5s
[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l2; total time=  17.7s




[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l2; total time=  17.9s
[CV] END .......alpha=0.01, loss=squared_hinge, penalty=None; total time=   0.3s
[CV] END .......alpha=0.01, loss=squared_hinge, penalty=None; total time=   0.3s




[CV] END .........alpha=0.01, loss=squared_hinge, penalty=l1; total time=  20.1s
[CV] END .......alpha=0.01, loss=squared_hinge, penalty=None; total time=   0.4s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END ........alpha=0.01, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, loss=mo



[CV] END .......alpha=0.01, loss=squared_hinge, penalty=None; total time=   7.3s




[CV] END .......alpha=0.01, loss=squared_hinge, penalty=None; total time=   7.2s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l2; total time=  17.9s




[CV] END .alpha=0.01, loss=squared_hinge, penalty=elasticnet; total time=  40.7s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l2; total time=  18.1s
[CV] END .alpha=0.01, loss=squared_hinge, penalty=elasticnet; total time=  32.7s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l2; total time=  21.0s
[CV] END .alpha=0.01, loss=squared_hinge, penalty=elasticnet; total time=  33.2s




[CV] END .alpha=0.01, loss=squared_hinge, penalty=elasticnet; total time=  36.2s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l2; total time=  12.6s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l2; total time=  17.7s




[CV] END .alpha=0.01, loss=squared_hinge, penalty=elasticnet; total time=  42.8s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l1; total time=  28.3s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l1; total time=  28.4s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l1; total time=  28.2s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l1; total time=  39.3s




[CV] END ..alpha=0.1, loss=squared_hinge, penalty=elasticnet; total time=  28.5s




[CV] END ..........alpha=0.1, loss=squared_hinge, penalty=l1; total time=  38.6s




[CV] END ........alpha=0.1, loss=squared_hinge, penalty=None; total time=  11.5s




[CV] END ........alpha=0.1, loss=squared_hinge, penalty=None; total time=  12.7s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l2; total time=   0.1s
[CV] END ..alpha=0.1, loss=squared_hinge, penalty=elasticnet; total time=  37.4s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l1; total time=   0.2s




[CV] END .........alpha=0.1, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END .alpha=0.1, loss=modified_huber, penalty=elasticnet; total time=   0.1s
[CV] END .........alpha=0.1, loss=modified_huber, penalty=l1; total time=   0.1s
[CV] END .alpha=0.1, loss=modified_huber, penalty=elasticnet; total time=   0.1s
[CV] END .alpha=0.1, loss=modified_huber, penalty=elasticnet; total time=   0.2s
[CV] END .alpha=0.1, loss=modified_huber, penalty=elasticnet; total time=   0.1s
[CV] END .......alpha=0.1, loss=modified_huber, penalty=None; total time=   0.1s
[CV] END .alpha=0.1, loss=modified_huber, penalty=elasticnet; total time=   0.2s
[CV] END .......alpha=0.1, loss=modified_huber, penalty=None; total time=   0.1s
[CV] END .......alpha=0.1, loss=modified_huber, penalty=None; total time=   0.1s
[CV] END .......alpha=0.1, l



[CV] END ........alpha=0.1, loss=squared_hinge, penalty=None; total time=  15.7s




[CV] END ........alpha=0.1, loss=squared_hinge, penalty=None; total time=  14.9s




[CV] END ..alpha=0.1, loss=squared_hinge, penalty=elasticnet; total time=  27.7s




[CV] END ........alpha=0.1, loss=squared_hinge, penalty=None; total time=  11.4s
[CV] END ..alpha=0.1, loss=squared_hinge, penalty=elasticnet; total time=  26.6s




In [32]:
y_pred_rob = grid_rob.predict(X_test_rob)
print(accuracy_score(y_test, y_pred_rob))
print(grid_rob.best_params_)

0.5321477428180574
{'alpha': 0.1, 'loss': 'modified_huber', 'penalty': 'l1'}


In [35]:
grid_rob.predict_proba(np.zeros((1,36))), grid_rob.classes_

(array([[0.30759686, 0.25711524, 0.4352879 ]]), array([-1,  0,  1]))

In [36]:
grid_mm = GridSearchCV(clf, param_grid=params, cv=5, n_jobs = -1, scoring='accuracy', verbose=1)
grid_mm.fit(X_train_mm, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits




In [46]:
y_pred_mm = grid_mm.predict(X_test_mm)
print(accuracy_score(y_test, y_pred_mm))
print(grid_mm.best_params_)

0.5266757865937073
{'alpha': 0.1, 'loss': 'hinge', 'penalty': None}


In [48]:
grid_mm.predict(np.zeros((1,36))), grid_mm.classes_

(array([-1]), array([-1,  0,  1]))

In [50]:
import goalguru.soccermatch_package.ml_logic.model as sm

2023-09-12 12:50:05.225906: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-12 12:50:05.891639: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-12 12:50:05.912665: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[34m
Loading TensorFlow...[0m

✅ TensorFlow loaded (0.0s)


In [51]:
model = sm.initialize_model(X_train_rob.shape)

✅ Model initialized


In [52]:
model = sm.compile_model(model)

✅ Model compiled


In [54]:
from tensorflow.keras.utils import to_categorical

In [59]:
y_train_cat = to_categorical(y_train, num_classes=3)
y_test_cat = to_categorical(y_test, num_classes=3)

In [60]:
model, history = sm.train_model(model, X_train_rob, y_train_cat, patience=6)

[34m
Training model...[0m
✅ Model trained on 1703 rows with accuracy: 0.45


In [61]:
metrics = sm.evaluate_model(model, X_test_rob, y_test_cat)
metrics

[34m
Evaluating model on 731 rows...[0m
✅ Model evaluated, accuracy: 0.45


{'loss': 1.0420852899551392, 'accuracy': 0.4487003982067108}