In [33]:
import pickle
import sqlite3
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Let DataFrame displays show up to 40 columns before pandas elides the middle ones.
pd.options.display.max_columns = 40

In [28]:
# Load every moving-average table from the merged SQLite database and parse
# the 'Date' column of each one into datetimes.
DB_PATH = 'data/transformed/team_moving_avgs_merged.sqlite'
TABLE_NAMES = ('team_last_20', 'team_last_30', 'team_last_40', 'team_all_season')

tables = {}
con = sqlite3.connect(DB_PATH)
try:
    for table_name in TABLE_NAMES:
        # Table names come from the fixed tuple above (never from user input),
        # so placing them in the SQL text as quoted identifiers is safe.
        frame = pd.read_sql_query(f'SELECT * FROM "{table_name}"', con)
        frame['Date'] = pd.to_datetime(frame['Date'])
        tables[table_name] = frame
finally:
    # Release the connection even when one of the reads fails.
    con.close()

# Keep the per-table names that the rest of the notebook uses.
team_last_20 = tables['team_last_20']
team_last_30 = tables['team_last_30']
team_last_40 = tables['team_last_40']
team_all_season = tables['team_all_season']

team_last_30.columns

Index(['game_id', 'Date', 'Season', 'home_team', 'home_win', 'home_streak',
       'home_last10', 'home_FG', 'home_FGA', 'home_FG%', 'home_3P', 'home_3PA',
       'home_3P%', 'home_FT', 'home_FTA', 'home_FT%', 'home_ORB', 'home_DRB',
       'home_TRB', 'home_AST', 'home_STL', 'home_BLK', 'home_TOV', 'home_PF',
       'home_PTS', 'home_TS%', 'home_eFG%', 'home_3PAr', 'home_FTr',
       'home_ORB%', 'home_DRB%', 'home_TRB%', 'home_AST%', 'home_STL%',
       'home_BLK%', 'home_TOV%', 'home_ORtg', 'home_DRtg', 'home_Pace',
       'away_team', 'away_streak', 'away_last10', 'away_FG', 'away_FGA',
       'away_FG%', 'away_3P', 'away_3PA', 'away_3P%', 'away_FT', 'away_FTA',
       'away_FT%', 'away_ORB', 'away_DRB', 'away_TRB', 'away_AST', 'away_STL',
       'away_BLK', 'away_TOV', 'away_PF', 'away_PTS', 'away_TS%', 'away_eFG%',
       'away_3PAr', 'away_FTr', 'away_ORB%', 'away_DRB%', 'away_TRB%',
       'away_AST%', 'away_STL%', 'away_BLK%', 'away_TOV%', 'away_ORtg',
       'away_DRtg', 'awa

In [29]:
def prepare_data(data, threshold_date='2022-10-01', drop_columns=None):
    """Split games chronologically and return scaled, PCA-transformed features.

    Rows containing any missing value are discarded first. Games dated before
    ``threshold_date`` form the training set and games dated on or after it
    form the test set, so no complete row is lost at the boundary. The scaler
    and the PCA are fitted on the training features only and then applied to
    the test features.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Date', 'home_win', the identifier columns 'game_id',
        'Season', 'home_team', 'away_team', and every column listed in
        ``drop_columns``.
    threshold_date : str or datetime-like, default '2022-10-01'
        First date that belongs to the test set.
    drop_columns : list of str, optional
        Feature columns to exclude from the model input. Defaults to the
        box-score columns listed below; pass another list to run a different
        feature-subset experiment.

    Returns
    -------
    tuple
        ``(X_train_pca, X_test_pca, y_train, y_test)`` where the two feature
        matrices are the outputs of ``PCA.fit_transform`` / ``PCA.transform``
        and the two targets are the 'home_win' columns of each split.

    Raises
    ------
    ValueError
        If the training or the test split contains no rows.
    """
    # Identifiers and the target itself must never be used as features.
    non_feature_columns = ['game_id', 'Date', 'Season',
                           'home_team', 'home_win',
                           'away_team']

    if drop_columns is None:
        # NOTE(review): 'home_TS%' / 'away_TS%' are listed with the box-score
        # columns although they look like rate statistics — confirm intended.
        drop_columns = ['home_FG', 'home_FGA', 'home_FG%', 'home_3P', 'home_3PA',
                        'home_3P%', 'home_FT', 'home_FTA', 'home_FT%', 'home_ORB', 'home_DRB',
                        'home_TRB', 'home_AST', 'home_STL', 'home_BLK', 'home_TOV', 'home_PF',
                        'home_PTS', 'home_TS%', 'away_FG', 'away_FGA',
                        'away_FG%', 'away_3P', 'away_3PA', 'away_3P%', 'away_FT', 'away_FTA',
                        'away_FT%', 'away_ORB', 'away_DRB', 'away_TRB', 'away_AST', 'away_STL',
                        'away_BLK', 'away_TOV', 'away_PF', 'away_PTS', 'away_TS%']

    complete_rows = data.dropna(how='any')

    # One mask drives both splits; after dropna there are no missing dates, so
    # its negation is exactly "Date >= threshold_date".
    is_train = complete_rows['Date'] < threshold_date
    train_rows = complete_rows[is_train]
    test_rows = complete_rows[~is_train]
    if train_rows.empty or test_rows.empty:
        raise ValueError(
            f"Splitting at {threshold_date!r} leaves {len(train_rows)} training "
            f"and {len(test_rows)} test rows; both splits must be non-empty."
        )

    excluded = non_feature_columns + list(drop_columns)
    X_train = train_rows.drop(columns=excluded)
    X_test = test_rows.drop(columns=excluded)
    y_train = train_rows['home_win']
    y_test = test_rows['home_win']

    # Fit the preprocessing on training data only so that no information from
    # the test period leaks into the transforms.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # PCA is used with its default settings (no explicit component limit).
    pca = PCA()
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    return X_train_pca, X_test_pca, y_train, y_test

X_train, X_test, y_train, y_test = prepare_data(team_all_season)

In [30]:
# Display the PCA-transformed training matrix returned by prepare_data.
X_train

# detailed columns - 40
# general columns - 52
# NOTE(review): presumably the feature counts of the two column subsets used
# in the experiments — confirm.

array([[-4.12439348e-01, -4.55175911e-01, -9.21496689e-01, ...,
        -2.39405783e-03,  1.12157402e-03, -1.43096821e-02],
       [ 2.02090534e-01, -5.27784533e-01, -9.14383655e-01, ...,
         7.56014401e-03, -9.30742847e-03, -1.43948596e-02],
       [-7.48426911e-01,  6.51590776e-03, -7.62619408e-01, ...,
         4.30072586e-04,  9.20210000e-03, -5.00512628e-03],
       ...,
       [ 4.59225073e-01, -3.01160293e-01, -5.18634339e-01, ...,
         7.02298481e-03, -9.70051635e-03,  6.58764446e-03],
       [-9.22929818e-01, -7.10880886e-02, -5.16800981e-01, ...,
        -4.33216286e-03,  1.23680762e-03, -7.02913804e-04],
       [ 8.22042245e-01,  2.04872628e-01, -6.01530463e-01, ...,
         7.29488316e-05, -7.39600845e-03, -7.97477983e-03]])

In [31]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat across reruns.
SEED = 42
set_random_seed(SEED)

# Feed-forward binary classifier. The input width is declared with an explicit
# Input layer; passing `input_dim` to the first Dense layer is deprecated and
# triggers a Keras warning.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(40, activation='tanh'),
    Dropout(0.2),  # dropout to limit overfitting
    # hidden layers
    Dense(7, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='relu'),
    Dropout(0.5),
    # single sigmoid unit whose output is thresholded into a 0/1 label downstream
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# validation_split=0.2 makes Keras hold out part of the training data for the
# val_* metrics reported after every epoch.
history = model.fit(X_train, y_train,
                    validation_split=0.2,
                    epochs=50,
                    batch_size=32,
                    verbose=1)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5325 - loss: 0.6985 - val_accuracy: 0.6063 - val_loss: 0.6802
Epoch 2/50
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5991 - loss: 0.6799 - val_accuracy: 0.6173 - val_loss: 0.6694
Epoch 3/50
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6023 - loss: 0.6702 - val_accuracy: 0.6029 - val_loss: 0.6679
Epoch 4/50
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6020 - loss: 0.6646 - val_accuracy: 0.6122 - val_loss: 0.6627
Epoch 5/50
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6176 - loss: 0.6573 - val_accuracy: 0.6156 - val_loss: 0.6594
Epoch 6/50
[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6341 - loss: 0.6521 - val_accuracy: 0.6037 - val_loss: 0.6611
Epoch 7/50
[1m147/147[0m [32m━━━━━━━

In [32]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: MinMaxScaler instead of StandardScaler.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model = Sequential()
#
# model.add(Dense(40, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.2))  # Dropout to limit overfitting
#
# # hidden layer
# model.add(Dense(7, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(7, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=50,
#                     batch_size=32,
#                     verbose=1)

[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy on test set: 0.6293211162015827
              precision    recall  f1-score   support

           0       0.62      0.40      0.48      1051
           1       0.63      0.81      0.71      1350

    accuracy                           0.63      2401
   macro avg       0.63      0.60      0.60      2401
weighted avg       0.63      0.63      0.61      2401



In [26]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: changed neuron counts in the hidden layers, following the
# literature (Skoczylas).
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model = Sequential()
#
# model.add(Dense(40, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.2))  # Dropout to limit overfitting
#
# # hidden layer
# model.add(Dense(7, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(7, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=50,
#                     batch_size=32,
#                     verbose=1)

[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy on test set: 0.6084964598084132
              precision    recall  f1-score   support

           0       0.64      0.24      0.35      1051
           1       0.60      0.89      0.72      1350

    accuracy                           0.61      2401
   macro avg       0.62      0.57      0.54      2401
weighted avg       0.62      0.61      0.56      2401



In [32]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, all columns.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# # Input layer and first hidden layer
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.4))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.002),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.6276870163370594
              precision    recall  f1-score   support

           0       0.64      0.42      0.51       530
           1       0.62      0.80      0.70       633

    accuracy                           0.63      1163
   macro avg       0.63      0.61      0.60      1163
weighted avg       0.63      0.63      0.61      1163



In [35]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, without the general columns.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.5))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(32, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(32, activation='relu'))
# model.add(Dropout(0.5))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.003),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.646603611349957
              precision    recall  f1-score   support

           0       0.66      0.45      0.54       530
           1       0.64      0.81      0.71       633

    accuracy                           0.65      1163
   macro avg       0.65      0.63      0.63      1163
weighted avg       0.65      0.65      0.63      1163



In [29]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, without the general columns.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# # Input layer and first hidden layer
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.4))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.002),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.6311263972484953
              precision    recall  f1-score   support

           0       0.59      0.63      0.61       530
           1       0.67      0.63      0.65       633

    accuracy                           0.63      1163
   macro avg       0.63      0.63      0.63      1163
weighted avg       0.63      0.63      0.63      1163



In [41]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, without the detailed columns 1.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.4))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.002),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.6251074806534824
              precision    recall  f1-score   support

           0       0.70      0.31      0.43       530
           1       0.61      0.89      0.72       633

    accuracy                           0.63      1163
   macro avg       0.65      0.60      0.57      1163
weighted avg       0.65      0.63      0.59      1163



In [5]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: no PCA, without the general columns.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.3))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.2))

#model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.4))

# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.6173688736027515
              precision    recall  f1-score   support

           0       0.60      0.48      0.53       530
           1       0.63      0.73      0.68       633

    accuracy                           0.62      1163
   macro avg       0.61      0.61      0.60      1163
weighted avg       0.61      0.62      0.61      1163



In [10]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, without the general columns.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.3))  # Dropout to limit overfitting
#
#
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.2))
#

#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=40,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy on test set: 0.587274290627687
              precision    recall  f1-score   support

           0       0.56      0.43      0.49       530
           1       0.60      0.72      0.65       633

    accuracy                           0.59      1163
   macro avg       0.58      0.57      0.57      1163
weighted avg       0.58      0.59      0.58      1163



In [20]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Experiment note: PCA, general columns without the detailed ones.
# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.3))  # Dropout to limit overfitting
#
#
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.2))
#
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=50,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy on test set: 0.58469475494411
              precision    recall  f1-score   support

           0       0.57      0.38      0.45       530
           1       0.59      0.76      0.66       633

    accuracy                           0.58      1163
   macro avg       0.58      0.57      0.56      1163
weighted avg       0.58      0.58      0.57      1163



In [16]:
# Evaluate the `model` currently in the kernel on the test split: threshold the
# predicted values at 0.5 to get 0/1 labels, then print accuracy and the report.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print("Accuracy on test set:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# NOTE(review): the commented-out code below presumably records the model
# configuration that produced this cell's output; unlike the other evaluation
# cells, no feature/PCA setup is noted for this run — confirm.
# model.add(Dense(128, input_dim=X_train.shape[1], activation='tanh'))
# model.add(Dropout(0.3))  # Dropout to limit overfitting
#
# # Second hidden layer
# model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.2))
#
# model.add(Dense(1, activation='sigmoid'))
#
# model.compile(optimizer=Adam(learning_rate=0.001),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train,
#                     validation_split=0.2,
#                     epochs=50,
#                     batch_size=32,
#                     verbose=1)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy on test set: 0.5958727429062769
              precision    recall  f1-score   support

           0       0.57      0.45      0.51       530
           1       0.61      0.72      0.66       633

    accuracy                           0.60      1163
   macro avg       0.59      0.58      0.58      1163
weighted avg       0.59      0.60      0.59      1163



In [None]:
# Plot and save the confusion matrix of the most recent test-set predictions.
# `accuracy` is not used by the figure; the assignment is kept in case later
# cells read it.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Create the output folder so savefig does not fail on a fresh checkout.
graphs_dir = Path("graphs")
graphs_dir.mkdir(parents=True, exist_ok=True)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_xlabel("Przewidywane klasy")
ax.set_ylabel("Rzeczywiste klasy")
# The evaluated model is the neural network built in this notebook, so the
# title and file name say so; the previous "regresji logistycznej" / "_rl"
# labels described a logistic-regression model and reused its file name.
ax.set_title("Macierz pomyłek dla sieci neuronowej")
fig.savefig(graphs_dir / "confusion_matrix_nn.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Persist the trained network in Keras' native format instead of pickling it.
# The previous target, 'Models/LR_model.pkl', is named after a
# logistic-regression model and would have been overwritten by this network.
models_dir = Path('Models')
models_dir.mkdir(parents=True, exist_ok=True)
model.save(str(models_dir / 'NN_model.keras'))