In [None]:
import pickle
import sqlite3
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Show up to 40 columns when a DataFrame is displayed.
pd.options.display.max_columns = 40

In [None]:
# Load every moving-average table from the merged SQLite file.
# The table names are fixed literals (not user input), so quoting them
# directly into the statement is safe here.
TEAM_TABLES = ('team_last_20', 'team_last_30', 'team_last_40', 'team_all_season')

con = sqlite3.connect('data/transformed/team_moving_avgs_merged.sqlite')
try:
    team_frames = {
        table: pd.read_sql_query(f'SELECT * FROM "{table}"', con)
        for table in TEAM_TABLES
    }
finally:
    # Release the database handle even when one of the queries fails.
    con.close()

# 'Date' is converted to datetime so it can be compared against a
# date threshold later on.
for frame in team_frames.values():
    frame['Date'] = pd.to_datetime(frame['Date'])

team_last_20 = team_frames['team_last_20']
team_last_30 = team_frames['team_last_30']
team_last_40 = team_frames['team_last_40']
team_all_season = team_frames['team_all_season']

team_last_30.columns

In [None]:
# Share of rows in the all-season table where home_win == 1.
int((team_all_season['home_win'] == 1).sum()) / len(team_all_season)

In [None]:
def prepare_data(data, threshold_date='2022-10-01'):
    """Build scaled, PCA-rotated train/test feature matrices and labels.

    Steps:
      1. Drop every row that contains at least one missing value.
      2. Split by ``Date``: rows strictly before ``threshold_date`` are the
         training set, all remaining rows (on or after it) are the test set.
      3. Drop identifier/label columns and the ``totals_columns`` listed below.
      4. Fit ``MinMaxScaler`` and ``PCA`` (default settings, no explicit
         component limit) on the training features only, then apply both to
         the test features.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'game_id', 'Date', 'Season', 'home_team', 'away_team',
        'home_win' and every column named in ``totals_columns``.
    threshold_date : str, optional
        Split boundary compared against ``data['Date']``.

    Returns
    -------
    tuple
        ``(X_train_pca, X_test_pca, y_train, y_test)``; the two feature
        matrices are arrays produced by the PCA transform, the two label
        objects are the 'home_win' columns of the respective splits.

    Raises
    ------
    ValueError
        If either side of the split is empty after dropping missing rows.

    Notes
    -----
    The fitted scaler and PCA are not returned, so they cannot be reused
    on new data outside this function.
    """
    non_feature_columns = ['game_id', 'Date', 'Season',
                           'home_team', 'home_win',
                           'away_team']
    # NOTE(review): 'home_TS%' / 'away_TS%' are dropped together with the
    # box-score totals although they look like rate stats - confirm intended.
    totals_columns = ['home_FG', 'home_FGA', 'home_FG%', 'home_3P', 'home_3PA',
       'home_3P%', 'home_FT', 'home_FTA', 'home_FT%', 'home_ORB', 'home_DRB',
       'home_TRB', 'home_AST', 'home_STL', 'home_BLK', 'home_TOV', 'home_PF',
       'home_PTS',  'home_TS%', 'away_FG', 'away_FGA',
       'away_FG%', 'away_3P', 'away_3PA', 'away_3P%', 'away_FT', 'away_FTA',
       'away_FT%', 'away_ORB', 'away_DRB', 'away_TRB', 'away_AST', 'away_STL',
       'away_BLK', 'away_TOV', 'away_PF', 'away_PTS', 'away_TS%']
    dropped_columns = non_feature_columns + totals_columns

    data = data.dropna(how='any')

    # One mask for both sides of the split: using its complement for the test
    # set guarantees that rows dated exactly on the threshold are not lost.
    is_train = data['Date'] < threshold_date
    train_rows = data[is_train]
    test_rows = data[~is_train]
    if train_rows.empty or test_rows.empty:
        raise ValueError(
            f"Splitting at {threshold_date!r} left {len(train_rows)} training "
            f"and {len(test_rows)} test rows; both sets must be non-empty."
        )

    X_train = train_rows.drop(columns=dropped_columns)
    X_test = test_rows.drop(columns=dropped_columns)
    y_train = train_rows['home_win']
    y_test = test_rows['home_win']

    # Both transformers are fitted on the training rows only, so no
    # information from the test period leaks into the features.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    pca = PCA()
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    return X_train_pca, X_test_pca, y_train, y_test

X_train, X_test, y_train, y_test = prepare_data(team_last_20)

In [None]:
# Total number of games that survived cleaning (train + test).
len(X_train) + len(X_test)

In [None]:
# Fraction of the usable games that ended up in the training set.
len(X_train) / (len(X_train) + len(X_test))

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable across Restart & Run All.
SEED = 42
set_random_seed(SEED)

# Small MLP: two hidden layers with dropout and a single sigmoid output
# unit. The input width is taken from the prepared training matrix.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(40, activation='tanh'),
    Dropout(0.2),
    Dense(15, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer=Adam(learning_rate=0.003),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# validation_split holds out the last 20% of the rows of X_train
# (Keras takes the slice before shuffling).
history = model.fit(X_train, y_train,
                    validation_split=0.2,
                    epochs=40,
                    batch_size=32,
                    verbose=1)

In [None]:
# Evaluate once on the held-out test set (the former second copy of this
# cell repeated exactly the same prediction and report).
# Sigmoid outputs are thresholded at 0.5 and flattened to a 1-D label vector.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)
# The printed message is Polish for "Model accuracy".
print(f"Dokładność modelu: {accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred))

In [None]:
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=ax)
# Axis labels and title are Polish: "Predicted classes", "Actual classes",
# "Confusion matrix for MLP".
ax.set_xlabel("Przewidywane klasy")
ax.set_ylabel("Rzeczywiste klasy")
ax.set_title("Macierz pomyłek dla MLP")

# Create the output folder on first run so savefig does not fail.
figure_path = Path("graphs/confusion_matrix_nn.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Persist the trained network with pickle.
# NOTE(review): the file is called 'LR_model.pkl' although `model` is the
# Keras MLP trained in this notebook - confirm this does not overwrite a
# logistic-regression artefact written elsewhere.
# NOTE(review): the scaler and PCA fitted inside prepare_data are not saved,
# so this file alone cannot score raw feature rows. Whether a Keras model is
# picklable depends on the installed version; model.save() is the native
# alternative - TODO confirm which one the consumers expect.
model_path = Path('Models/LR_model.pkl')
# Create the output folder on first run so open() does not fail.
model_path.parent.mkdir(parents=True, exist_ok=True)
with open(model_path, 'wb') as file:
    pickle.dump(model, file)