# In [None]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# Abort immediately when TensorFlow reports no GPU device; otherwise announce
# that the GPU will be used.
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) == 0:
    raise RuntimeError("GPU device not found.")
print("GPU found. Using GPU for computations.")

# Per-position model settings: the columns the LSTM learns to predict and the
# number of consecutive rows (look_back) that make up one input window.
position_settings = {
    'qb': (['MISC FPTS'], 15),
    'rb': (['MISC FPTS', 'MAX_FPTS'], 16),
    'wr': (['MISC FPTS', 'AVG_FPTS', 'RECEIVING YDS', 'RECEIVING TD'], 14),
    'te': (['MISC FPTS', 'AVG_FPTS'], 20),
}

# Only "qb" is enabled. The full list ["qb", "rb", "wr", "te"] used to be
# assigned here and was immediately overridden; switch to
# list(position_settings) to run every position.
positions = ["qb"]

# The CSV does not depend on the position, so read it once instead of once per
# loop iteration.
weekly_scoring = pd.read_csv("datasets/weekly_scoring.csv")

for pos in positions:
    print("Position: " + pos)

    # Fail with a clear message instead of a NameError (or silently reusing the
    # previous iteration's settings) when a position has no configuration.
    if pos not in position_settings:
        raise ValueError(f"No model settings defined for position {pos!r}")
    columns_to_predict, look_back = position_settings[pos]

    # Preprocessing: keep this position's rows and drop rows whose 'MISC FPTS'
    # is zero (bye weeks, injuries). Alternatives that were tried and left
    # disabled: replacing zeros with the overall mean or with a per-player mean.
    # .copy() detaches the slice from weekly_scoring so the column assignment
    # below does not raise pandas' SettingWithCopyWarning.
    keep_rows = (weekly_scoring['POS'] == pos) & (weekly_scoring['MISC FPTS'] != 0)
    data = weekly_scoring[keep_rows].copy()

    # Sort the data by the date column so later windows run forward in time.
    date_column = "CONTINUOUS_DATE"
    data[date_column] = pd.to_datetime(data[date_column])
    data = data.sort_values(by=date_column)

    # Extract the relevant columns for training as a 2-D array
    # (rows x len(columns_to_predict)).
    training_data = data[columns_to_predict].values

    # Impute missing values with the column mean.
    # NOTE(review): the imputer and the scaler below are fitted on the whole
    # series, including the rows that later form the test split, so the
    # reported test metrics may be slightly optimistic.
    imputer = SimpleImputer(strategy='mean')
    training_data = imputer.fit_transform(training_data)

    # Normalize the data to [0, 1] using Min-Max scaling; `scaler` is kept so
    # predictions can be mapped back to the original scale.
    scaler = MinMaxScaler(feature_range=(0, 1))
    training_data_scaled = scaler.fit_transform(training_data)
    # Define a function to create LSTM datasets
    def create_lstm_dataset(dataset, look_back=1):
        """Slice a time-ordered array into (window, next row) training pairs.

        Args:
            dataset: Array-like whose first axis is the time/row axis.
            look_back: Number of consecutive rows in each input window.

        Returns:
            A tuple ``(X, Y)`` of NumPy arrays where ``X[i]`` equals
            ``dataset[i:i + look_back]`` and ``Y[i]`` equals
            ``dataset[i + look_back]``. For a 2-D ``dataset`` with ``f``
            columns, ``X`` has shape ``(samples, look_back, f)`` and ``Y`` has
            shape ``(samples, f)``. When ``dataset`` has no more than
            ``look_back`` rows, ``samples`` is 0 but the trailing dimensions
            are preserved, so callers can still inspect ``X.shape``.

        Raises:
            ValueError: If ``look_back`` is negative.
        """
        if look_back < 0:
            raise ValueError(f"look_back must be non-negative, got {look_back}")

        dataset = np.asarray(dataset)
        n_samples = max(len(dataset) - look_back, 0)

        # Row i of window_index holds the positions i .. i + look_back - 1.
        # Indexing with it builds every window in one vectorised copy and keeps
        # the (samples, look_back, ...) rank even when n_samples is 0.
        window_index = np.arange(n_samples)[:, None] + np.arange(look_back)[None, :]
        X = dataset[window_index]

        # The target of each window is the row immediately following it.
        Y = np.array(dataset[look_back:])
        return X, Y

    # Create the LSTM dataset: X holds look_back-row windows of the scaled
    # series, Y holds the row that follows each window.
    X, Y = create_lstm_dataset(training_data_scaled, look_back)

    # Split the data into training and testing sets: first 80% of the windows
    # for training, the remainder for testing (no shuffling).
    train_size = int(len(X) * 0.8)
    if train_size == 0:
        # Fewer than two windows means an empty training set; fail with a
        # clear message instead of an obscure shape error further down.
        raise ValueError(
            f"Not enough rows for position {pos!r}: need at least "
            f"{look_back + 2} rows after filtering, got {len(training_data_scaled)}."
        )
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # X_train / X_test are already (samples, time steps, features) because the
    # scaled series is 2-D, so no reshape is needed before feeding the LSTM.

    # Build the LSTM model. Keras picks the cuDNN kernel on its own when a GPU
    # is present and the layer keeps its default activations, bias, no
    # recurrent dropout and no unrolling; `implementation=2` only selects the
    # mode that batches the gate computations into fewer, larger operations.
    model = Sequential()
    model.add(LSTM(units=50, input_shape=(X_train.shape[1], X_train.shape[2]), implementation=2))
    model.add(Dense(units=len(columns_to_predict)))
    model.compile(optimizer='adam', loss='mse')

    # Train the model.
    # NOTE(review): the test split doubles as validation data; with verbose=0
    # and the fit history discarded, the validation metrics are never seen.
    model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_data=(X_test, Y_test), verbose=0)

    # Make predictions on the test set
    predictions = model.predict(X_test)

    # Rescale the predictions and targets back to the original scale
    predictions_rescaled = scaler.inverse_transform(predictions)
    Y_test_rescaled = scaler.inverse_transform(Y_test)

    # Evaluate the model
    mse = mean_squared_error(Y_test_rescaled, predictions_rescaled)
    print(f'Mean Squared Error {pos}: {mse}')

    # Mean Absolute Error (MAE)
    mae = mean_absolute_error(Y_test_rescaled, predictions_rescaled)
    print(f"MAE {pos}: {mae}")

    # R-squared
    r_squared = r2_score(Y_test_rescaled, predictions_rescaled)
    print(f"R-squared {pos}: {r_squared}")

    # Pair each test prediction with the player of its target row. Test window
    # i targets row (train_size + i + look_back) of the sorted data, hence the
    # offset. Only the first predicted column is exported.
    result_df = pd.DataFrame({'PLAYER': data['PLAYER'].iloc[train_size + look_back:], 'Predicted_FPTS': predictions_rescaled[:, 0]})

    # Group by 'PLAYER' and calculate the average predicted FPTS
    result_df = result_df.groupby('PLAYER').mean().reset_index()
    result_df = result_df.sort_values(by='Predicted_FPTS', ascending=False)

    # Add the 'TEAM' column: the text inside the first pair of parentheses in
    # the player string, or a missing value when there is none.
    pattern = r'\((.*?)\)'
    result_df['TEAM'] = result_df['PLAYER'].str.extract(pattern, expand=False)

    # Remove any player with the team equal to 'FA'
    final_results_df = result_df.query("TEAM != 'FA'")

    # Save the results to a CSV file, creating the output directory first so a
    # missing folder does not throw away a finished training run.
    file_name = f"predictions/LSTM_predictions_{pos}.csv"
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    final_results_df.to_csv(file_name, index=False)
