Import libraries and load the CSV data

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.datasets import make_classification
from sklearn.ensemble import StackingRegressor

In [2]:
# Load the full dataset from 'all_data.csv' (path is relative to the notebook's working directory).
# Later cells read the 'team', 'team_name', 'name', 'season' and 'total_points' columns from this frame.
all_data = pd.read_csv('all_data.csv')

In [3]:
# Discard the 'team' column; it is not used by any later cell.
all_data = all_data.drop(columns='team')

In [4]:
# Replace raw team names with their encoded values using the lookup table in
# 'team_name_mapping.csv'. The table's 'original' column is the lookup key and
# its 'encoded' column is the replacement value.
team_name_mapping = pd.read_csv('team_name_mapping.csv')
team_name_dict = {
    raw_name: encoded_name
    for raw_name, encoded_name in zip(team_name_mapping['original'], team_name_mapping['encoded'])
}
# Names that are absent from the lookup table become NaN after the mapping.
all_data['team_name'] = all_data['team_name'].map(team_name_dict)

In [5]:
# Remove every row that contains a missing value, then discard the 'name' column.
all_data = all_data.dropna(axis=0).drop(columns=['name'])

In [6]:
# Combine the features selected from Lasso, RFE, and Random Forest.
# dict.fromkeys() removes duplicates while keeping the order written below.
# A plain set() would yield a column order that changes from one kernel session
# to the next (string hashing is randomised per process), which makes any
# column-order-sensitive fit impossible to reproduce.
selected_features = list(dict.fromkeys([
    'minutes_rolling_5',
    'opponent_team',
    'element',
    'ict_index_rolling_10',
    'lagged_bps_rolling_5',
    'creativity_rolling_5',
    'bps_rolling_5',
    'lagged_influence_rolling_5',
    'value_rolling_5',
    'bps_rolling_10',
    'creativity_rolling_10',
    'lagged_influence_rolling_10',
    'lagged_total_points_rolling_10',
    'lagged_creativity_rolling_10',
    'lagged_ict_index_rolling_5',
    'lagged_ict_index_rolling_10',
    'lagged_total_points_rolling_5',
    'lagged_value',
    'influence_rolling_5',
    'ict_index_rolling_5',
    'threat_rolling_5',
    'team_name',
    'lagged_bps_rolling_10',
    'lagged_minutes_rolling_5',
    'lagged_value_rolling_5',
    'influence_rolling_10',
]))

# Define X (feature matrix) and y (regression target)
X = all_data[selected_features]
y = all_data['total_points']


In [7]:
# Seasons used for fitting and for the held-out evaluation.
# Full configuration, kept for reference:
# train_seasons = ['2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22']
# test_season = ['2022-23']
train_seasons = ['2016-17', '2017-18']
test_season = ['2018-19']

# Boolean row masks over all_data, one per split
in_train = all_data['season'].isin(train_seasons)
in_test = all_data['season'].isin(test_season)

# Training data
X_train, y_train = X[in_train], y[in_train]

# Test data
X_test, y_test = X[in_test], y[in_test]


In [8]:
# Sanity check: overall frame shape, the seasons present, and the training split sizes
# (one value per output line).
print(
    all_data.shape,
    all_data['season'].unique(),
    X_train.shape,
    y_train.shape,
    sep='\n',
)


(159792, 66)
['2016-17' '2017-18' '2018-19' '2019-20' '2020-21' '2021-22' '2022-23']
(44362, 26)
(44362,)


Best parameters for Random Forest: ```{'bootstrap': True, 'max_depth': 9, 'max_features': 'sqrt', 'min_samples_leaf': 7, 'min_samples_split': 2, 'n_estimators': 450}```

Best parameters for Gradient Boosting: ```{'learning_rate': 0.1, 'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 3, 'n_estimators': 100, 'subsample': 0.9}```

Best parameters for XGBoost:
```{'colsample_bytree': 0.7, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 5, 'min_child_weight': 2, 'n_estimators': 400, 'subsample': 0.9}```

Activate the `tensyflow` conda environment to use Keras or TensorFlow (they are not natively available on ARM-based Macs): <br>
```conda activate tensyflow```
<br>
```bash
jupyter notebook \
  --NotebookApp.allow_origin='https://colab.research.google.com' \
  --port=8888 \
  --NotebookApp.port_retries=0
```

In [9]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import numpy as np

AttributeError: ignored

Best parameters for Deep Learning model: <br>
```{'activation': 'relu', 'batch_size': 16, 'epochs': 100, 'neurons': 16, 'optimizer': 'Adam'}``` <br>
Mean Squared Error on the test set: 5.028358402767072 <br>
Root Mean Squared Error on the test set: 2.24240014332123

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping
import numpy as np

# X_train, y_train, X_test and y_test come from the season split cell above.

# Data Preprocessing: standardise the features. The scaling statistics are
# learned from the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Architecture: Define a function to create the model, required for KerasRegressor
def create_model(optimizer='adam', neurons=16, activation='relu', dropout_rate=0.0, num_layers=2):
    """Build and compile a fully connected regression network.

    The network is an input Dense layer followed by ``num_layers`` further
    Dense layers, each followed by BatchNormalization and Dropout, and a
    single-unit Dense output layer. It is compiled with mean squared error loss.

    Parameters
    ----------
    optimizer : optimizer passed to ``model.compile``.
    neurons : number of units in every hidden Dense layer.
    activation : activation used by every hidden Dense layer.
    dropout_rate : rate passed to every Dropout layer.
    num_layers : number of hidden blocks added after the input block.

    Returns
    -------
    The compiled Sequential model.

    Notes
    -----
    The input width is read from the module-level ``X_train``, which must be
    defined before this function is called.

    Errors raised while building or compiling are deliberately NOT caught here.
    Returning ``None`` after printing a message would hide the real cause and
    make the caller fail later with an unrelated error.
    """
    model = Sequential()

    # Input block: fixes the expected number of input features.
    model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))

    # Additional hidden blocks.
    for _ in range(num_layers):
        model.add(Dense(neurons, activation=activation))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    # Single output unit with no activation argument, for regression.
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Create the KerasRegressor model (scikit-learn compatible wrapper around create_model).
# A failure here is left to raise: catching it and carrying on would leave `model`
# undefined, or silently bound to a stale object from an earlier run of this cell.
model = KerasRegressor(build_fn=create_model, verbose=0)

# Hyperparameter Tuning: Define the hyperparameter grid
# (7 parameters with 2 values each = 128 combinations)
param_grid_dl = {
    'batch_size': [8, 16],
    'epochs': [80, 100],
    'optimizer': ['Adam', 'SGD'],
    'neurons': [16, 32],
    'activation': ['relu', 'sigmoid'],
    'dropout_rate': [0.0, 0.1],
    'num_layers': [2, 3]  # Number of additional hidden layers in the model
}

# Initialize and fit GridSearchCV
# EarlyStopping monitors 'val_loss' by default, but no validation data is passed to
# fit(), so that metric never exists and the callback would never stop training.
# Monitoring the training loss makes the callback effective without changing the
# data each model is fitted on.
# NOTE(review): to stop on held-out loss instead, pass validation data to fit()
# and switch monitor back to 'val_loss'.
early_stopping_monitor = EarlyStopping(monitor='loss', patience=3)

# Errors are left to raise so the cell stops at the real failure instead of
# continuing without a fitted grid_search_dl.
grid_search_dl = GridSearchCV(estimator=model, param_grid=param_grid_dl, cv=3, n_jobs=-1, verbose=2)
grid_search_dl.fit(X_train, y_train, callbacks=[early_stopping_monitor])

# Get the best estimator and parameters.
# No try/except here: if the search above failed, this cell should fail with the
# real error rather than print a chain of follow-on messages.
best_dl = grid_search_dl.best_estimator_
print("Best parameters for Deep Learning model:", grid_search_dl.best_params_)

# Evaluate the model on the test set
y_pred_dl = best_dl.predict(X_test)
mse_dl = mean_squared_error(y_test, y_pred_dl)
rmse_dl = np.sqrt(mse_dl)
print(f"Mean Squared Error on the test set: {mse_dl}")
print(f"Root Mean Squared Error on the test set: {rmse_dl}")
