## Random Forest with GridSearchCV hyperparemeter tuning and no regularization

In [4]:
# Get user input for the week number to predict
num_week = int(input("Enter the week to predict: "))

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer

# Load your dataset
data = pd.read_csv("datasets/weekly_scoring.csv")

# Preprocessing
data = data[data['POS'] == 'qb']
data = data.drop(columns=['POS RANK', 'POS', 'MISC G', 'MISC ROST', 'MISC FPTS/G', 'RECEIVING REC', 'RECEIVING TGT', 'RECEIVING YDS', 'RECEIVING Y/R',
 'RECEIVING LG', 'RECEIVING 20+', 'RECEIVING TD', 'RUSHING Y/A', 'RUSHING LG',
 'RUSHING 20+'])
data = pd.get_dummies(data, columns=['PLAYER'], drop_first=True)

# Identify columns with missing values before imputation
columns_with_missing = data.columns[data.isnull().any()].tolist()

# Impute missing values with the mean of each column
imputer = SimpleImputer(strategy='mean')
data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Define the list of variables to predict
var_list = ['PASSING CMP', 'PASSING ATT', 'PASSING PCT', 'PASSING YDS', 'PASSING Y/A', 'PASSING TD', 'PASSING INT',
            'PASSING SACKS', 'RUSHING ATT', 'RUSHING YDS', 'RUSHING TD', 'MISC FL', 'MISC FPTS', 'WEEK']

# Separate the dataset into features (X) and the target variable (y)
X = data.drop(var_list, axis=1)
y = data['MISC FPTS']

# Save a copy of the dataset
df = data

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X, y)

# Fit the model with the best hyperparameters
best_rf_model = grid_search.best_estimator_
best_rf_model.fit(X, y)

# Get a list of unique player names after one-hot encoding
unique_players = X.columns

# Create a list of dictionaries to store the results
results_list = []

for player in unique_players:
    # Create a DataFrame with all zeros
    predict_data = pd.DataFrame(0, index=range(1), columns=X.columns)
    # Set the corresponding player's column to 1 for prediction
    predict_data[player] = 1
    # Make a prediction for the player
    misc_fpts_prediction = best_rf_model.predict(predict_data)
    results_list.append({'Player': player, 'MISC FPTS': misc_fpts_prediction[0]})

# Convert the list of dictionaries into a DataFrame
results_df = pd.DataFrame(results_list)


# Save the results to a CSV file
file_name = f"predictions/RFweek{num_week}.csv"
results_df.to_csv(file_name, index=False)

results_df.head(10)

Unnamed: 0,Player,MISC FPTS
0,PLAYER_Aaron Rodgers (NYJ),17.991356
1,PLAYER_Adam Froman (ATL),1.008681
2,PLAYER_Aidan O'Connell (LV),1.928071
3,PLAYER_Alex McGough (GB),0.693268
4,PLAYER_Alex Smith (FA),11.243885
5,PLAYER_Alex Tanney (FA),2.226974
6,PLAYER_Andrew Luck (FA),18.960851
7,PLAYER_Andy Dalton (CAR),13.236663
8,PLAYER_Anthony Brown (FA),2.226974
9,PLAYER_Anthony Richardson (IND),7.80026
