## Linear Regression

Train

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Create a DataFrame with your expanded dataset
data = pd.DataFrame({
    'Week': [1, 2, 1, 2],
    'Player': ['J', 'A', 'A', 'J'],
    'Pts': [15, 17, 19, 25],
    'Yds': [120, 80, 150, 100],  # Example Yds values
    'Tds': [2, 1, 3, 2]  # Example Tds values
})

# Convert categorical 'Player' variable into numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Player'])

# Separate the dataset into features (X) and the target variable (y)
X = data.drop(['Pts', 'Yds', 'Tds'], axis=1)  # Keep 'Player' and 'Week' for prediction
y = data[['Pts', 'Yds', 'Tds']]  # Predict multiple columns

# Create and train a linear regression model for each target variable
models = {}
for target in y.columns:
    model = LinearRegression()
    model.fit(X, y[target])
    models[target] = model

data.head(10)


Unnamed: 0,Week,Pts,Yds,Tds,Player_A,Player_J
0,1,15,120,2,False,True
1,2,17,80,1,True,False
2,1,19,150,3,True,False
3,2,25,100,2,False,True


Predict

In [2]:

import pandas as pd
from sklearn.linear_model import LinearRegression

# Create a DataFrame with your expanded dataset
data = pd.DataFrame({
    'Week': [1, 2, 1, 2],
    'Player': ['J', 'B', 'B', 'J'],
    'Pts': [15, 17, 19, 25],
    'Yds': [120, 80, 150, 100],  # Example Yds values
    'Tds': [2, 1, 3, 2]  # Example Tds values
})

df = data
# Convert categorical 'Player' variable into numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Player'], drop_first=False)

# Separate the dataset into features (X) and the target variable (y)
X = data.drop(['Pts', 'Yds', 'Tds'], axis=1)
y = data[['Pts', 'Yds', 'Tds']]

# Create and train a linear regression model for each player and each target variable
player_models = {}
for player in df['Player'].unique():
    player_data = data[data['Player_' + player] == 1]
    X_player = player_data.drop(['Pts', 'Yds', 'Tds'], axis=1)
    y_player = player_data[['Pts', 'Yds', 'Tds']]
    
    models = {}
    for target in y.columns:
        model = LinearRegression()
        model.fit(X_player, y_player[target])
        models[target] = model
    
    player_models[player] = models

# Now, you can make predictions for week 3 with the new features for each player
week3_data = pd.DataFrame({
    'Week': [3],
    'Player_B': [1],  # Set the corresponding player's column to 1, and others to 0 for prediction
    'Player_J': [0],  # Ensure the player columns match those from training data
})

predicted_data_week3 = {}
for player in df['Player'].unique():
    predictions = {}
    for target in y.columns:
        predictions[target] = player_models[player][target].predict(week3_data)[0]
    predicted_data_week3[player] = predictions

# Print the predictions for each player and each target variable
print("Predicted Data for Week 3:")
for player in df['Player'].unique():
    print(f"Player {player}:")
    for target, value in predicted_data_week3[player].items():
        print(f"{target}: {value:.2f}")



Predicted Data for Week 3:
Player J:
Pts: 35.00
Yds: 80.00
Tds: 2.00
Player B:
Pts: 15.00
Yds: 10.00
Tds: -1.00


## Random Forest with GridSearchCV hyperparemeter tuning and no regularization

In [7]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Create a DataFrame with your expanded dataset
data = pd.DataFrame({
    'Week': [1, 2, 1, 2],
    'Player': ['J', 'B', 'B', 'J'],
    'Pts': [15, 17, 19, 25],
    'Yds': [120, 80, 150, 100],  # Example Yds values
    'Tds': [2, 1, 3, 2]  # Example Tds values
})

df = data

# Convert categorical 'Player' variable into numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Player'], drop_first=False)

# Separate the dataset into features (X) and the target variable (y)
X = data.drop(['Pts', 'Yds', 'Tds'], axis=1)
y = data[['Pts', 'Yds', 'Tds']]

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X, y)

# Fit the model with the best hyperparameters
best_rf_model = grid_search.best_estimator_
best_rf_model.fit(X, y)

# Now, you can make predictions for week 3 with the new features
week3_data = pd.DataFrame({
    'Week': [3],
    'Player_B': [1],  # Set the corresponding player's column to 1, and others to 0 for prediction
    'Player_J': [0]  # Ensure the player columns match those from training data
})

# Calculate the prediction for week 3
predicted_pts_week3 = best_rf_model.predict(week3_data)

print(df)
print(data)

# Print the predictions for each player
print("Predicted Points for Week 3:")
for player in df['Player'].unique():
    prediction = predicted_pts_week3[0][df['Player'].unique().tolist().index(f'Player_{player}')]
    print(f"Player {player}: {prediction:.2f}")


   Week Player  Pts  Yds  Tds
0     1      J   15  120    2
1     2      B   17   80    1
2     1      B   19  150    3
3     2      J   25  100    2
   Week  Pts  Yds  Tds  Player_B  Player_J
0     1   15  120    2     False      True
1     2   17   80    1      True     False
2     1   19  150    3      True     False
3     2   25  100    2     False      True
Predicted Points for Week 3:


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices