## Linear Regression

Train

In [16]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Toy training set, written out one game log per row
# (the Yds and Tds figures are example values).
game_logs = [
    {'Week': 1, 'Player': 'J', 'Pts': 15, 'Yds': 120, 'Tds': 2},
    {'Week': 2, 'Player': 'A', 'Pts': 17, 'Yds': 80, 'Tds': 1},
    {'Week': 1, 'Player': 'A', 'Pts': 19, 'Yds': 150, 'Tds': 3},
    {'Week': 2, 'Player': 'J', 'Pts': 25, 'Yds': 100, 'Tds': 2},
]
stat_columns = ['Pts', 'Yds', 'Tds']  # the columns we want to predict

# One-hot encode 'Player' so that every feature column is numeric
data = pd.get_dummies(pd.DataFrame(game_logs), columns=['Player'])

# Features (X) are 'Week' plus the player indicator columns;
# targets (y) are the three stat columns.
X = data.drop(columns=stat_columns)
y = data[stat_columns]

# Fit an independent linear regression per stat, keyed by the stat's column name
models = {}
for target in stat_columns:
    model = LinearRegression().fit(X, y[target])  # fit() returns the estimator
    models[target] = model

# Show the encoded training frame
data.head(10)


Unnamed: 0,Week,Pts,Yds,Tds,Player_A,Player_J
0,1,15,120,2,False,True
1,2,17,80,1,True,False
2,1,19,150,3,True,False
3,2,25,100,2,False,True


Predict

In [22]:

import pandas as pd
from sklearn.linear_model import LinearRegression

# Toy data set: one row per (week, player) with the three stats to forecast.
data = pd.DataFrame({
    'Week': [1, 2, 1, 2],
    'Player': ['J', 'B', 'B', 'J'],
    'Pts': [15, 17, 19, 25],
    'Yds': [120, 80, 150, 100],  # Example Yds values
    'Tds': [2, 1, 3, 2]  # Example Tds values
})

# Week to forecast and the stat columns being predicted. Change PREDICT_WEEK
# to forecast a different week (the `week3` variable names are kept as-is).
PREDICT_WEEK = 3
TARGET_COLUMNS = ['Pts', 'Yds', 'Tds']

# Keep a handle on the raw frame, which still has the original 'Player'
# labels; get_dummies below returns a new frame, so `df` is not modified.
df = data
players = df['Player'].unique()

# Convert categorical 'Player' variable into numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Player'], drop_first=False)

# Separate the dataset into features (X) and the target variables (y)
X = data.drop(TARGET_COLUMNS, axis=1)
y = data[TARGET_COLUMNS]

# Train one linear regression per player and per target variable.
# Within a single player's rows the Player_* indicators are constant, so
# 'Week' is the only feature that varies; the indicators are kept so every
# model shares the column layout of X.
player_models = {}
for player in players:
    # `data` (and therefore X and y) shares its index with `df`, so a mask
    # built from the raw labels selects this player's encoded rows.
    player_rows = df['Player'] == player
    X_player = X[player_rows]
    y_player = y[player_rows]

    models = {}
    for target in TARGET_COLUMNS:
        model = LinearRegression()
        model.fit(X_player, y_player[target])
        models[target] = model

    player_models[player] = models

# Build one prediction row per player (indexed by player label) using the
# exact training column layout. Each model is queried with its own player's
# indicator set, rather than reusing a single row hard-coded for player B.
week3_data = pd.DataFrame(0, index=players, columns=X.columns)
week3_data['Week'] = PREDICT_WEEK
for player in players:
    week3_data.loc[player, 'Player_' + player] = 1

predicted_data_week3 = {}
for player in players:
    # .loc[[player]] keeps a one-row DataFrame so column names reach predict()
    player_row = week3_data.loc[[player]]
    predicted_data_week3[player] = {
        target: player_models[player][target].predict(player_row)[0]
        for target in TARGET_COLUMNS
    }

# Print the predictions for each player and each target variable
print(f"Predicted Data for Week {PREDICT_WEEK}:")
for player in players:
    print(f"Player {player}:")
    for target, value in predicted_data_week3[player].items():
        print(f"{target}: {value:.2f}")



Predicted Data for Week 3:
Player J:
Pts: 35.00
Yds: 80.00
Tds: 2.00
Player B:
Pts: 15.00
Yds: 10.00
Tds: -1.00


## Linear Regression FULL

In [6]:
# Ask which week is being forecast. In this cell the value is only used to
# build the name of the output CSV.
week_reply = input("Enter the week to predict: ")
num_week = int(week_reply)

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Load the weekly scoring data
data = pd.read_csv("datasets/weekly_scoring.csv")

# Preprocessing: keep quarterbacks only and drop the receiving / misc columns
data = data[data['POS'] == 'qb']
data = data.drop(columns=['POS RANK', 'POS', 'MISC G', 'MISC ROST', 'MISC FPTS/G', 'RECEIVING REC', 'RECEIVING TGT', 'RECEIVING YDS', 'RECEIVING Y/R',
 'RECEIVING LG', 'RECEIVING 20+', 'RECEIVING TD', 'RUSHING Y/A', 'RUSHING LG',
 'RUSHING 20+'])
# One indicator column per player. drop_first=False keeps the first player's
# column too; with drop_first=True that player had no column and was
# silently missing from the results below.
data = pd.get_dummies(data, columns=['PLAYER'], drop_first=False)

# Identify columns with missing values before imputation
columns_with_missing = data.columns[data.isnull().any()].tolist()

# Impute missing values with the mean of each column
# NOTE(review): this also fills missing values of the target 'MISC FPTS';
# confirm that is intended rather than dropping rows without a target.
imputer = SimpleImputer(strategy='mean')
data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Columns excluded from the feature matrix: the per-game stats, the target
# itself and 'WEEK'
var_list = ['PASSING CMP', 'PASSING ATT', 'PASSING PCT', 'PASSING YDS', 'PASSING Y/A', 'PASSING TD', 'PASSING INT',
            'PASSING SACKS', 'RUSHING ATT', 'RUSHING YDS', 'RUSHING TD', 'MISC FL', 'MISC FPTS', 'WEEK']

# Separate the dataset into features (X) and the target variable (y).
# 'WEEK' is dropped from X, so the prediction for a player does not depend
# on the requested week; num_week only selects the output file name.
X = data.drop(var_list, axis=1)
y = data['MISC FPTS']

# Save an independent copy of the preprocessed dataset
df = data.copy()

# Hyperparameter tuning using GridSearchCV for Linear Regression
param_grid = {
    'fit_intercept': [True, False]
}

grid_search = GridSearchCV(LinearRegression(), param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X, y)

# GridSearchCV refits the best configuration on all of (X, y) by default
# (refit=True), so best_estimator_ is already trained on the full data.
best_lr_model = grid_search.best_estimator_

# Player indicator columns created by get_dummies (they carry the 'PLAYER_'
# prefix). Any other column left in X is a feature, not a player.
unique_players = X.columns[X.columns.str.startswith('PLAYER_')]

# Create a list of dictionaries to store the results
results_list = []

for player in unique_players:
    # Feature row of zeros with only this player's indicator switched on
    player_row = pd.DataFrame(0, index=range(1), columns=X.columns)
    player_row[player] = 1
    # Make a prediction for the player
    misc_fpts_prediction = best_lr_model.predict(player_row)
    # 'Player' holds the indicator column name, including the 'PLAYER_' prefix
    results_list.append({'Player': player, 'MISC FPTS': misc_fpts_prediction[0]})

# Convert the list of dictionaries into a DataFrame
results_df = pd.DataFrame(results_list)

# Save the results to a CSV file
file_name = f"datasets/LRweek{num_week}.csv"
results_df.to_csv(file_name, index=False)

# Last expression of the cell so the preview is actually rendered
results_df.head(10)
