Load in data

In [59]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv('no_null_df.csv')

# Drop 'Unnamed: 0' index column

df = df.drop(columns=['Unnamed: 0'])

Separate String Variables that can't be used for Linear Regression. Separate target variable PPR/G 

In [60]:
# Separate String variables that can't be used for Linear Regression

string_variables = ['Player', 'Tm', 'Season', 'PosRank']
X_string_features = df[string_variables]

# Separate target variable PPR/G

y = df['PPR/G']

# Collect data for all variables being used in Linear Regression

X = df.drop(columns=string_variables)
X = X.drop(columns=['PPR/G'])

print(X.dtypes)

Age                         int64
PPR/G_prev                float64
avg_depth_of_target       float64
caught_percent            float64
grades_offense            float64
grades_pass_route         float64
route_rate                float64
slot_rate                 float64
wide_rate                 float64
inline_rate               float64
yprr                      float64
man_grades_pass_route     float64
zone_grades_pass_route    float64
grades_pass               float64
pbe                       float64
Pass Plays/G              float64
Receptions/G              float64
Routes/G                  float64
Targets/G                 float64
TD/G                      float64
YDS/G                     float64
YAC/G                     float64
RZ Targets/G              float64
P_Att/G                   float64
P_Cmp/G                   float64
P_TD/G                    float64
P_Yds/G                   float64
dtype: object


In [61]:
# Standardize data

scaler = StandardScaler()


X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized, columns=X.columns)
X_standardized

# Combine back with df_string_features

X_combined = pd.concat([X_standardized, X_string_features.reset_index(drop=True)], axis=1)

Perform Linear Regression

In [62]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Squared Error: 11.53827389322356
R^2 Score: 0.6630401617010593


Analyze Results

In [63]:
# Combine predictions with string features for further analysis
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
#results_with_strings = pd.concat([results_df, X_string_features.loc[X_test.index].reset_index(drop=True)], axis=1)

print(results_df)

# Get the coefficients (weights) and intercept
coefficients = model.coef_

# Create a DataFrame to display feature names and their corresponding coefficients
feature_names = X_standardized.columns
coefficients_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': coefficients
})

print(coefficients_df)

        Actual  Predicted
521   6.800000   6.313697
941   6.176471  11.124057
741   3.542857   5.098772
980   2.511111   4.122518
411   2.700000   2.996341
..         ...        ...
332  13.568750  15.501992
208  16.337500  12.644738
992   1.436364   2.855296
78    3.388889   6.259422
29   19.166667  14.042999

[201 rows x 2 columns]
                   Feature  Coefficient
0                      Age    -0.620083
1               PPR/G_prev     0.741687
2      avg_depth_of_target     0.013502
3           caught_percent    -0.256697
4           grades_offense    -1.512350
5        grades_pass_route     2.844687
6               route_rate     0.109216
7                slot_rate    -0.311258
8                wide_rate    -0.225327
9              inline_rate     0.104785
10                    yprr    -0.496305
11   man_grades_pass_route    -0.152096
12  zone_grades_pass_route    -0.464664
13             grades_pass     0.277483
14                     pbe     0.140150
15            Pass Plays