In [None]:
import pandas as pd

# Train with 2022-2023 data, Test with 2023-2024 data.
# One QB stat sheet per season; the paths are relative to the working directory.
df_22, df_23, df_24 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '2022playerstats_QB.csv',
        '2023playerstats_QB.csv',
        '2024playerstats_QB.csv',
    )
)

# Preview the first rows of each season, oldest first.
print(df_22.head(), df_23.head(), df_24.head(), sep='\n')

# QB model features:
- pass attempts per game
- passing yds per game
- passing tds per game
- interceptions per game
- rush yds per game
- rush tds per game

In [None]:
def compute_features(df):
    """Add the per-game rate columns used by the QB model to ``df`` in place.

    For each season-total column (``PassAtt``, ``PassYds``, ``PassTD``,
    ``PassInt``, ``RushYds``, ``RushTD``) a matching ``*PerGame`` column is
    written as total / ``GamesPlayed``. Rows where the rate is undefined
    (zero games played, or a missing total) get a rate of 0.

    Args:
        df: DataFrame holding ``GamesPlayed`` and the six total columns
            listed above. It is modified in place.

    Returns:
        The same ``df`` object, so the call can be chained. Callers that
        ignore the return value and rely on the in-place update still work.
    """
    # Output column -> season-total column it is derived from.
    per_game_columns = {
        'PassAttPerGame': 'PassAtt',
        'PassYdsPerGame': 'PassYds',
        'PassTDsPerGame': 'PassTD',
        'PassIntPerGame': 'PassInt',
        'RushYdsPerGame': 'RushYds',
        'RushTDsPerGame': 'RushTD',
    }

    # Mask zero-game rows with float NaN rather than pd.NA: putting pd.NA into
    # an integer column turns it into object dtype, and the quotient and
    # fillna result then stay object (or depend on pandas' deprecated silent
    # downcasting) instead of being plain float64.
    games_played = df['GamesPlayed'].where(df['GamesPlayed'] != 0)

    for feature_name, total_name in per_game_columns.items():
        df[feature_name] = (df[total_name] / games_played).fillna(0.0)

    return df

In [None]:
# Add the per-game columns to the 2022 and 2023 frames; compute_features
# updates the frame it is given, so no reassignment is needed.
for season_stats in (df_22, df_23):
    compute_features(season_stats)

print(df_22.head(), df_23.head(), sep='\n')

In [None]:
# Model inputs: the per-game rate columns added by compute_features.
features = [
    'PassAttPerGame', 'PassYdsPerGame', 'PassTDsPerGame',
    'PassIntPerGame', 'RushYdsPerGame', 'RushTDsPerGame',
]

# Inner-join consecutive seasons on player name so that only players present
# in both seasons remain. Overlapping columns from the later season receive a
# year suffix; the earlier season's columns keep their plain names.
df_22_23_merged = pd.merge(df_22, df_23, how='inner', on='Player', suffixes=('', '_23'))
df_23_24_merged = pd.merge(df_23, df_24, how='inner', on='Player', suffixes=('', '_24'))

# Inputs are the earlier season's features; the target is the later season's
# FantasyPtsPPR column.
X_train = df_22_23_merged[features]
y_train = df_22_23_merged['FantasyPtsPPR_23']
X_test = df_23_24_merged[features]
y_test = df_23_24_merged['FantasyPtsPPR_24']

print(y_train.head(), X_train.head(), y_test.head(), X_test.head(), sep='\n')

In [None]:
# Normalize the data
from sklearn.preprocessing import StandardScaler

# The scaling statistics come from the training inputs only; the same fitted
# transform is then applied to the test inputs.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Preview the first five scaled rows of each split.
print(X_train[:5], X_test[:5], sep='\n')

In [27]:
# Train and evaluate
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error, r2_score, mean_absolute_error

model = LinearRegression()
model.fit(X_train, y_train)

train_preds = model.predict(X_train)
preds = model.predict(X_test)

# (label, metric function, true values, predicted values), reported in order.
evaluation = (
    ("Training RMSE:", root_mean_squared_error, y_train, train_preds),
    ("Training R^2:", r2_score, y_train, train_preds),
    ("Training MAE:", mean_absolute_error, y_train, train_preds),
    ("Testing RMSE:", root_mean_squared_error, y_test, preds),
    ("Testing R^2:", r2_score, y_test, preds),
    ("Testing MAE:", mean_absolute_error, y_test, preds),
)
for label, metric, truth, estimate in evaluation:
    print(label, metric(truth, estimate))

Training RMSE: 88.4336996362048
Training R^2: 0.4324899853720058
Training MAE: 71.62223895711469
Testing RMSE: 100.52369007353049
Testing R^2: 0.2947701279509122
Testing MAE: 77.75720657749756


In [None]:
# First five predictions, followed by the first five actual target values.
print(preds[:5], y_test.head(), sep='\n')

In [None]:
# Model coefficients
# Pair each fitted weight with its feature name; `features` is the column
# order the training matrix was built with.
coefficient_lines = [f"{feat}: {coef:.4f}" for feat, coef in zip(features, model.coef_)]

print("Intercept:", model.intercept_)
print("Feature Coefficients:", *coefficient_lines, sep="\n")

In [None]:
def compute_rank_squared_error(actual, predicted, player, display=False):
    """Return the sum of squared differences between predicted and actual ranks.

    ``actual`` and ``predicted`` are each ranked in descending order (the
    largest value gets rank 1; tied values share the lowest rank of their
    group). The per-row difference ``PredictedRank - ActualRank`` is squared
    and summed.

    Args:
        actual: pandas Series of true values. Its index labels the rows of
            the intermediate results table.
        predicted: 1-D array-like of predictions, in the same row order as
            ``actual``.
        player: 1-D Series or array-like of player names, in the same row
            order as ``actual``.
        display: when true, print the first rows of the results table.

    Returns:
        The summed squared rank error.
    """
    results = pd.DataFrame({
        # Take names and predictions by position, the same way `actual.values`
        # is taken. Handing a Series straight to the constructor would
        # re-align it on index labels and silently produce NaN rows whenever
        # its index differs from `actual.index`.
        'Player': pd.Series(player).to_numpy(),
        'Actual': actual.values,
        'Predicted': pd.Series(predicted).to_numpy()
    }, index=actual.index)

    results['ActualRank'] = results['Actual'].rank(ascending=False, method='min').astype(int)
    results['PredictedRank'] = results['Predicted'].rank(ascending=False, method='min').astype(int)

    # Positive error: the row was predicted at a larger rank number than it
    # actually achieved.
    results['RankError'] = results['PredictedRank'] - results['ActualRank']

    rank_squared_error = (results['RankError'] ** 2).sum()

    if display:
        print(results.head())

    return rank_squared_error

In [None]:
# Score the test-set predictions by ranking error and preview the per-player table.
rank_squared_error = compute_rank_squared_error(
    actual=y_test,
    predicted=preds,
    player=df_23_24_merged['Player'],
    display=True,
)

print("2024 Rank Squared Error:", rank_squared_error)

In [None]:
# Per-row ranking table for the test predictions, indexed like y_test.
results = pd.DataFrame(
    {'Actual': y_test.values, 'Predicted': preds},
    index=y_test.index,
).assign(
    # Compute the rankings: rank 1 is the highest value, ties share the lowest rank.
    ActualRank=lambda table: table['Actual'].rank(ascending=False, method='min').astype(int),
    PredictedRank=lambda table: table['Predicted'].rank(ascending=False, method='min').astype(int),
    # Positive error: predicted at a larger rank number than actually achieved.
    RankError=lambda table: table['PredictedRank'] - table['ActualRank'],
)

print(results.head())
