Home-Win Classification: Logistic Regression, Random Forest, and Gradient Boosting

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the game table and the team-ID lookup table from disk
df = pd.read_csv('2024Weeks1-2.csv')
teamID_df = pd.read_csv('TeamID.csv')
# One dict per lookup row, in file order (getTeamName resolves IDs by row position)
teamDict = teamID_df.to_dict(orient='records')

# Function to get Team Names from ID
def getTeamName(id, teams=None):
    """Translate 1-based team IDs into team names.

    An ID ``k`` refers to the ``k``-th row of the lookup table (row position
    ``k - 1``); the row's ``'Team'`` entry is the name. The lookup is purely
    positional -- no ID column of the table is consulted.

    Parameters
    ----------
    id : iterable of numbers
        Team IDs to translate (e.g. a pandas Series). Whole-valued floats
        such as ``3.0`` are accepted.
    teams : sequence of dict, optional
        Lookup rows, each with a ``'Team'`` key. Defaults to the module-level
        ``teamDict``.

    Returns
    -------
    list
        One entry per input ID, in input order. IDs that do not correspond to
        a row (out of range, fractional, NaN) yield ``None`` so the result
        stays aligned with the input instead of silently shrinking.
    """
    if teams is None:
        teams = teamDict
    rows = list(teams)

    names = []
    for team_id in id:
        position = team_id - 1
        # `position == position` is False only for NaN; the range check runs
        # before int() so infinities never reach the conversion.
        is_valid = (
            position == position
            and 0 <= position < len(rows)
            and position == int(position)
        )
        names.append(rows[int(position)]['Team'] if is_valid else None)
    return names

# Split into past (rows used for training) and present (rows to predict).
# The first N_PLAYED_GAMES rows of the CSV are treated as the training games.
N_PLAYED_GAMES = 16
dfTrain = df.head(N_PLAYED_GAMES).copy()
# iloc slicing yields an empty frame when the file has <= N_PLAYED_GAMES rows;
# tail(len(df) - N) with a negative argument would overlap the training rows.
dfPredict = df.iloc[N_PLAYED_GAMES:].copy()

# Step 1: Infer the Win/Loss outcome for the home team (1 = win, 0 = loss)
# Rows without a final score are dropped first: comparing NaN scores evaluates
# to False and would otherwise label those games as home losses.
# Note: a tie is counted as a home loss (strict '>').
dfTrain = dfTrain.dropna(subset=['Home Total Score', 'Away Total Score'])
dfTrain = dfTrain.assign(
    **{'Home Win': dfTrain['Home Total Score'] > dfTrain['Away Total Score']}
)

# Step 2: Select relevant features (moneylines and home/away team IDs)
features = ['Home ML', 'Away ML', 'Home Team ID', 'Away Team ID']

dfTrain = dfTrain[features + ['Home Win']].dropna()

# Step 3: Define features (X) and target (y)
X = dfTrain.drop(columns=['Home Win'])
y = dfTrain['Home Win'].astype(int)

# Step 4: Split data into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Step 5: Initialize and train types of models
model = LogisticRegression()
model.fit(X_train, y_train)
modelRF = RandomForestClassifier(n_estimators=100, random_state=42)
modelRF.fit(X_train, y_train)
modelGB = GradientBoostingClassifier(n_estimators=100, random_state=42)
modelGB.fit(X_train, y_train)


def _evaluate(fitted_model, X_eval, y_eval):
    """Return (predictions, accuracy, classification report) for a fitted model."""
    predicted = fitted_model.predict(X_eval)
    # zero_division=0 reports 0.0 for a class that is never predicted (the
    # value sklearn already falls back to) without emitting a warning per call.
    return (
        predicted,
        accuracy_score(y_eval, predicted),
        classification_report(y_eval, predicted, zero_division=0),
    )


# Step 6 & 7: Make predictions on the test set and evaluate each model
y_pred, accuracy, report = _evaluate(model, X_test, y_test)
y_predRF, accuracyRF, reportRF = _evaluate(modelRF, X_test, y_test)
y_predGB, accuracyGB, reportGB = _evaluate(modelGB, X_test, y_test)

# Model Eval Output
print(f"Accuracy: {accuracy}")
print("Classification Report LR:")
print(report)

print(f"Accuracy: {accuracyRF}")
print("Classification Report RF:")
print(reportRF)

print(f"Accuracy: {accuracyGB}")
print("Classification Report GB:")
print(reportGB)

#Predict Next Week Games
# Only rows with every feature present can be scored by the models.
dfPredict = dfPredict[features].dropna()

X_next_games = dfPredict[features]

predictions = model.predict(X_next_games)
predictionsRF = modelRF.predict(X_next_games)
predictionsGB = modelGB.predict(X_next_games)

#Prediction Output
prediction_results = pd.DataFrame({
    'Home Team': getTeamName(dfPredict['Home Team ID']),
    'Away Team': getTeamName(dfPredict['Away Team ID']),
    'Predicted Home Win LR': predictions,
    'Predicted Home Win RF': predictionsRF,
    'Predicted Home Win GB': predictionsGB
})

print("Predictions for Next Week's Games:")
# Show the table under its header; previously only the header was printed.
print(prediction_results)
prediction_results.to_csv('2024Week2Predictions.csv')

Accuracy: 0.6
Classification Report LR:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.75      0.75      0.75         4

    accuracy                           0.60         5
   macro avg       0.38      0.38      0.38         5
weighted avg       0.60      0.60      0.60         5

Accuracy: 0.4
Classification Report RF:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.67      0.50      0.57         4

    accuracy                           0.40         5
   macro avg       0.33      0.25      0.29         5
weighted avg       0.53      0.40      0.46         5

Accuracy: 0.8
Classification Report GB:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.80      1.00      0.89         4

    accuracy                           0.80         5
   macro

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
