Linear Regression

In [134]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Read In CSV
df = pd.read_csv('2024Weeks1-2.csv')
teamID_df = pd.read_csv('TeamID.csv')
teamDict = teamID_df.to_dict('records')

# Function to get Team Names from ID
def getTeamName(id):
    temp = []
    for i in id:
        count = 0
        for j in teamDict:
            if count == i - 1:
                temp.append(j['Team'])
            count = count + 1
    return temp

# Split into past and present
dfTrain = df.head(16)
dfPredict = df.tail(len(df)-16)

# Step 1: Infer the Win/Loss outcome for the home team (1 = win, 0 = loss)
dfTrain['Home Win'] = dfTrain['Home Total Score'] > dfTrain['Away Total Score']
# Step 2: Select relevant features (quarterly scores, home/away teams, etc.)
features = ['Home ML', 'Away ML', 'Home Team ID', 'Away Team ID']

dfTrain = dfTrain[features + ['Home Win']].dropna()
#label_encoder = LabelEncoder()
#df['Home Team'] = label_encoder.fit_transform(df['Home Team'])
#df['Away Team'] = label_encoder.fit_transform(df['Away Team'])

# Step 3: Define features (X) and target (y)
X = dfTrain.drop(columns=['Home Win'])
y = dfTrain['Home Win'].astype(int)
# Step 4: Split data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Initialize and train a Logistic Regression model (linear regression for classification)
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 6: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 7: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Model Eval Output
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

results = pd.DataFrame({
    'Home Team': getTeamName(X_test['Home Team ID']),
    'Away Team': getTeamName(X_test['Away Team ID']),
    'Actual': y_test,
    'Predicted': y_pred
})
print("Training Results:")
print(results)

#Predict Next Week Games
dfPredict = dfPredict[features].dropna()

X_next_games = dfPredict[features]

predictions = model.predict(X_next_games)

#Prediction Output
prediction_results = pd.DataFrame({
    'Home Team': getTeamName(dfPredict['Home Team ID']),
    'Away Team': getTeamName(dfPredict['Away Team ID']),
    'Predicted Home Win': predictions
})

print("Predictions for Next Week's Games:")
print(prediction_results)

Accuracy: 0.5
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.67      0.67      0.67         3

    accuracy                           0.50         4
   macro avg       0.33      0.33      0.33         4
weighted avg       0.50      0.50      0.50         4

Training Results:
              Home Team             Away Team  Actual  Predicted
0    Kansas City Chiefs      Baltimore Ravens       1          1
1   Philadelphia Eagles     Green Bay Packers       1          1
5    Cincinnati Bengals  New England Patriots       0          1
14        Detroit Lions      Los Angeles Rams       1          0
Predictions for Next Week's Games:
                Home Team             Away Team  Predicted Home Win
0          Miami Dolphins         Buffalo Bills                   1
1          Dallas Cowboys    New Orleans Saints                   1
2           Detroit Lions  Tampa Bay Buccaneers       

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTrain['Home Win'] = dfTrain['Home Total Score'] > dfTrain['Away Total Score']
