<a href="https://colab.research.google.com/github/mnbeebe/MLB/blob/main/LinearModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [48]:
class LinearRegression():
    def __init__(self, learningRate=0.01, iterations=1000):
        self.learningRate = learningRate
        self.iterations = iterations

    #Normalizes each feature to have a mean of 0 and stdev of 1
    def normalize(self, X):
        return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    #Initializes equation of line to zero
    def fit(self, X, Y):
        self.rows, self.columns = X.shape
        self.weights = pd.DataFrame(np.zeros(self.columns))
        self.bias = 0
        self.X = self.normalize(X)
        self.Y = Y

        for i in range(self.iterations):
            self.updateWeights()

    #Uses gradient descent to adjust line
    def updateWeights(self):
        Y_pred = self.predict(self.X)

        dW = - ((self.X.T).dot(self.Y - Y_pred)) / self.rows
        db = - np.sum(self.Y - Y_pred) / self.rows

        self.weights = self.weights - self.learningRate * dW
        self.bias = self.bias - self.learningRate * db

    def predict(self, X):
        return X.dot(self.weights) + self.bias

    #Calculate coefficient of determination of the model's prediction
    def score(self, X, Y):
        Y_pred = self.predict(self.normalize(X))
        u = ((Y - Y_pred) ** 2).sum()
        v = ((Y - Y.mean()) ** 2).sum()
        return 1 - (u/v)

In [49]:
#Testing the model with NFL stats
from google.colab import drive
drive.mount('/content/drive')
path="/content/drive/MyDrive/Colab Notebooks/NAISS/archive/team_stats_2003_2023.csv"
df = pd.read_csv(path)
df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,year,team,wins,losses,win_loss_perc,points,points_opp,points_diff,mov,g,...,rush_td,rush_yds_per_att,rush_fd,penalties,penalties_yds,pen_fd,score_pct,turnover_pct,exp_pts_tot,ties
0,2003,New England Patriots,14,2,0.875,348,238,110,6.9,16,...,9,3.4,91,111,998,26,27.9,11.3,-136.51,
1,2003,Miami Dolphins,10,6,0.625,311,261,50,3.1,16,...,14,3.7,99,103,913,22,28.1,17.2,-177.92,
2,2003,Buffalo Bills,6,10,0.375,243,279,-36,-2.3,16,...,13,3.9,96,106,891,22,21.9,17.6,-230.07,
3,2003,New York Jets,6,10,0.375,283,299,-16,-1.0,16,...,8,4.0,78,69,550,15,32.4,11.8,-107.89,
4,2003,Baltimore Ravens,10,6,0.625,391,281,110,6.9,16,...,18,4.8,115,126,970,23,31.8,16.6,-220.5,


In [50]:
X = df[["points_diff", "total_yards", "turnovers"]].values
Y = df[["wins"]].values
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.25)

In [51]:
model = LinearRegression()
model.fit(X_train, Y_train)
model.score(X_test, Y_test)


0.841781821825278