<a href="https://colab.research.google.com/github/stevenpunn/NFL-Prediction-Model/blob/main/NFL_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

In [6]:
# clean the data
pbp2024 = pd.read_csv('/content/NFL_pbp_24.csv')
boxscores2024 = pd.read_csv('/content/boxScores2024.csv')

In [8]:
pbp2024.head()

Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
0,2024091513,2024-09-15,1,12,41,HOU,CHI,1,10,57,...,0,0,RIGHT TACKLE,43,OPP,0,,0,,0
1,2024091513,2024-09-15,1,11,56,HOU,CHI,2,10,57,...,0,0,,43,OPP,1,HOU,1,FALSE START,5
2,2024091513,2024-09-15,1,11,47,HOU,CHI,2,15,52,...,0,0,,48,OPP,0,,0,,0
3,2024102011,2024-10-20,3,11,14,PIT,NYJ,1,10,61,...,0,0,,39,OPP,0,,0,,0
4,2024102011,2024-10-20,3,10,45,PIT,NYJ,2,9,62,...,0,0,RIGHT TACKLE,38,OPP,0,,0,,0


In [10]:
print(boxscores2024.columns)

Index(['Week', 'Date', 'Visitor', 'VisitorScore', 'Home', 'HomeScore', 'OT'], dtype='object')


In [19]:
# change 'GameDate' format in play by play file to match 'Date' format in box scores
pbp2024['GameDate'] = pd.to_datetime(pbp2024['GameDate']).dt.strftime('%m/%d/%y')

pbp2024.head()

  pbp2024['GameDate'] = pd.to_datetime(pbp2024['GameDate']).dt.strftime('%m/%d/%y')


Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
0,2024091513,09/15/24,1,12,41,HOU,CHI,1,10,57,...,0,0,RIGHT TACKLE,43,OPP,0,,0,,0
1,2024091513,09/15/24,1,11,56,HOU,CHI,2,10,57,...,0,0,,43,OPP,1,HOU,1,FALSE START,5
2,2024091513,09/15/24,1,11,47,HOU,CHI,2,15,52,...,0,0,,48,OPP,0,,0,,0
3,2024102011,10/20/24,3,11,14,PIT,NYJ,1,10,61,...,0,0,,39,OPP,0,,0,,0
4,2024102011,10/20/24,3,10,45,PIT,NYJ,2,9,62,...,0,0,RIGHT TACKLE,38,OPP,0,,0,,0


In [18]:
# find abbreviated names for teams from box scores
homeTeam_unique = boxscores2024['Home'].unique()
visitorTeam_unique = boxscores2024['Visitor'].unique()

# find abbreviated names for teams in the play-by-play data
pbpName_unique = set(pbp2024['OffenseTeam'].dropna().unique()).union(set(pbp2024['DefenseTeam'].dropna().unique()))

In [20]:
# map team names to abbreviations
team_abbreviations = {
    'Arizona Cardinals': 'ARI',
    'Atlanta Falcons': 'ATL',
    'Baltimore Ravens': 'BAL',
    'Buffalo Bills': 'BUF',
    'Carolina Panthers': 'CAR',
    'Chicago Bears': 'CHI',
    'Cincinnati Bengals': 'CIN',
    'Cleveland Browns': 'CLE',
    'Dallas Cowboys': 'DAL',
    'Denver Broncos': 'DEN',
    'Detroit Lions': 'DET',
    'Green Bay Packers': 'GB',
    'Houston Texans': 'HOU',
    'Indianapolis Colts': 'IND',
    'Jacksonville Jaguars': 'JAX',
    'Kansas City Chiefs': 'KC',
    'Las Vegas Raiders': 'LV',
    'Los Angeles Chargers': 'LAC',
    'Los Angeles Rams': 'LA',
    'Miami Dolphins': 'MIA',
    'Minnesota Vikings': 'MIN',
    'New England Patriots': 'NE',
    'New Orleans Saints': 'NO',
    'New York Giants': 'NYG',
    'New York Jets': 'NYJ',
    'Philadelphia Eagles': 'PHI',
    'Pittsburgh Steelers': 'PIT',
    'San Francisco 49ers': 'SF',
    'Seattle Seahawks': 'SEA',
    'Tampa Bay Buccaneers': 'TB',
    'Tennessee Titans': 'TEN',
    'Washington Commanders': 'WAS'
}

# replace team names with abbreivations
pbp2024['OffenseTeam'] = pbp2024['OffenseTeam'].map(team_abbreviations)
pbp2024['DefenseTeam'] = pbp2024['DefenseTeam'].map(team_abbreviations)

pbp2024.head()

Unnamed: 0,GameId,GameDate,Quarter,Minute,Second,OffenseTeam,DefenseTeam,Down,ToGo,YardLine,...,IsTwoPointConversion,IsTwoPointConversionSuccessful,RushDirection,YardLineFixed,YardLineDirection,IsPenaltyAccepted,PenaltyTeam,IsNoPlay,PenaltyType,PenaltyYards
0,2024091513,09/15/24,1,12,41,,,1,10,57,...,0,0,RIGHT TACKLE,43,OPP,0,,0,,0
1,2024091513,09/15/24,1,11,56,,,2,10,57,...,0,0,,43,OPP,1,HOU,1,FALSE START,5
2,2024091513,09/15/24,1,11,47,,,2,15,52,...,0,0,,48,OPP,0,,0,,0
3,2024102011,10/20/24,3,11,14,,,1,10,61,...,0,0,,39,OPP,0,,0,,0
4,2024102011,10/20/24,3,10,45,,,2,9,62,...,0,0,RIGHT TACKLE,38,OPP,0,,0,,0
