In [1]:
import numpy as np
import pandas as pd

import pickle

import matplotlib.pyplot as plt

In [2]:
# Absolute locations of the source CSV files on the author's machine.
path = '/Users/pranav/nfl_betting_system/kaggle_data/spreadspoke_scores.csv'
team_names_path = '/Users/pranav/nfl_betting_system/kaggle_data/nfl_teams.csv'  # not read in this cell

# Raw game table; the following cells derive every other frame from `data`.
data = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Stadium and weather columns are not relevant to the study.
axes_to_remove = ['stadium', 'stadium_neutral', 'weather_temperature',
                  'weather_wind_mph', 'weather_humidity', 'weather_detail']
# `drop` returns a new frame, so the raw `data` frame is left untouched.
testData = data.drop(columns=axes_to_remove)

In [4]:
# Keep regular-season games from the 2000 season onward, then discard the
# playoff flag (constant after the filter) and renumber the rows from 0.
game_data = (
    testData
    .query("schedule_season >= 2000")
    .loc[lambda frame: frame['schedule_playoff'] == False]
    .drop(columns='schedule_playoff')
    .reset_index(drop=True)
)

In [5]:
# Old franchise name -> name used in the rest of the notebook.
subs = dict([
    ('San Diego Chargers', 'Los Angeles Chargers'),
    ('St. Louis Rams', 'Los Angeles Rams'),
    ('Washington Redskins', 'Washington Football Team'),
])
# Update team names since 2000: any cell equal to an old name is rewritten.
game_data = game_data.replace(to_replace=subs)

In [6]:
# Full team name -> short team id, listed alphabetically by team name.
teams_dict = {
    'Arizona Cardinals': 'ARI',
    'Atlanta Falcons': 'ATL',
    'Baltimore Ravens': 'BAL',
    'Buffalo Bills': 'BUF',
    'Carolina Panthers': 'CAR',
    'Chicago Bears': 'CHI',
    'Cincinnati Bengals': 'CIN',
    'Cleveland Browns': 'CLE',
    'Dallas Cowboys': 'DAL',
    'Denver Broncos': 'DEN',
    'Detroit Lions': 'DET',
    'Green Bay Packers': 'GB',
    'Houston Texans': 'HOU',
    'Indianapolis Colts': 'IND',
    'Jacksonville Jaguars': 'JAX',
    'Kansas City Chiefs': 'KC',
    'Los Angeles Chargers': 'LAC',
    'Los Angeles Rams': 'LAR',
    'Miami Dolphins': 'MIA',
    'Minnesota Vikings': 'MIN',
    'New England Patriots': 'NE',
    'New Orleans Saints': 'NO',
    'New York Giants': 'NYG',
    'New York Jets': 'NYJ',
    'Oakland Raiders': 'OAK',
    'Philadelphia Eagles': 'PHI',
    'Pittsburgh Steelers': 'PIT',
    'San Francisco 49ers': 'SF',
    'Seattle Seahawks': 'SEA',
    'Tampa Bay Buccaneers': 'TB',
    'Tennessee Titans': 'TEN',
    'Washington Football Team': 'WAS',
}

# Translate both sides of every game to ids; names missing from the
# dictionary come out as NaN.
game_data['home_team_id'] = game_data['team_home'].map(teams_dict)
game_data['away_team_id'] = game_data['team_away'].map(teams_dict)

# Keep only games that have a spread, an over/under line and a favorite,
# then renumber the rows from 0.
game_data = (
    game_data
    .dropna(subset=['spread_favorite', 'over_under_line', 'team_favorite_id'])
    .reset_index(drop=True)
)

In [7]:
# Parse the over/under line as a number; entries that cannot be parsed become NaN.
game_data['over_under_line'] = pd.to_numeric(game_data['over_under_line'], errors='coerce')
# The earlier dropna ran before this conversion, so values that only turn into
# NaN here (e.g. blank strings) would otherwise survive and compare as False
# against total_points later on. Drop them now and renumber the rows.
game_data = game_data.dropna(subset=['over_under_line']).reset_index(drop=True)

In [8]:
def addData(df):
    """Attach per-game scoring columns to ``df`` and return it.

    Reads ``score_home``, ``score_away``, ``team_favorite_id`` and
    ``home_team_id``. Writes, in this order:

    * ``total_points``  - sum of both scores
    * ``point_diff``    - absolute difference between the scores
    * ``home_team_fav`` - True where the favorite id equals the home team id

    The frame passed in is modified in place; the same object is returned.
    """
    home_pts, away_pts = df['score_home'], df['score_away']
    df['total_points'] = home_pts + away_pts
    df['point_diff'] = (home_pts - away_pts).abs()
    df['home_team_fav'] = df['team_favorite_id'].eq(df['home_team_id'])
    return df
# Rebind game_data to the frame returned by addData (scoring columns added).
game_data = addData(game_data)

In [9]:
def addResults(df):
    """Attach betting-outcome columns to ``df`` and return it.

    Reads ``total_points``, ``over_under_line``, ``score_home``,
    ``score_away``, ``home_team_id``, ``away_team_id``, ``team_favorite_id``,
    ``spread_favorite`` and ``point_diff``. Writes, in this order:

    * ``over_under_result`` - True where total_points is strictly above the
      over/under line (a total equal to the line is False).
    * ``winner`` - id of the team with the higher score, or ``'TIE'`` when
      both scores are equal.
    * ``spread_covered`` - True where the favorite won by strictly more than
      the absolute spread. A win by exactly the spread is a push and is
      False, matching the strict comparison used for the over/under.

    The frame passed in is modified in place; the same object is returned.
    """
    df['over_under_result'] = df['total_points'] > df['over_under_line']

    # Use the id columns already on the frame instead of re-mapping team
    # names through a notebook-level dictionary.
    home_won = df['score_home'] > df['score_away']
    tied = df['score_home'] == df['score_away']
    winner_id = np.where(home_won, df['home_team_id'], df['away_team_id'])
    df['winner'] = np.where(tied, 'TIE', winner_id)

    # A tie can never match a favorite id, so ties are never "covered".
    favorite_won = df['team_favorite_id'] == df['winner']
    beat_spread = df['point_diff'] > df['spread_favorite'].abs()
    df['spread_covered'] = favorite_won & beat_spread
    return df
# Rebind game_data to the frame returned by addResults (outcome columns added).
game_data = addResults(game_data)

In [10]:
# Organize the data: final column layout, built group by group.
ordering = (
    # logistical information
    ['schedule_date', 'schedule_season', 'schedule_week']
    # scoring information
    + ['home_team_id', 'team_home', 'score_home',
       'score_away', 'team_away', 'away_team_id']
    # spread related information
    + ['team_favorite_id', 'spread_favorite', 'point_diff', 'spread_covered']
    # O/U related information
    + ['over_under_line', 'total_points', 'over_under_result']
    # home team success and winner
    + ['home_team_fav', 'winner']
)
# Select exactly these columns, in this order.
game_data = game_data.loc[:, ordering]

In [11]:
# Destination of the processed game table (absolute path on the author's machine).
savePath = '/Users/pranav/nfl_betting_system/data_processing_files/base_game_info.csv'
# Write without the row index so the file holds only the ordered columns.
game_data.to_csv(path_or_buf=savePath, index=False)

In [12]:
# Display the processed frame as the cell output (pandas truncates long frames).
game_data

Unnamed: 0,schedule_date,schedule_season,schedule_week,home_team_id,team_home,score_home,score_away,team_away,away_team_id,team_favorite_id,spread_favorite,point_diff,spread_covered,over_under_line,total_points,over_under_result,home_team_fav,winner
0,09/03/2000,2000,1,ATL,Atlanta Falcons,36,28,San Francisco 49ers,SF,ATL,-6.5,8,True,46.5,64,True,True,ATL
1,09/03/2000,2000,1,BUF,Buffalo Bills,16,13,Tennessee Titans,TEN,BUF,-1.0,3,True,40.0,29,False,True,BUF
2,09/03/2000,2000,1,CLE,Cleveland Browns,7,27,Jacksonville Jaguars,JAX,JAX,-10.5,20,True,38.5,34,False,False,JAX
3,09/03/2000,2000,1,DAL,Dallas Cowboys,14,41,Philadelphia Eagles,PHI,DAL,-6.0,27,False,39.5,55,True,True,PHI
4,09/03/2000,2000,1,GB,Green Bay Packers,16,20,New York Jets,NYJ,GB,-2.5,4,False,44.0,36,False,True,NYJ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5099,12/29/2019,2019,17,MIN,Minnesota Vikings,19,21,Chicago Bears,CHI,CHI,-4.5,2,False,36.0,40,True,False,CHI
5100,12/29/2019,2019,17,NE,New England Patriots,24,27,Miami Dolphins,MIA,NE,-17.0,3,False,45.5,51,True,True,MIA
5101,12/29/2019,2019,17,NYG,New York Giants,17,34,Philadelphia Eagles,PHI,PHI,-3.5,17,True,44.5,51,True,False,PHI
5102,12/29/2019,2019,17,SEA,Seattle Seahawks,21,26,San Francisco 49ers,SF,SF,-3.5,5,True,47.0,47,False,False,SF
