In [11]:
import pandas as pd
import datetime as dt
import os

In [2]:
df = pd.read_html('https://www.pro-football-reference.com/years/2023/preseason.htm')

In [7]:
nfl_pre_df = df[2]
nfl_pre_df = nfl_pre_df.drop(columns='Unnamed: 5')
nfl_pre_df = nfl_pre_df.rename(columns={"Unnamed: 2":"DOM",
                                        "Pts":"VisPts",
                                        "Pts.1":"HomePts"})
nfl_pre_df

Unnamed: 0,Week,Day,DOM,VisTm,VisPts,HomeTm,HomePts
0,,Thu,August 3,New York Jets,16,Cleveland Browns,21
1,1.0,Thu,August 10,Houston Texans,20,New England Patriots,9
2,1.0,Thu,August 10,Minnesota Vikings,13,Seattle Seahawks,24
3,1.0,Fri,August 11,Green Bay Packers,36,Cincinnati Bengals,19
4,1.0,Fri,August 11,Washington Commanders,17,Cleveland Browns,15
5,1.0,Fri,August 11,Denver Broncos,17,Arizona Cardinals,18
6,1.0,Fri,August 11,New York Giants,16,Detroit Lions,21
7,1.0,Fri,August 11,Atlanta Falcons,19,Miami Dolphins,3
8,1.0,Fri,August 11,Pittsburgh Steelers,27,Tampa Bay Buccaneers,17
9,1.0,Sat,August 12,Los Angeles Chargers,34,Los Angeles Rams,17


In [8]:
# Create date string from Day and Date columns
nfl_pre_df['Date'] = ""

for i in range(len(nfl_pre_df)):
    month_day = nfl_pre_df.loc[i, "DOM"].split(" ")
    month = dt.datetime.strptime(month_day[0], '%B').month
    day = int(month_day[1])
    date = dt.date(2023,month,day).strftime('%m/%d/%Y')
    
    nfl_pre_df.loc[i, 'Date'] = date

In [9]:
nfl_pre_df = nfl_pre_df.drop(columns=['DOM','Week','Day','VisPts','HomePts'])
nfl_pre_df.head()

Unnamed: 0,VisTm,HomeTm,Date
0,New York Jets,Cleveland Browns,08/03/2023
1,Houston Texans,New England Patriots,08/10/2023
2,Minnesota Vikings,Seattle Seahawks,08/10/2023
3,Green Bay Packers,Cincinnati Bengals,08/11/2023
4,Washington Commanders,Cleveland Browns,08/11/2023


In [12]:
team_path = os.path.join("..","..","Resources","team.csv")
team_df = pd.read_csv(team_path)

# Take only nfl teams and reset index

nfl_teams_df = team_df[team_df['league_id'] == 2].reset_index()

nfl_teams_df.head()

Unnamed: 0,index,team_id,team,venue_id,league_id
0,30,31,Las Vegas Raiders,30,2
1,31,32,Kansas City Chiefs,31,2
2,32,33,Dallas Cowboys,32,2
3,33,34,Carolina Panthers,33,2
4,34,35,New Orleans Saints,34,2


In [13]:
# Confirm all team names match with mlb_teams_df
team_nf = []

for i in range(len(nfl_pre_df)):
    home_team = nfl_pre_df.loc[i, 'HomeTm']
    away_team = nfl_pre_df.loc[i, 'VisTm']
    
    if home_team not in list(nfl_teams_df['team']):
        if home_team not in team_nf:
            print(f"{home_team} not found. Adding to tracker...")
            team_nf.append(home_team)
    
    if away_team not in list(nfl_teams_df['team']):
        if away_team not in team_nf:
            print(f"{away_team} not found. Adding to tracker...")
            team_nf.append(away_team)
            
print(team_nf)

[]


In [14]:
# Assign each row a 'home_id', 'away_id', and 'venue_id'
nfl_pre_df['home_id'] = ''
nfl_pre_df['away_id'] = ''
nfl_pre_df['venue_id'] = ''

for i in range(len(nfl_pre_df)):
    home_team = nfl_teams_df[nfl_teams_df['team'] == nfl_pre_df.loc[i, 'HomeTm']]
    away_team = nfl_teams_df[nfl_teams_df['team'] == nfl_pre_df.loc[i, 'VisTm']]
    
    nfl_pre_df.loc[i, 'home_id'] = int(home_team['team_id'])
    nfl_pre_df.loc[i, 'venue_id'] = int(home_team['venue_id'])
    nfl_pre_df.loc[i, 'away_id'] = int(away_team['team_id'])

nfl_pre_df

Unnamed: 0,VisTm,HomeTm,Date,home_id,away_id,venue_id
0,New York Jets,Cleveland Browns,08/03/2023,38,52,37
1,Houston Texans,New England Patriots,08/10/2023,40,54,39
2,Minnesota Vikings,Seattle Seahawks,08/10/2023,48,62,47
3,Green Bay Packers,Cincinnati Bengals,08/11/2023,55,44,53
4,Washington Commanders,Cleveland Browns,08/11/2023,38,37,37
5,Denver Broncos,Arizona Cardinals,08/11/2023,60,36,57
6,New York Giants,Detroit Lions,08/11/2023,39,51,38
7,Atlanta Falcons,Miami Dolphins,08/11/2023,41,50,40
8,Pittsburgh Steelers,Tampa Bay Buccaneers,08/11/2023,56,42,54
9,Los Angeles Chargers,Los Angeles Rams,08/12/2023,57,58,55


In [15]:
nfl_pre_df["CST Gametime"] = "6:00 PM"
nfl_pre_formatted_df = nfl_pre_df[["Date", "CST Gametime", "home_id", "away_id", "venue_id"]]
nfl_pre_formatted_df = nfl_pre_formatted_df.rename(columns={"Date":"event_date",
                                                   "CST Gametime": "event_time_cst"})
nfl_pre_formatted_df.head()

Unnamed: 0,event_date,event_time_cst,home_id,away_id,venue_id
0,08/03/2023,6:00 PM,38,52,37
1,08/10/2023,6:00 PM,40,54,39
2,08/10/2023,6:00 PM,48,62,47
3,08/11/2023,6:00 PM,55,44,53
4,08/11/2023,6:00 PM,38,37,37


In [16]:
output_path = os.path.join("..","..","Resources","nfl_pre_events.csv")
nfl_pre_formatted_df.to_csv(output_path)