# Web Scrape NFL Games and Weather

### Set-up

In [2]:
# Import packages
import pandas as pd
import numpy as np
import string
from bs4 import BeautifulSoup
import requests
import time
import os
import sys

# Working directory placeholder: replace the string with a real path before
# running. NOTE(review): no cell visible in this notebook reads this constant.
WORKING_DIRECTORY = "SET WD HERE" 

### Set relevant web scraping info

In [3]:
# Week labels, in calendar order: 4 preseason weeks, 18 regular-season
# weeks, then the postseason / special rounds.
preseason_wks = ["Pre Season Week " + str(wk_num) for wk_num in range(1, 5)]
regular_wks = ["Week " + str(wk_num) for wk_num in range(1, 19)]
other_wks = [
    "Wildcard Weekend",
    "Divisional Playoffs",
    "Conf Championships",
    "Pro Bowl",
    "Superbowl",
]
all_wks = [*preseason_wks, *regular_wks, *other_wks]

# Seasons to scrape: 2009 through 2023 inclusive
seasons = [*range(2009, 2024)]

# Base URL; a season and a week slug are appended to it
webpage_main = "https://www.nflweather.com/week/"

### Web scrape

In [4]:
# Scrape every season/week page under `webpage_main` into one table, `game_df`.
# Resulting columns: teams, team 1 pts, team 2 pts, dome, temp, conditions,
# week, season. Failures are reported and skipped instead of being silently
# swallowed, so a gap in the data is visible in the cell output.
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the run


def _fetch_soup(url):
    """Download `url` and return the parsed page.

    Raises requests.RequestException on connection problems, timeouts, or a
    non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _parse_week_page(soup):
    """Return a DataFrame with one row per game found on a parsed week page.

    Raises KeyError when an expected tag attribute is missing and ValueError
    (from pandas) when the scraped columns end up with different lengths.
    """
    sp_points = soup.find_all("div", {"class": "game-points"}) # Points
    sp_teams = soup.find_all("a", {"class": "text-dark"}) # Teams
    sp_dome = soup.find_all("img", {"class": "game-box-weather-image"}) # Dome
    sp_weather = soup.find_all("div", {"class": "mx-2"}) # Weather

    return pd.DataFrame(
        {
            # Every third "text-dark" link is the one whose title names the matchup
            'teams': [link["title"] for link in sp_teams[2::3]],
            # Score divs alternate: first listed team, then second listed team
            'team 1 pts': [pts.get_text() for pts in sp_points[0::2]],
            'team 2 pts': [pts.get_text() for pts in sp_points[1::2]],
            # Path of the weather/stadium icon image
            'dome': [img["src"] for img in sp_dome],
            # Weather divs alternate: temperature, then conditions text
            'temp': [div.get_text() for div in sp_weather[0::2]],
            'conditions': [div.get_text() for div in sp_weather[1::2]],
        }
    )


# Per-week frames are collected here and concatenated once at the end, which
# avoids re-copying the growing table on every page. If the cell is
# interrupted, the partial results are still available in `week_frames`.
week_frames = []
for season in seasons:
    # The 2010 season uses week slugs with a "-2" suffix
    first_week = "/week-1-2" if season == 2010 else "/week-1"
    try:
        season_soup = _fetch_soup(webpage_main + str(season) + first_week)
    except requests.RequestException as err:
        print("Skipping season " + str(season) + ": " + repr(err))
        continue

    # Get season-week options from the page's <option> tags. Collection stops
    # at the value "2023" (presumably the first entry of a season selector
    # that follows the week entries; TODO confirm against the live page).
    weeks = []
    for option in season_soup.find_all("option"):
        week_val = option.get("value")
        if week_val is None:
            continue  # an <option> without a value cannot form a URL
        if week_val == "2023":
            break
        weeks.append(week_val)

    for wk in weeks:
        webpage = webpage_main + str(season) + "/" + wk
        try:
            df = _parse_week_page(_fetch_soup(webpage))
        except (requests.RequestException, KeyError, ValueError) as err:
            # One bad page no longer discards the rest of the season
            print("Skipping " + str(season) + " " + wk + ": " + repr(err))
        else:
            df['week'] = wk
            df['season'] = season
            week_frames.append(df)
            print(str(season) + " " + wk)

        time.sleep(1) # Time buffer between webpage calls

# Index labels restart at 0 for every page, as they did when the table was
# grown with repeated concat calls.
game_df = pd.concat(week_frames) if week_frames else pd.DataFrame()

2009 pre-season-week-1
2009 pre-season-week-2
2009 pre-season-week-3
2009 pre-season-week-4
2009 week-1
2009 week-2
2009 week-3
2009 week-4
2009 week-5
2009 week-6
2009 week-7
2009 week-8
2009 week-9
2009 week-10
2009 week-11
2009 week-12
2009 week-13
2009 week-14
2009 week-15
2009 week-16
2009 week-17
2009 wildcard-weekend
2009 divisional-playoffs
2009 conf-championships
2009 pro-bowl
2009 superbowl
2010 pre-season-week-1-2
2010 pre-season-week-2-2
2010 pre-season-week-3-2
2010 pre-season-week-4-2
2010 week-1-2
2010 week-2-2
2010 week-3-2
2010 week-4-2
2010 week-5-2
2010 week-6-2
2010 week-7-2
2010 week-8-2
2010 week-9-2
2010 week-10-2
2010 week-11-2
2010 week-12-2
2010 week-13-2
2010 week-14-2
2010 week-15-2
2010 week-16-2
2010 week-17-2
2010 wildcard-weekend-2
2010 divisional-playoffs-2
2010 conf-championships-2
2010 pro-bowl-2
2010 superbowl-2
2011 pre-season-week-1
2011 pre-season-week-2
2011 pre-season-week-3
2011 pre-season-week-4
2011 week-1
2011 week-2
2011 week-3
2011 week-4


In [5]:
# Display the scraped games table (one row per game, tagged with week and season)
game_df

Unnamed: 0,teams,team 1 pts,team 2 pts,dome,temp,conditions,week,season
0,Click to see more details about Bills at Titans,18,21,/climates/few.png,87 °F,A Few Clouds,pre-season-week-1,2009
1,Click to see more details about Patriots at Ea...,27,25,/climates/sct.png,75 °F,Partly Cloudy,pre-season-week-1,2009
2,Click to see more details about Redskins at Ra...,0,23,/climates/scttsra.png,75 °F,Light Rain,pre-season-week-1,2009
3,Click to see more details about Cardinals at S...,10,20,/climates/few.png,73 °F,Fair,pre-season-week-1,2009
4,Click to see more details about Cowboys at Rai...,10,31,/climates/few.png,70 °F,A Few Clouds,pre-season-week-1,2009
...,...,...,...,...,...,...,...,...
11,Click to see more details about Bears At Chiefs,10,41,/climates/clear.png,82 °F,Clear,week-3,2023
12,Click to see more details about Cowboys At Car...,16,28,/climates/dome.webp,93 °F,Clear,week-3,2023
13,Click to see more details about Steelers At Ra...,23,18,/climates/dome.webp,87 °F,Clear,week-3,2023
14,Click to see more details about Eagles At Bucc...,25,11,/climates/mostly_cloudy_n.png,87 °F,Mostly Cloudy,week-3,2023


In [8]:
# Single-page dry run: build the URL for the first week label of the first
# season, fetch it, and pull the four tag groups of interest.
season, wk = seasons[0], all_wks[0]
wk_lower_cleaned = "-".join(wk.lower().split(" "))  # e.g. "Week 1" -> "week-1"
webpage = "".join([webpage_main, str(season), "/", wk_lower_cleaned])

# Download and parse the page
response = requests.get(webpage)
soup = BeautifulSoup(response.text, 'html.parser')

# Tag groups, selected by CSS class
sp_points = soup.find_all("div", class_="game-points")  # Points
sp_teams = soup.find_all("a", class_="text-dark")  # Teams
sp_dome = soup.find_all("img", class_="game-box-weather-image")  # Dome
sp_weather = soup.find_all("div", class_="mx-2")  # Weather

In [5]:
# NOTE(review): repeats the earlier `game_df` display cell and has no saved
# output here; likely a leftover that can be removed.
game_df

In [9]:
# Fetch one hard-coded week page (2020 season, week 3) for manual inspection
webpage = "https://www.nflweather.com/week/2020/week-3"
response = requests.get(webpage)

In [10]:
# Parse the fetched HTML; the bare `soup` on the last line makes the notebook
# display the entire parsed document.
soup = BeautifulSoup(response.text, 'html.parser')
soup

<!DOCTYPE html>

<html>
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/>
<title>NFLWeather</title>
<meta content="authenticity_token" name="csrf-param">
<meta content="/d9Hufee5P3a44jMKT+6fJpVSejStbj6t1/HUqNbAPYm2QeDAYwowOaRK6+J7ZbCsyBITCGw9jL8bPBbZv9nuw==" name="csrf-token">
<link href="/assets/application-6c07eae901b7d9330386f308e458330c32a2464e59ee46a57e10a482564d7f8c.css" media="all" rel="stylesheet"/>
<script src="/assets/application-efd0efb8a6acc5ac5dfcb6d8d064af6ad9dd02c595ea24d47bff569dc1f10f10.js"></script>
<link href="/assets/favicon-139bed0ed1ee258509c67d275498f7443abae55a2186fbc396d7f05d86728f74.ico" rel="shortcut icon" type="image/x-icon"/>
<meta content="NFLWeather" property="og:title"/>
<meta content="NFLWeather.com™ is the only place to receive every weather forecast, updated twice an hour, for every football game every week." property="og:description"/>
<meta content="https://www.nflweather.com/logo_

In [8]:
# Scratch cell for trying selectors against the page held in `soup`. Only the
# points query below runs; the commented-out lines are alternative probes.
soup.find_all("div", {"class": "game-points"}) # Points
# soup.find_all("a", {"class": "text-dark"})[8]["title"] # Teams
# soup.find_all("img", {"class": "game-box-weather-image"})[0]["src"] # Dome
# soup.find_all("div", {"class": "mx-2"}) # Weather
# soup.find_all("span", {"class": ""}) # Wind?
# weeks_raw = soup.find_all("option") # TO DO: incorporate weeks
# weeks = []
# for week in weeks_raw:
#     week_val = week["value"]
#     if week_val == "2023":
#         break
#     weeks.append(week_val)
#     print(week["value"])
# weeks

[<div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe-2"></div>,
 <div class="game-points pe-1 ps-2"></div>,
 <div class="game-points ps-1 pe

### Get names and profile links for all NBA players (hispanosnba.com)

In [2]:
# Pages with player names use first letter of last names
alphabet = list(string.ascii_lowercase)
player_name_link_df = pd.DataFrame()

# Loop through alphabet, get names and links for all players
for letter in alphabet:
    # Request URL, call BeautifulSoup
    webpage = "https://en.hispanosnba.com/players/nba-all/" + letter
    response = requests.get(webpage)
    if response.status_code == 404: # Some letters may not have pages, skip them
        continue 
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Get relevant part of webpage
    players = soup.find_all("table", {"class": "tblprm"})[0]
    player_name_links = players.find_all("a")

    # Loop through and add names, links
    for player in player_name_links:
        player_title = player["title"]
        player_link = player["href"]
        df = pd.DataFrame({'name': [player_title], 'link': [player_link]})
        player_name_link_df = pd.concat([player_name_link_df, df])
            
    # Set time buffer so as to not trigger any issues visiting website
    time.sleep(1)