# Web Scraping 2022 NFL Weather Data for Each Week

## Imports

In [8]:
import logging
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [9]:
# Configure the root logger: INFO and above, each record rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [17]:
def parse_nfl_weather(html):
    """Parse one weekly weather page into a list of per-game records.

    Args:
        html: Raw markup (str or bytes) of a weekly page; it is handed to
            BeautifulSoup with the built-in ``html.parser``.

    Returns:
        A list with one dict per ``div.game-box`` element found, in document
        order. Each dict has the keys ``Date_Time``, ``Away_Team``,
        ``Home_Team``, ``Away_Score``, ``Home_Score``, ``Temperature`` and
        ``Weather_Condition``. A field is ``None`` whenever the element it is
        read from is missing, so an incomplete game box never aborts the
        parse. A page without game boxes yields an empty list.
    """
    def text_or_none(node):
        # Stripped text of a tag, or None when the lookup found nothing.
        return node.get_text(strip=True) if node is not None else None

    soup = BeautifulSoup(html, 'html.parser')
    games_data = []

    for game in soup.find_all('div', class_='game-box'):
        # Extracting Date and Time
        date_time = text_or_none(game.find('div', class_='fw-bold text-wrap'))

        # Extracting team names and scores. The first team box is treated as
        # the away side and the second as the home side; with fewer than two
        # boxes all four fields stay None.
        away_team = home_team = away_score = home_score = None
        team_boxes = game.find_all('div', class_='team-game-box')
        if len(team_boxes) >= 2:
            away_box, home_box = team_boxes[0], team_boxes[1]
            away_team = text_or_none(away_box.find('span', class_='fw-bold'))
            away_score = text_or_none(away_box.find('div', class_='game-points'))
            home_team = text_or_none(home_box.find('span', class_='fw-bold ms-1'))
            home_score = text_or_none(home_box.find('div', class_='game-points'))

        # Extracting Weather Conditions. select() returns a (possibly empty)
        # list, so check it before indexing instead of assuming a match; the
        # same goes for the spans inside the weather block (first span is the
        # temperature, second the condition).
        temperature = weather_condition = None
        weather_blocks = game.select('.text-break.col-md-4.d-flex')
        if weather_blocks:
            weather_spans = weather_blocks[0].find_all('span')
            if len(weather_spans) >= 1:
                temperature = text_or_none(weather_spans[0])
            if len(weather_spans) >= 2:
                weather_condition = text_or_none(weather_spans[1])

        # Appending the extracted data
        games_data.append({
            'Date_Time': date_time,
            'Away_Team': away_team,
            'Home_Team': home_team,
            'Away_Score': away_score,
            'Home_Score': home_score,
            'Temperature': temperature,
            'Weather_Condition': weather_condition
        })

    return games_data


In [18]:
def scrape_nfl_weather_for_season(season=2022, weeks=range(1, 22), timeout=30):
    """Download and parse the weekly weather pages for one season.

    Args:
        season: Season year placed in the page URL. Defaults to 2022.
        weeks: Iterable of week numbers to fetch. Defaults to weeks 1-21
            (including weeks for playoffs and Super Bowl).
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the scrape indefinitely.

    Returns:
        A single flat list holding the game records returned by
        ``parse_nfl_weather`` for every week that was fetched successfully,
        in the order the weeks were requested.

    A week whose request fails (network error, timeout, or an HTTP error
    status) is logged as a warning and skipped rather than being parsed as
    an empty page or aborting the remaining weeks.
    """
    all_weeks_data = []
    for week in weeks:
        url = f"https://www.nflweather.com/week/{season}/week-{week}"
        try:
            response = requests.get(url, timeout=timeout)
            # Turn 4xx/5xx answers into exceptions; otherwise an error page
            # would silently be parsed into zero games.
            response.raise_for_status()
        except requests.RequestException as exc:
            logging.warning("Week %s skipped: request to %s failed (%s)", week, url, exc)
            continue

        all_weeks_data.extend(parse_nfl_weather(response.content))
        print(f"Week {week} data scraped")

    return all_weeks_data

In [19]:
# Run the scrape with its defaults: one HTTP request per week page, and a
# progress line is printed after each week has been parsed.
season_data = scrape_nfl_weather_for_season()

Week 1 data scraped
Week 2 data scraped
Week 3 data scraped
Week 4 data scraped
Week 5 data scraped
Week 6 data scraped
Week 7 data scraped
Week 8 data scraped
Week 9 data scraped
Week 10 data scraped
Week 11 data scraped
Week 12 data scraped
Week 13 data scraped
Week 14 data scraped
Week 15 data scraped
Week 16 data scraped
Week 17 data scraped
Week 18 data scraped
Week 19 data scraped
Week 20 data scraped
Week 21 data scraped


In [20]:
# One row per scraped game, columns taken from the record keys.
df = pd.DataFrame(season_data)

# Save the DataFrame to a CSV file
csv_file_path = './src/nfl_weather_data.csv'
# to_csv does not create missing directories, so make sure the target folder
# exists; otherwise the write fails on a fresh checkout.
Path(csv_file_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_file_path, index=False)

print(f"Data saved to {csv_file_path}")

Data saved to ./src/nfl_weather_data.csv


In [None]:
# Load the schedule and weather data
# TODO: point schedule_csv_path at the real schedule CSV; it is expected to
# provide Game_Date, Home_Team and Away_Team columns (used for the key below).
schedule_csv_path = 'path_to_schedule_data.csv'
# Same file that the save cell earlier in this notebook writes.
weather_csv_path = './src/nfl_weather_data.csv'
schedule_df = pd.read_csv(schedule_csv_path)
weather_df = pd.read_csv(weather_csv_path)

# Preprocess the date in weather data to match the schedule data
# NOTE(review): assumes schedule Game_Date values are YYYYMMDD - confirm.
weather_df['Game_Date'] = pd.to_datetime(weather_df['Date_Time']).dt.strftime('%Y%m%d')

# Create a unique identifier in both dataframes for merging
schedule_df['Game_Identifier'] = schedule_df['Game_Date'].astype(str) + schedule_df['Home_Team'] + schedule_df['Away_Team']
weather_df['Game_Identifier'] = weather_df['Game_Date'].astype(str) + weather_df['Home_Team'] + weather_df['Away_Team']

# Merge the dataframes on the identifier. A left join keeps every schedule
# row even when no weather record matches. Columns present in both frames
# (Game_Date, Home_Team, Away_Team) come back with _x / _y suffixes.
merged_df = pd.merge(schedule_df, weather_df, on='Game_Identifier', how='left')

# Now you have a merged dataframe with both schedule and weather information