In [1]:
from bs4 import BeautifulSoup
import requests as req
from time import sleep
import pandas as pd
import os
import time
from datetime import datetime as dt
from tqdm import tqdm
#from tqdm import tqdm


# configure pandas
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
    )
}

# Historical Odds Data - data not ready until Thursday
___

In [2]:
url = f"https://www.sportsoddshistory.com/nfl-game-season/?y=2022"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")
tables = soup.find_all("table", attrs={"class":"soh1"})
pd_list = []
for i, table in enumerate(tables[2:]):
    df = pd.read_html(str(table))
    df = df[0]
    df.columns = df.iloc[-2]
    df = df.iloc[:-2,: ]
    df["week"] = i + 1
    pd_list.append(df)
df = pd.concat(pd_list)
print(df.head())
df.to_csv("data/nfl_weekly_betting_summary.csv")

   Day          Date Time (ET)  NaN             Favorite         Score  Spread  NaN             Underdog Over/Under Notes  week
0  Thu   Sep 8, 2022      8:20  NaN        Buffalo Bills       W 31-10  W -2.5    @     Los Angeles Rams       U 52   NaN     1
1  Sun  Sep 11, 2022      1:00  NaN   New Orleans Saints       W 27-26  L -5.5    @      Atlanta Falcons     O 43.5   NaN     1
2  Sun  Sep 11, 2022      1:00    @    Carolina Panthers       L 24-26  L -1.5  NaN     Cleveland Browns       O 42   NaN     1
3  Sun  Sep 11, 2022      1:00  NaN  San Francisco 49ers       L 10-19    L -6    @        Chicago Bears       U 38   NaN     1
4  Sun  Sep 11, 2022      1:00    @   Cincinnati Bengals  L 20-23 (OT)    L -7  NaN  Pittsburgh Steelers       U 44   NaN     1


# Ranking Data
____

## Team Stats

In [3]:
date = "2022-11-29"
url = "https://www.teamrankings.com/nfl/stats/"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")
pbar = tqdm([a["href"] for a in soup.find(attrs={"class":"column large-2"}).find_all("a") if a["href"] != "#"])

for end_point in pbar:
    time.sleep(.25)
    url = f"https://www.teamrankings.com{end_point}?date={date}"
    page = req.get(url, headers=headers).text
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find("table")
    df = pd.read_html(str(table))[0].replace("--",0)
    df["asof"] = date
    df["url"] = end_point
    file_name = end_point.replace('/', '_').replace("-","_") + "_" + date.replace("-", "")
    full_file_path = f"data/team_ranks/{file_name}.csv"
    df.to_csv(full_file_path)
    pbar.set_description(full_file_path)
print(df.head())

data/team_ranks/_nfl_stat_opponent_penalties_per_play_20221129.csv: 100%|██████████| 226/226 [02:32<00:00,  1.49it/s]                     

   Rank       Team  2022  Last 3  Last 1  Home  Away  2021        asof                                    url
0     1  NY Giants  0.06    0.06    0.10  0.05  0.07  0.05  2022-11-29  /nfl/stat/opponent-penalties-per-play
1     2  Tennessee  0.06    0.06    0.07  0.06  0.06  0.05  2022-11-29  /nfl/stat/opponent-penalties-per-play
2     3     Denver  0.06    0.06    0.03  0.07  0.05  0.05  2022-11-29  /nfl/stat/opponent-penalties-per-play
3     4   Carolina  0.06    0.05    0.03  0.05  0.06  0.05  2022-11-29  /nfl/stat/opponent-penalties-per-play
4     5    Detroit  0.05    0.06    0.04  0.05  0.05  0.04  2022-11-29  /nfl/stat/opponent-penalties-per-play





## Player Stats

In [4]:
url = "https://www.teamrankings.com/nfl/stats/"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")

pbar = tqdm([a["href"] for a in soup.find_all(attrs={"class":"column large-2"})[-1].find_all("a") if a["href"] != "#"])

for end_point in pbar:
    time.sleep(.25)
    url = f"https://www.teamrankings.com{end_point}?season_id=20"
    page = req.get(url, headers=headers).text
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find("table")
    df = pd.read_html(str(table))[0].replace("--",0)
    df["asof"] = dt.now()
    df["url"] = end_point
    file_name = end_point.replace('/', '_').replace("-","_") + "_" + pd.to_datetime(date).strftime("%Y%m%d").replace("-", "")
    full_file_path = f"data/team_ranks/{file_name}.csv"
    df.to_csv(full_file_path)
    pbar.set_description(full_file_path)
print(df.head())

data/team_ranks/_nfl_player_stat_scoring_total_points_20221129.csv: 100%|██████████| 112/112 [01:23<00:00,  1.34it/s]                   

   Rank          Player                  Team Pos  Value                       asof                                    url
0     1      Tyler Bass         Buffalo Bills   K    101 2022-11-30 02:28:27.242824  /nfl/player-stat/scoring-total-points
1     2   Justin Tucker      Baltimore Ravens   K     99 2022-11-30 02:28:27.242824  /nfl/player-stat/scoring-total-points
2     3       Nick Folk  New England Patriots   K     95 2022-11-30 02:28:27.242824  /nfl/player-stat/scoring-total-points
3     4     Jason Myers      Seattle Seahawks   K     93 2022-11-30 02:28:27.242824  /nfl/player-stat/scoring-total-points
4     5  Daniel Carlson     Las Vegas Raiders   K     91 2022-11-30 02:28:27.242824  /nfl/player-stat/scoring-total-points





## Team Schedules


In [6]:
url = "https://www.teamrankings.com/nfl/"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")

pbar = tqdm([li.find("a")["href"] for li in soup.find("div", attrs={"data-section":"nfl"}).find_all(attrs={"class":"teams-group"})[0].find_all("li")])

for end_point in pbar:
    time.sleep(.25)
    url = f"https://www.teamrankings.com{end_point}"
    page = req.get(url, headers=headers).text
    soup = BeautifulSoup(page, "html.parser")
    table = soup.find("table", attrs={"class":"tr-table datatable scrollable"})
    df = pd.read_html(str(table), displayed_only=False)[0].replace("--",0)
    df["asof"] = dt.now()
    df["url"] = end_point
    file_name = end_point.replace('/', '_').replace("-","_") + "_" + pd.to_datetime(date).strftime("%Y%m%d").replace("-", "")    
    
    full_file_path = f"data/team_ranks/{file_name}.csv"
    df.to_csv(full_file_path)
    pbar.set_description(full_file_path)

print(df.head())

AttributeError: 'NoneType' object has no attribute 'find_all'

In [7]:
url = "https://www.teamrankings.com/nfl/schedules/season/?week=557"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")
table = soup.find("table")
df = pd.read_html(str(table), displayed_only=False)[0]
df.columns = ["teams", "time", "location"]


# 557 == week 11
url = "https://www.teamrankings.com/nfl/schedules/season/?week=557"
page = req.get(url, headers=headers).text
soup = BeautifulSoup(page, "html.parser")
table = soup.find("table")
df_schedule = pd.read_html(str(table), displayed_only=False)[0]
df_schedule.columns = ["teams", "time", "location"]
df_schedule["home"] = df_schedule.teams.str.split("@", expand=True)[1].str.strip("")
df_schedule["away"] = df_schedule.teams.str.split("@", expand=True)[0].str.strip()
df_schedule


ValueError: No tables found