In [19]:
# Dictionary with key indicating table name, list index zero is link to the table's site and list index 1 is the HTML ID of the table
master_table_dict = {
    "conversions" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_conversions"],
    "conversions_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_conversions"],
    "drive_averages" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_drives"],
    "drive_averages_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_drives"],
    "kick_and_punt_returns" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_returns"],
    "kick_and_punt_returns_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_returns"],
    "kicking" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_kicking"],
    "passing_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_passing"],
    "passing_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_passing"],
    "punting" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_punting"],
    "punting_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_punting"],
    "rushing_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_rushing"],
    "rushing_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_rushing"],
    "scoring_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_scoring"],
    "scoring_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_scoring"],
    "team_advanced_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_advanced_defense"],
    "team_defense_statistics" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_stats"],
    "team_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_stats"]
}

## Pull All Data

In [20]:
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd
import time
from io import StringIO
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="pandas")

headers = {"User-Agent": "Mozilla/5.0"}
dataframes = {}

for key, (url, div_id) in master_table_dict.items():
    print(f"Fetching '{key}' from {url} ...")

    resp = requests.get(url, headers=headers)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # first try: div directly in HTML
    div = soup.find("div", id=div_id)

    # second try: div hidden inside HTML comments
    if div is None:
        comments = soup.find_all(string=lambda text: isinstance(text, Comment))
        for c in comments:
            if div_id in c:
                soup_comment = BeautifulSoup(c, "html.parser")
                div = soup_comment.find("div", id=div_id)
                break

    if div is None:
        print(f"⚠️  Skipping '{key}': no div found for {div_id}")
        continue

    table = div.find("table") if div else None
    if table is None:
        print(f"⚠️  Skipping '{key}': no table found inside div {div_id}")
        continue

    try:
        df = pd.read_html(StringIO(str(table)))[0]
        dataframes[key] = df
        print(f"✅ Saved table '{key}' with {df.shape[0]} rows and {df.shape[1]} columns.")
    except Exception as e:
        print(f"❌ Error parsing '{key}': {e}")

    # sleep 10 seconds between requests
    print("⏳ Waiting 10 seconds to respect site guidelines...")
    time.sleep(10)

print("\n✅ Finished fetching all tables.")

Fetching 'conversions' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'conversions' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'conversions_against' from https://www.pro-football-reference.com/years/2025/opp.htm ...
✅ Saved table 'conversions_against' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'drive_averages' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'drive_averages' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'drive_averages_against' from https://www.pro-football-reference.com/years/2025/opp.htm ...
✅ Saved table 'drive_averages_against' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'kick_and_punt_returns' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'kick_and_punt_returns' with 35 rows and 14 

## Engineer `conversions` Tables

In [21]:
dataframes["conversions"].columns = [
    "Rk_conversions", "Tm", "G", "3DAtt", "3DConv", "3D%", 
    "4DAtt", "4DConv", "4D%", "RZAtt", "RZTD", "RZPct"
]
conversions_1 = dataframes["conversions"].reset_index(drop=True)

dataframes["conversions_against"].columns = [
    "Rk_conversions_against", "Tm", "G", "3DAtt_against", "3DConv_against", "3D%_against", 
    "4DAtt_against", "4DConv_against", "4D%_against", "RZAtt_against", "RZTD_against", "RZPct_against"
]
conversions_against_1 = dataframes["conversions_against"].reset_index(drop=True)

In [22]:
print(dataframes.keys())

dict_keys(['conversions', 'conversions_against', 'drive_averages', 'drive_averages_against', 'kick_and_punt_returns', 'kick_and_punt_returns_against', 'kicking', 'passing_offense', 'passing_defense', 'punting', 'punting_against', 'rushing_offense', 'rushing_defense', 'scoring_offense', 'scoring_defense', 'team_advanced_defense', 'team_defense_statistics', 'team_offense'])


## Engineer `drive_averages` Tables

In [23]:
dataframes["drive_averages"].columns = [
    "Rk_drive_averages", "Tm", "G", "#Dr", "Plays", "Sc%", 
    "TO%", "Avg_plays", "Avg_yards", "Avg_start", "Avg_time", "Avg_pts"
]
drive_averages_1 = dataframes["drive_averages"].reset_index(drop=True)

dataframes["drive_averages_against"].columns = [
    "Rk_drive_averages_against", "Tm", "G", "#Dr_against", "Plays_against", "Sc%_against", 
    "TO%_against", "Avg_plays_against", "Avg_yards_against", "Avg_start_against", "Avg_time_against", "Avg_pts_against"
]
drive_averages_against_1 = dataframes["drive_averages_against"].reset_index(drop=True)

## Engineer `kick_and_punt_returns` Tables

In [24]:
dataframes["kick_and_punt_returns"].columns = ["Rk_kick_and_punt_returns", "Tm", "G", "Punt_Returns", 
                                         "Punt_return_yds", "Punt_return_TD", "Punt_return_lng", "Punt_return_Y/R", "Kick_return", 
                                         "Kick_return_yds", "Kick_return_TD", "Kick_return_lng", "Kick_return_Y/Rt", "Kick_return_APYd"]

kick_and_punt_returns_1 = dataframes["kick_and_punt_returns"].reset_index(drop=True)

dataframes["kick_and_punt_returns_against"].columns = ["Rk_kick_and_punt_returns_against", "Tm", "G", "Punt_Returns_Against", 
                                         "Punt_return_yds_against", "Punt_return_TD_against", "Punt_return_Y/R_against", "Kick_return_against", 
                                         "Kick_return_yds_against", "Kick_return_TD_against", "Kick_return_Y/Rt_against"]

kick_and_punt_returns_against_1 = dataframes["kick_and_punt_returns_against"].reset_index(drop=True)

## Engineer `kicking` Table

In [26]:
display(dataframes["kicking"])

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0-19,0-19,20-29,20-29,30-39,30-39,40-49,...,Scoring,Scoring,Scoring,Scoring,Scoring,Kickoffs,Kickoffs,Kickoffs,Kickoffs,Kickoffs
Unnamed: 0_level_1,Rk,Tm,G,FGA,FGM,FGA,FGM,FGA,FGM,FGA,...,Lng,FG%,XPA,XPM,XP%,KO,KOYds,TB,TB%,KOAvg
0,1.0,Dallas Cowboys,7.0,,,4.0,4.0,2.0,2.0,4.0,...,64.0,100.0%,24.0,23.0,95.8%,45.0,2770.0,14.0,31.1%,61.6
1,2.0,Indianapolis Colts,7.0,,,5.0,5.0,6.0,6.0,3.0,...,52.0,93.8%,22.0,21.0,95.5%,45.0,2604.0,5.0,11.1%,57.9
2,3.0,Kansas City Chiefs,7.0,,,3.0,3.0,5.0,5.0,2.0,...,59.0,80.0%,21.0,18.0,85.7%,39.0,2338.0,6.0,15.4%,59.9
3,4.0,Los Angeles Rams,7.0,,,4.0,3.0,4.0,3.0,4.0,...,51.0,71.4%,21.0,19.0,90.5%,39.0,2344.0,14.0,35.9%,60.1
4,5.0,Seattle Seahawks,6.0,,,1.0,1.0,4.0,3.0,4.0,...,56.0,78.6%,19.0,19.0,100.0%,35.0,2179.0,6.0,17.1%,62.3
5,6.0,Detroit Lions,6.0,,,2.0,2.0,2.0,2.0,2.0,...,58.0,77.8%,24.0,24.0,100.0%,37.0,2195.0,5.0,13.5%,59.3
6,7.0,Washington Commanders,7.0,,,1.0,1.0,2.0,1.0,4.0,...,56.0,71.4%,18.0,18.0,100.0%,37.0,2108.0,7.0,18.9%,57.0
7,8.0,Tampa Bay Buccaneers,6.0,,,1.0,1.0,6.0,5.0,6.0,...,65.0,76.5%,15.0,14.0,93.3%,34.0,2067.0,1.0,2.9%,60.8
8,9.0,San Francisco 49ers,7.0,,,5.0,4.0,5.0,4.0,5.0,...,59.0,90.0%,12.0,11.0,91.7%,37.0,2257.0,8.0,21.6%,61.0
9,10.0,New Orleans Saints,7.0,,,6.0,6.0,5.0,4.0,3.0,...,54.0,70.0%,11.0,11.0,100.0%,31.0,1866.0,6.0,19.4%,60.2


In [25]:
dataframes["kicking"].columns = ["Rk_FG", "Tm", "G", "FGA", "FGM", "FG%", "XPA", "XPM", "XP%"]

kicking_1 = dataframes["kicking"].reset_index(drop=True)

ValueError: Length mismatch: Expected axis has 25 elements, new values have 9 elements

## Engineer `passing` Tables

In [None]:
dataframes["passing_offense"].columns = ["Rk_passing", "Tm", "G", 
                   "Cmp", "Passing_Att", "Cmp%", "Passing_yds", 
                   "Passing_tds", "Passing_td%", "Int_thrown", "Int%_thrown", 
                   "Passing_lng", "Y/A_passing", "AY/A_passing", "Y/C_passing", 
                   "Y/G_passing", "rate", "Sk_allowed", "SkYds_allowed", "Sk%_allowed", 
                   "NY/A_passing", "ANY/A_passing", "4QC", "GWD", "EXP_passing"]
passing_1=dataframes["passing_offense"]

dataframes["passing_defense"].columns = ["Rk_passing_defense", "Tm", "G", 
                           "Cmp_allowed", "Att_allowed_passing", "Cmp%_allowed", 
                           "Passing_yds_allowed", "Passing_TDs_allowed", 
                           "Passing_TD%_allowed", "Int", "Passes_defended", 
                           "Int%", "Y/A_passing_allowed", "AY/A_passing_allowed", 
                           "Y/C_passing_allowed", "Y/G_passing_allowed", "Rate_allowed", "Sk", 
                           "SkYds", "QBHits", "TFL", "Sk%", "NY/A_passing_allowed", "NY/A_passing_allowed", "EXP_passing_defense"]

passing_against_1 = dataframes["passing_defense"]

Unnamed: 0,Rk_passing_defense,Tm,G,Cmp_allowed,Att_allowed_passing,Cmp%_allowed,Passing_yds_allowed,Passing_TDs_allowed,Passing_TD%_allowed,Int,...,Y/G_passing_allowed,Rate_allowed,Sk,SkYds,QBHits,TFL,Sk%,NY/A_passing_allowed,NY/A_passing_allowed.1,EXP_passing_defense
0,1.0,Atlanta Falcons,6.0,92.0,156.0,59.0,847.0,7.0,4.5,6.0,...,141.2,75.1,15.0,87.0,33.0,25.0,8.8,5.0,4.2,2.59
1,2.0,Houston Texans,5.0,90.0,153.0,58.8,876.0,3.0,2.0,5.0,...,175.2,70.4,12.0,93.0,26.0,23.0,7.3,5.3,4.3,11.95
2,3.0,Buffalo Bills,6.0,106.0,165.0,64.2,1003.0,7.0,4.2,2.0,...,167.2,92.2,15.0,84.0,28.0,35.0,8.3,5.6,5.9,-4.29
3,4.0,Minnesota Vikings,6.0,109.0,161.0,67.7,1104.0,8.0,5.0,2.0,...,184.0,101.0,16.0,97.0,27.0,36.0,9.0,6.2,6.6,14.49
4,5.0,Cleveland Browns,7.0,131.0,200.0,65.5,1216.0,12.0,6.0,5.0,...,173.7,94.3,18.0,129.0,36.0,46.0,8.3,5.6,5.6,-12.15


## Engineer `punting` Tables

In [None]:
dataframes["punting"].columns = ["punting_rk", "Tm", "G", "Pnt", 
                   "Pnt_Yds_total", "Y/P", "RetYds", "Net", 
                   "NY/P", "Lng", "TB", "TB%", "In20", "In20%", "Blk_allowed"]

punting_1 = dataframes["punting"].reset_index(drop=True)

dataframes["punting_against"].columns = ["Punting_against_rk", "Tm", "G", "Pnt_against", "Pnt_Yds_against", "Y/P_against", "Blk"]

punting_against_1 = dataframes["punting_against"].reset_index(drop=True)

  punting_rk                   Tm  G Pnt Pnt_Yds_total   Y/P RetYds   Net  \
0          1     Cleveland Browns  5  29          1285  44.3    145  1080   
1          2  Philadelphia Eagles  6  30          1578  52.6    265  1253   
2          3   Cincinnati Bengals  5  27          1433  53.1    163  1150   
3          4     Tennessee Titans  5  24          1181  49.2     92  1045   
4          5    Minnesota Vikings  5  24          1179  49.1     86  1093   

   NY/P Lng TB    TB% In20  In20% Blk_allowed  
0  36.0  58  4  13.8%   11  37.9%           1  
1  41.8  70  3  10.0%    6  20.0%           0  
2  42.6  70  6  22.2%    7  25.9%           0  
3  43.5  65  2   8.3%    9  37.5%           0  
4  45.5  77  0   0.0%    6  25.0%           0  


## Engineer `rushing` Tables

In [None]:
dataframes["rushing_offense"].columns = ["Rk_rushing", "Tm", "G", "Rushing_Att", "Rushing_yds", "Rushing_tds", "Lng_rushing", 
                   "Y/A_rushing", "Y/G_rushing", "Fmb", "EXP_rushing"]

rushing_1 = dataframes["rushing_offense"]

dataframes["rushing_defense"].columns = ["Rk_rushing_allowed", "Tm", "G", "Rushing_att_allowed", 
                           "Rushing_Yds_allowed", "Rushing_Tds_allowed", "Y/A_rushing_allowed", "Y/G_rushing_allowed", "EXP_rushing_allowed"]

rushing_against_1 = dataframes["rushing_defense"]

## Engineer `scoring` Tables

In [None]:
dataframes["scoring_offense"].columns = ["Rk_scoring_offense", "Tm", "G", "RshTd", 
                           "RecTd", "PR_TD", "KR_TD", "FblTD", "IntTD", 
                           "OthTD", "AllTD", "2PM", "2PA", "D2P", "XPM", "XPA", 
                           "FGM", "FGA", "Sfty", "Pts", "Pts/G"]

scoring_offense_1 = dataframes["scoring_offense"]

dataframes["scoring_defense"].columns = ["Rk_scoring_defense", "Tm", "G", "RshTD_allowed", 
                           "RecTD_allowed", "PR_TD_allowed", "KR_TD_allowed", "FblTD_allowed", 
                           "IntTD_allowed", "OthTD_allowed", "AllTD_allowed", "2PM_allowed", "2PA_allowed", 
                           "D2P_allowed", "XPM_allowed", "XPA_allowed", "FGM_allowed", "FGA_allowed", 
                           "Sfty_allowed", "Pts_allowed", "Pts/G_allowed"]

scoring_defense_1 = dataframes["scoring_defense"]

SyntaxError: '[' was never closed (3413533781.py, line 1)

## Engineer `team_advanced_defense` Table

In [None]:
dataframes["team_advanced_defense"].columns = ["Tm", "G", "Att_allowed_passing", 
                                 "Cmp_allowed_passing", "Yds_allowed_passing", 
                                 "TD_allowed_passing", "DADOT", "Air_yards_Cmp", "YAC_allowed", 
                                 "Bltz", "Bltz%", "Hrry", "Hrry%", "QBKD", "QBKD%", "Sk", "Prss", "Prss%", "MTkl"]

team_advanced_defense_1 = dataframes["team_advanced_defense"]

Unnamed: 0,Tm,G,Att_allowed_passing,Cmp_allowed_passing,Yds_allowed_passing,TD_allowed_passing,DADOT,Air_yards_Cmp,YAC_allowed,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl
0,Atlanta Falcons,4,104,60,540,5,9.8,416,186,46,37.1%,5,4.0%,9,8.7%,10,24,19.4%,12
1,Buffalo Bills,5,133,86,770,6,6.8,450,386,40,25.2%,20,12.6%,6,4.5%,13,39,24.5%,23
2,Carolina Panthers,5,153,100,1022,7,7.8,568,472,29,17.2%,2,1.2%,6,3.9%,5,13,7.7%,26
3,Chicago Bears,4,114,84,860,10,6.8,444,442,21,17.1%,8,6.5%,12,10.5%,5,25,20.3%,34
4,Cincinnati Bengals,5,177,120,1295,12,6.4,518,832,32,16.6%,13,6.7%,11,6.2%,10,34,17.6%,49


## Engineer `team_stats` Tables

In [None]:
dataframes["team_defense_statistics"].columns = ["Rk_team_defense", "Tm", "G", "PA", 
                                   "Yds_allowed", "Ply_allowed", "Y/P_allowed", 
                                   "Takeaways", "FL_forced", "1stD_allowed", "Cmp_allowed_passing", 
                                   "Att_allowed_passing", "Yds_allowed_passing", "TDs_allowed_passing", 
                                   "Int", "NY/A_allowed_passing", "1stD_allowed_passing", "Rushing_att_allowed", 
                                   "Rushing_Yds_allowed", "Rushing_Tds_allowed", "Y/A_rushing_allowed", "1stD_allowed_rushing", 
                                   "Pen_defense", "Pen_yds_defense", "1stPy_defense", "Sc%_allowed", "TO%", "EXP_allowed"]

team_defense_statistics_1 = dataframes["team_defense_statistics"].reset_index(drop=True)

dataframes["team_offense"].columns = ["Rk_team_offense", "Tm", "G", "PF", "Yds", "Ply", 
                        "Y/P", "TO_allowed", "FL_allowed", "1stD", "Cmp_passing", 
                        "Att_passing", "Passing_Yds", "Passing_TDs", "Int_allowed", "NY/A_passing", 
                        "1stD_passing", "Rushing_Att", "Rushing_Yds", "RushingTDs", "Rushing_Y/A", 
                        "Rushing_1stD", "Pen_offense","Pen_Yds_offense", "1stPy_offense", "Sc%", "TO%_allowed", "EXP"]

team_offense_1 = dataframes["team_offense"].reset_index(drop=True)

  Rk_team_defense                  Tm  G  PA Yds_allowed Ply_allowed  \
0               1      Houston Texans  5  61        1329         281   
1               2      Denver Broncos  5  84        1443         300   
2               3   Green Bay Packers  4  84        1133         254   
3               4     Atlanta Falcons  4  86         976         208   
4               5  Indianapolis Colts  5  89        1575         300   

  Y/P_allowed Takeaways FL_forced 1stD_allowed  ... Rushing_Yds_allowed  \
0         4.7         6         1           82  ...                 453   
1         4.8         4         3           87  ...                 442   
2         4.5         2         0           72  ...                 310   
3         4.7         6         3           58  ...                 436   
4         5.3         8         2          102  ...                 490   

  Rushing_Tds_allowed Y/A_rushing_allowed 1stD_allowed_rushing Pen_defense  \
0                   5                 

## Final Join of All Tables

In [None]:
import functools
dfs = [
conversions_1, conversions_against_1, drive_averages_1,
drive_averages_against_1, kick_and_punt_returns_against_1, kick_and_punt_returns_1,
kicking_1, passing_against_1, passing_1, punting_1, punting_against_1,
rushing_against_1, rushing_1, scoring_defense_1, scoring_offense_1, 
team_advanced_defense_1, team_defense_statistics_1, team_offense_1
]

# for i, df in enumerate(dfs):
#     print(f"Dataset {i+1} G dtype:", df["G"].dtype)

# for df in dfs:
#     df["G"] = df["G"].astype(str)

for df in dfs:
    if "G" in df.columns:
        df.drop(columns=["G"], inplace=True)

merged_nfl_data = functools.reduce(
    lambda left, right: pd.merge(left, right, on="Tm", how="outer"),
    dfs
)

merged_nfl_data



Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1.0,San Francisco 49ers,70.0,32.0,45.7%,8.0,6.0,75.0%,19.0,8.0,...,426.0,0.0,3.1,25.0,32.0,268.0,10.0,41.5,15.1,26.95
1,2.0,Los Angeles Chargers,69.0,32.0,46.4%,5.0,3.0,60.0%,13.0,5.0,...,584.0,2.0,4.9,30.0,39.0,299.0,11.0,38.0,12.0,8.65
2,3.0,New York Giants,81.0,32.0,39.5%,14.0,6.0,42.9%,22.0,9.0,...,757.0,7.0,4.2,54.0,49.0,470.0,17.0,36.1,13.1,8.46
3,4.0,Green Bay Packers,54.0,29.0,53.7%,3.0,2.0,66.7%,17.0,12.0,...,458.0,4.0,3.8,25.0,35.0,245.0,11.0,45.2,4.8,40.93
4,5.0,Miami Dolphins,60.0,28.0,46.7%,7.0,4.0,57.1%,13.0,10.0,...,411.0,2.0,4.2,19.0,34.0,241.0,2.0,39.5,11.6,23.65
5,6.0,Indianapolis Colts,58.0,27.0,46.6%,8.0,6.0,75.0%,25.0,15.0,...,641.0,11.0,4.4,39.0,39.0,301.0,14.0,63.6,6.8,79.78
6,7.0,Arizona Cardinals,68.0,27.0,39.7%,4.0,4.0,100.0%,17.0,10.0,...,591.0,4.0,4.8,26.0,41.0,273.0,9.0,36.0,10.0,12.64
7,8.0,Las Vegas Raiders,67.0,27.0,40.3%,9.0,4.0,44.4%,14.0,5.0,...,563.0,2.0,4.4,29.0,31.0,279.0,8.0,31.5,18.5,-10.45
8,9.0,Tampa Bay Buccaneers,65.0,27.0,41.5%,5.0,3.0,60.0%,17.0,8.0,...,552.0,3.0,4.0,29.0,35.0,306.0,16.0,47.2,3.8,41.03
9,10.0,Kansas City Chiefs,65.0,27.0,41.5%,10.0,9.0,90.0%,19.0,12.0,...,600.0,6.0,4.8,42.0,42.0,350.0,6.0,51.1,4.3,46.25


In [None]:
merged_nfl_data = merged_nfl_data[
    ~merged_nfl_data["Tm"].isin(["League Total", "Avg Team", "Avg Tm/G"])
]

#merged_nfl_data

Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1,San Francisco 49ers,70,32,45.7%,8,6,75.0%,19,8,...,426,0,3.1,25,32,268,10,41.5,15.1,26.95
1,2,Los Angeles Chargers,69,32,46.4%,5,3,60.0%,13,5,...,584,2,4.9,30,39,299,11,38.0,12.0,8.65
2,3,New York Giants,81,32,39.5%,14,6,42.9%,22,9,...,757,7,4.2,54,49,470,17,36.1,13.1,8.46
3,4,Green Bay Packers,54,29,53.7%,3,2,66.7%,17,12,...,458,4,3.8,25,35,245,11,45.2,4.8,40.93
4,5,Miami Dolphins,60,28,46.7%,7,4,57.1%,13,10,...,411,2,4.2,19,34,241,2,39.5,11.6,23.65
5,6,Indianapolis Colts,58,27,46.6%,8,6,75.0%,25,15,...,641,11,4.4,39,39,301,14,63.6,6.8,79.78
6,7,Arizona Cardinals,68,27,39.7%,4,4,100.0%,17,10,...,591,4,4.8,26,41,273,9,36.0,10.0,12.64
7,8,Las Vegas Raiders,67,27,40.3%,9,4,44.4%,14,5,...,563,2,4.4,29,31,279,8,31.5,18.5,-10.45
8,9,Tampa Bay Buccaneers,65,27,41.5%,5,3,60.0%,17,8,...,552,3,4.0,29,35,306,16,47.2,3.8,41.03
9,10,Kansas City Chiefs,65,27,41.5%,10,9,90.0%,19,12,...,600,6,4.8,42,42,350,6,51.1,4.3,46.25


In [None]:
merged_nfl_data.columns[merged_nfl_data.columns.duplicated()]

Index(['NY/A_passing_allowed'], dtype='object')

In [None]:
merged_nfl_data.columns.value_counts()

NY/A_passing_allowed    2
Rk_conversions          1
Pnt_against             1
FGM_allowed             1
FGA_allowed             1
                       ..
EXP_passing_defense     1
Rk_passing              1
Cmp                     1
Passing_Att             1
EXP                     1
Name: count, Length: 255, dtype: int64

In [None]:
merged_nfl_data = merged_nfl_data.loc[:, ~merged_nfl_data.columns.duplicated()]

In [None]:
merged_nfl_data

Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1,San Francisco 49ers,70,32,45.7%,8,6,75.0%,19,8,...,426,0,3.1,25,32,268,10,41.5,15.1,26.95
1,2,Los Angeles Chargers,69,32,46.4%,5,3,60.0%,13,5,...,584,2,4.9,30,39,299,11,38.0,12.0,8.65
2,3,New York Giants,81,32,39.5%,14,6,42.9%,22,9,...,757,7,4.2,54,49,470,17,36.1,13.1,8.46
3,4,Green Bay Packers,54,29,53.7%,3,2,66.7%,17,12,...,458,4,3.8,25,35,245,11,45.2,4.8,40.93
4,5,Miami Dolphins,60,28,46.7%,7,4,57.1%,13,10,...,411,2,4.2,19,34,241,2,39.5,11.6,23.65
5,6,Indianapolis Colts,58,27,46.6%,8,6,75.0%,25,15,...,641,11,4.4,39,39,301,14,63.6,6.8,79.78
6,7,Arizona Cardinals,68,27,39.7%,4,4,100.0%,17,10,...,591,4,4.8,26,41,273,9,36.0,10.0,12.64
7,8,Las Vegas Raiders,67,27,40.3%,9,4,44.4%,14,5,...,563,2,4.4,29,31,279,8,31.5,18.5,-10.45
8,9,Tampa Bay Buccaneers,65,27,41.5%,5,3,60.0%,17,8,...,552,3,4.0,29,35,306,16,47.2,3.8,41.03
9,10,Kansas City Chiefs,65,27,41.5%,10,9,90.0%,19,12,...,600,6,4.8,42,42,350,6,51.1,4.3,46.25


In [None]:
#merged_nfl_data.to_csv("/Users/stephenmcdevitt/DS-440-Capstone/datasets/mergeddata.csv")

In [None]:
# datasets = [
#     conversions, conversions_against,
#     drive_averages, drive_averages_against,
#     kick_and_punt_returns_against, kick_and_punt_returns,
#     kicking, passing_against, passing,
#     punting, punting_against, rushing_against,
#     rushing, scoring_defense, scoring_offense,
#     team_advanced_defense, team_defense_statistics, team_offense
# ]

# dataset_names = ["conversions", "conversions_against",
#     "drive_averages", "drive_averages_against",
#     "kick_and_punt_returns_against", "kick_and_punt_returns",
#     "kicking", "passing_against", "passing",
#     "punting", "punting_against", "rushing_against",
#     "rushing", "scoring_defense", "scoring_offense",
#     "team_advanced_defense", "team_defense_statistics", "team_offense"]

# for name, df in zip(dataset_names, datasets):
#     if "Tm" not in df.columns or "G" not in df.columns:
#         print(f"⚠️ {name} is missing one of the merge columns! Columns: {df.columns.tolist()}")




# merged_df = functools.reduce(lambda left, right: pd.merge(left, right, on=["Tm" ,"G"], how="outer"), dfs)

In [None]:
"""
import base64
import requests

# Replace these with your own values
token = "YOUR_GITHUB_ACCESS_TOKEN"
username = "YOUR_GITHUB_USERNAME"
repo = "YOUR_REPO_NAME"

# Convert DataFrames to CSV strings
csv_conversions = conversions_1.to_csv(index=False)
csv_conversions_against = conversions_against_1.to_csv(index=False)

# Helper function to upload to GitHub
def upload_to_github(filename, content, message):
    url = f"https://api.github.com/repos/{username}/{repo}/contents/{filename}"
    data = {
        "message": message,
        "content": base64.b64encode(content.encode()).decode()
    }
    headers = {"Authorization": f"token {token}"}
    response = requests.put(url, json=data, headers=headers)
    print(response.json())

# Upload both files
upload_to_github("conversions.csv", csv_conversions, "Add conversions table")
upload_to_github("conversions_against.csv", csv_conversions_against, "Add conversions_against table")
"""