In [None]:
# Dictionary with key indicating table name, list index zero is link to the table's site and list index 1 is the HTML ID of the table
master_table_dict = {
    "conversions" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_conversions"],
    "conversions_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_conversions"],
    "drive_averages" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_drives"],
    "drive_averages_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_drives"],
    "kick_and_punt_returns" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_returns"],
    "kick_and_punt_returns_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_returns"],
    "kicking" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_kicking"],
    "passing_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_passing"],
    "passing_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_passing"],
    "punting" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_punting"],
    "punting_against" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_punting"],
    "rushing_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_rushing"],
    "rushing_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_rushing"],
    "scoring_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_scoring"],
    "scoring_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_scoring"],
    "team_advanced_defense" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_advanced_defense"],
    "team_defense_statistics" : ["https://www.pro-football-reference.com/years/2025/opp.htm", "div_team_stats"],
    "team_offense" : ["https://www.pro-football-reference.com/years/2025/index.htm", "div_team_stats"]
}

## Pull All Data

In [2]:
import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd
import time
from io import StringIO
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="pandas")

headers = {"User-Agent": "Mozilla/5.0"}
dataframes = {}

for key, (url, div_id) in master_table_dict.items():
    print(f"Fetching '{key}' from {url} ...")

    resp = requests.get(url, headers=headers)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # first try: div directly in HTML
    div = soup.find("div", id=div_id)

    # second try: div hidden inside HTML comments
    if div is None:
        comments = soup.find_all(string=lambda text: isinstance(text, Comment))
        for c in comments:
            if div_id in c:
                soup_comment = BeautifulSoup(c, "html.parser")
                div = soup_comment.find("div", id=div_id)
                break

    if div is None:
        print(f"⚠️  Skipping '{key}': no div found for {div_id}")
        continue

    table = div.find("table") if div else None
    if table is None:
        print(f"⚠️  Skipping '{key}': no table found inside div {div_id}")
        continue

    try:
        df = pd.read_html(StringIO(str(table)))[0]
        dataframes[key] = df
        print(f"✅ Saved table '{key}' with {df.shape[0]} rows and {df.shape[1]} columns.")
    except Exception as e:
        print(f"❌ Error parsing '{key}': {e}")

    # sleep 10 seconds between requests
    print("⏳ Waiting 10 seconds to respect site guidelines...")
    time.sleep(10)

print("\n✅ Finished fetching all tables.")

Fetching 'conversions' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'conversions' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'conversions_against' from https://www.pro-football-reference.com/years/2025/opp.htm ...
✅ Saved table 'conversions_against' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'drive_averages' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'drive_averages' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'drive_averages_against' from https://www.pro-football-reference.com/years/2025/opp.htm ...
✅ Saved table 'drive_averages_against' with 33 rows and 12 columns.
⏳ Waiting 10 seconds to respect site guidelines...
Fetching 'kick_and_punt_returns' from https://www.pro-football-reference.com/years/2025/index.htm ...
✅ Saved table 'kick_and_punt_returns' with 35 rows and 14 

## Engineer `conversions` Tables

In [None]:
dataframes["conversions"].columns = [
    "Rk_conversions", "Tm", "G", "3DAtt", "3DConv", "3D%", 
    "4DAtt", "4DConv", "4D%", "RZAtt", "RZTD", "RZPct"
]
conversions_1 = dataframes["conversions"].reset_index(drop=True)

dataframes["conversions_against"].columns = [
    "Rk_conversions_against", "Tm", "G", "3DAtt_against", "3DConv_against", "3D%_against", 
    "4DAtt_against", "4DConv_against", "4D%_against", "RZAtt_against", "RZTD_against", "RZPct_against"
]
conversions_against_1 = dataframes["conversions_against"].reset_index(drop=True)

Unnamed: 0,Rk_conversions,Tm,G,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,1.0,San Francisco 49ers,7.0,101.0,48.0,47.5%,10.0,6.0,60.0%,23.0,11.0,47.8%
1,2.0,Los Angeles Chargers,7.0,100.0,46.0,46.0%,10.0,7.0,70.0%,24.0,10.0,41.7%
2,3.0,Arizona Cardinals,7.0,96.0,43.0,44.8%,7.0,4.0,57.1%,27.0,15.0,55.6%
3,4.0,Kansas City Chiefs,7.0,90.0,40.0,44.4%,15.0,12.0,80.0%,30.0,20.0,66.7%
4,5.0,New York Giants,7.0,96.0,38.0,39.6%,16.0,8.0,50.0%,24.0,11.0,45.8%


In [12]:
print(dataframes.keys())

dict_keys(['conversions', 'conversions_against', 'drive_averages', 'drive_averages_against', 'kick_and_punt_returns', 'kick_and_punt_returns_against', 'kicking', 'kicking_against', 'passing_offense', 'passing_defense', 'punting', 'punting_against', 'rushing_offense', 'rushing_defense', 'scoring_offense', 'scoring_defense', 'team_advanced_defense', 'team_defense_statistics', 'team_offense'])


## Engineer `drive_averages` Tables

In [None]:
dataframes["drive_averages"].columns = [
    "Rk_drive_averages", "Tm", "G", "#Dr", "Plays", "Sc%", 
    "TO%", "Avg_plays", "Avg_yards", "Avg_start", "Avg_time", "Avg_pts"
]
drive_averages_1 = dataframes["drive_averages"].reset_index(drop=True)

dataframes["drive_averages_against"].columns = [
    "Rk_drive_averages_against", "Tm", "G", "#Dr_against", "Plays_against", "Sc%_against", 
    "TO%_against", "Avg_plays_against", "Avg_yards_against", "Avg_start_against", "Avg_time_against", "Avg_pts_against"
]
drive_averages_against_1 = dataframes["drive_averages_against"].reset_index(drop=True)

   Rk_drive_averages                    Tm    G  #Dr  Plays   Sc%   TO%  \
0                1.0        Denver Broncos  7.0   80    466  35.0   7.5   
1                2.0      Cleveland Browns  7.0   78    472  26.9  10.3   
2                3.0  Jacksonville Jaguars  7.0   78    472  32.1   7.7   
3                4.0      Tennessee Titans  7.0   77    425  28.6  15.6   
4                5.0         New York Jets  7.0   75    433  33.3  10.7   

   Avg_plays  Avg_yards Avg_start Avg_time  Avg_pts  
0        5.8       30.4  Own 30.2     2:42     2.00  
1        6.1       24.3  Own 31.5     2:46     1.37  
2        6.1       30.1  Own 29.0     2:47     1.73  
3        5.5       21.4  Own 29.4     2:27     1.17  
4        5.8       25.4  Own 29.1     2:41     1.59  


## Engineer `kick_and_punt_returns` Tables

In [None]:
dataframes["kick_and_punt_returns"].columns = ["Rk_kick_and_punt_returns", "Tm", "G", "Punt_Returns", 
                                         "Punt_return_yds", "Punt_return_TD", "Punt_return_lng", "Punt_return_Y/R", "Kick_return", 
                                         "Kick_return_yds", "Kick_return_TD", "Kick_return_lng", "Kick_return_Y/Rt", "Kick_return_APYd"]

kick_and_punt_returns_1 = dataframes["kick_and_punt_returns"].reset_index(drop=True)

dataframes["kick_and_punt_returns_against"].columns = ["Rk_kick_and_punt_returns_against", "Tm", "G", "Punt_Returns_Against", 
                                         "Punt_return_yds_against", "Punt_return_TD_against", "Punt_return_Y/R_against", "Kick_return_against", 
                                         "Kick_return_yds_against", "Kick_return_TD_against", "Kick_return_Y/Rt_against"]

kick_and_punt_returns_against_1 = dataframes["kick_and_punt_returns_against"].reset_index(drop=True)

  Rk_kick_and_punt_returns_against                    Tm  G  \
0                                1   Philadelphia Eagles  6   
1                                2       New York Giants  6   
2                                3         Detroit Lions  5   
3                                4         Buffalo Bills  5   
4                                5  Tampa Bay Buccaneers  5   

  Punt_Returns_Against Punt_return_yds_against Punt_return_TD_against  \
0                   19                     265                      0   
1                   10                      47                      0   
2                   10                      85                      0   
3                    6                      49                      0   
4                    6                      99                      0   

  Punt_return_Y/R_against Kick_return_against Kick_return_yds_against  \
0                    13.9                  27                     593   
1                     4.7           

## Engineer `kicking` Tables

In [None]:
dataframes["kicking"].columns = ["Rk_FG", "Tm", "G", "FGA", "FGM", "FG%", "XPA", "XPM", "XP%"]

kicking_1 = dataframes["kicking"].reset_index(drop=True)

  Rk_FG                   Tm  G FGA FGM    FG% XPA XPM     XP%
0     1   Cincinnati Bengals  5   6   5  83.3%  20  19   95.0%
1     2     Cleveland Browns  5   8   6  75.0%  15  15  100.0%
2     3        New York Jets  5  14  13  92.9%  17  16   94.1%
3     4       Miami Dolphins  5  12  11  91.7%  14  12   85.7%
4     5  Pittsburgh Steelers  4   6   5  83.3%   9   9  100.0%


## Engineer `passing` Tables

In [109]:
passing_against.columns = ["Rk_passing_defense", "Tm", "G", 
                           "Cmp_allowed", "Att_allowed_passing", "Cmp%_allowed", 
                           "Passing_yds_allowed", "Passing_TDs_allowed", 
                           "Passing_TD%_allowed", "Int", "Passes_defended", 
                           "Int%", "Y/A_passing_allowed", "AY/A_passing_allowed", 
                           "Y/C_passing_allowed", "Y/G_passing_allowed", "Rate_allowed", "Sk", 
                           "SkYds", "QBHits", "TFL", "Sk%", "NY/A_passing_allowed", "NY/A_passing_allowed", "EXP_passing_defense"]

passing_against_1 = passing_against
passing_against_1.head()

Unnamed: 0,Rk_passing_defense,Tm,G,Cmp_allowed,Att_allowed_passing,Cmp%_allowed,Passing_yds_allowed,Passing_TDs_allowed,Passing_TD%_allowed,Int,...,Y/G_passing_allowed,Rate_allowed,Sk,SkYds,QBHits,TFL,Sk%,NY/A_passing_allowed,NY/A_passing_allowed.1,EXP_passing_defense
0,1,Atlanta Falcons,4,60,104,57.7,540,5,4.8,3.0,...,135.0,78.3,10,62,23,15,8.8,4.7,4.4,0.24
1,2,Buffalo Bills,5,86,133,64.7,770,6,4.5,2.0,...,154.0,91.0,13,67,24,26,8.9,5.3,5.5,-0.83
2,3,Minnesota Vikings,5,90,138,65.2,788,5,3.6,2.0,...,157.6,88.9,13,87,23,28,8.6,5.2,5.3,35.46
3,4,Green Bay Packers,4,107,157,68.2,823,6,3.8,2.0,...,205.8,89.8,11,63,35,18,6.5,4.9,5.1,-15.1
4,5,Chicago Bears,4,84,114,73.7,860,10,8.8,7.0,...,215.0,99.5,5,26,17,17,4.2,7.2,6.3,-21.75


In [110]:
passing.columns = ["Rk_passing", "Tm", "G", 
                   "Cmp", "Passing_Att", "Cmp%", "Passing_yds", 
                   "Passing_tds", "Passing_td%", "Int_thrown", "Int%_thrown", 
                   "Passing_lng", "Y/A_passing", "AY/A_passing", "Y/C_passing", 
                   "Y/G_passing", "rate", "Sk_allowed", "SkYds_allowed", "Sk%_allowed", 
                   "NY/A_passing", "ANY/A_passing", "4QC", "GWD", "EXP_passing"]
passing_1=passing
passing_1.head()

Unnamed: 0,Rk_passing,Tm,G,Cmp,Passing_Att,Cmp%,Passing_yds,Passing_tds,Passing_td%,Int_thrown,...,Y/G_passing,rate,Sk_allowed,SkYds_allowed,Sk%_allowed,NY/A_passing,ANY/A_passing,4QC,GWD,EXP_passing
0,1,San Francisco 49ers,5,134,202,66.3,1453,10,5.0,5,...,290.6,94.3,7,38,3.3,7.0,6.8,2.0,3.0,51.23
1,2,Los Angeles Rams,5,122,184,66.3,1448,11,6.0,2,...,289.6,106.8,9,55,4.7,7.5,8.2,1.0,1.0,52.87
2,3,Dallas Cowboys,5,142,200,71.0,1361,10,5.0,4,...,272.2,98.7,7,36,3.4,6.6,6.7,2.0,1.0,59.43
3,4,Indianapolis Colts,5,108,153,70.6,1265,6,3.9,2,...,253.0,103.9,4,34,2.5,8.1,8.2,1.0,1.0,65.02
4,5,Seattle Seahawks,5,100,137,73.0,1227,9,6.6,3,...,245.4,114.0,6,34,4.2,8.6,8.9,0.0,2.0,59.66


In [111]:
punting.columns = ["punting_rk", "Tm", "G", "Pnt", 
                   "Pnt_Yds_total", "Y/P", "RetYds", "Net", 
                   "NY/P", "Lng", "TB", "TB%", "In20", "In20%", "Blk_allowed"]

punting_1 = punting.drop(index=0).reset_index(drop=True)
print(punting_1.head())

  punting_rk                   Tm  G Pnt Pnt_Yds_total   Y/P RetYds   Net  \
0          1     Cleveland Browns  5  29          1285  44.3    145  1080   
1          2  Philadelphia Eagles  6  30          1578  52.6    265  1253   
2          3   Cincinnati Bengals  5  27          1433  53.1    163  1150   
3          4     Tennessee Titans  5  24          1181  49.2     92  1045   
4          5    Minnesota Vikings  5  24          1179  49.1     86  1093   

   NY/P Lng TB    TB% In20  In20% Blk_allowed  
0  36.0  58  4  13.8%   11  37.9%           1  
1  41.8  70  3  10.0%    6  20.0%           0  
2  42.6  70  6  22.2%    7  25.9%           0  
3  43.5  65  2   8.3%    9  37.5%           0  
4  45.5  77  0   0.0%    6  25.0%           0  


In [112]:
punting_against.columns = ["Punting_against_rk", "Tm", "G", "Pnt_against", "Pnt_Yds_against", "Y/P_against", "Blk"]

punting_against_1 = punting_against.drop(index=0).reset_index(drop=True)
print(punting_against_1.head())

  Punting_against_rk                  Tm  G Pnt_against Pnt_Yds_against  \
0                  1  Indianapolis Colts  5          11             469   
1                  2     Atlanta Falcons  4          13             633   
2                  3       Buffalo Bills  5          21            1025   
3                  4  Kansas City Chiefs  5          12             596   
4                  5   Green Bay Packers  4          17             810   

  Y/P_against Blk  
0        42.6   1  
1        48.7   0  
2        48.8   0  
3        49.7   0  
4        47.6   0  


In [113]:
rushing_against.columns = ["Rk_rushing_allowed", "Tm", "G", "Rushing_att_allowed", 
                           "Rushing_Yds_allowed", "Rushing_Tds_allowed", "Y/A_rushing_allowed", "Y/G_rushing_allowed", "EXP_rushing_allowed"]

rushing_against_1 = rushing_against

In [114]:
rushing.columns = ["Rk_rushing", "Tm", "G", "Rushing_Att", "Rushing_yds", "Rushing_tds", "Lng_rushing", 
                   "Y/A_rushing", "Y/G_rushing", "Fmb", "EXP_rushing"]

rushing_1 = rushing
rushing_1.head()



Unnamed: 0,Rk_rushing,Tm,G,Rushing_Att,Rushing_yds,Rushing_tds,Lng_rushing,Y/A_rushing,Y/G_rushing,Fmb,EXP_rushing
0,1,Washington Commanders,5,133,782,7,60,5.9,156.4,6,21.43
1,2,Buffalo Bills,5,162,772,9,44,4.8,154.4,6,16.44
2,3,New York Giants,6,181,757,7,24,4.2,126.2,7,7.66
3,4,New York Jets,5,133,722,4,43,5.4,144.4,9,2.6
4,5,Denver Broncos,5,142,703,6,50,5.0,140.6,2,0.88


In [115]:
scoring_defense.columns = ["Rk_scoring_defense", "Tm", "G", "RshTD_allowed", 
                           "RecTD_allowed", "PR_TD_allowed", "KR_TD_allowed", "FblTD_allowed", 
                           "IntTD_allowed", "OthTD_allowed", "AllTD_allowed", "2PM_allowed", "2PA_allowed", 
                           "D2P_allowed", "XPM_allowed", "XPA_allowed", "FGM_allowed", "FGA_allowed", 
                           "Sfty_allowed", "Pts_allowed", "Pts/G_allowed"]

scoring_defense_1 = scoring_defense
scoring_defense_1.head()

Unnamed: 0,Rk_scoring_defense,Tm,G,RshTD_allowed,RecTD_allowed,PR_TD_allowed,KR_TD_allowed,FblTD_allowed,IntTD_allowed,OthTD_allowed,...,2PM_allowed,2PA_allowed,D2P_allowed,XPM_allowed,XPA_allowed,FGM_allowed,FGA_allowed,Sfty_allowed,Pts_allowed,Pts/G_allowed
0,1,Houston Texans,5,5,3,,,,,,...,,1,,7,7,2,7,,61,12.2
1,2,Denver Broncos,5,2,4,,,,,,...,,0,,6,6,14,14,,84,16.8
2,3,Indianapolis Colts,5,2,8,,,,,,...,1.0,1,,9,9,6,10,,89,17.8
3,4,Arizona Cardinals,5,3,5,,,1.0,,,...,,2,,6,7,12,14,,96,19.2
4,5,Minnesota Vikings,5,4,5,,,,1.0,,...,,0,,10,10,9,11,,97,19.4


In [116]:
scoring_offense.columns = ["Rk_scoring_offense", "Tm", "G", "RshTd", 
                           "RecTd", "PR_TD", "KR_TD", "FblTD", "IntTD", 
                           "OthTD", "AllTD", "2PM", "2PA", "D2P", "XPM", "XPA", 
                           "FGM", "FGA", "Sfty", "Pts", "Pts/G"]

scoring_offense_1 = scoring_offense
scoring_offense_1.head()

Unnamed: 0,Rk_scoring_offense,Tm,G,RshTd,RecTd,PR_TD,KR_TD,FblTD,IntTD,OthTD,...,2PM,2PA,D2P,XPM,XPA,FGM,FGA,Sfty,Pts,Pts/G
0,1.0,Houston Texans,5.0,3.0,8.0,,,,,,...,,2.0,,9.0,9.0,11.0,13.0,,108.0,21.6
1,2.0,Denver Broncos,5.0,6.0,8.0,,,,,,...,1.0,1.0,,13.0,13.0,6.0,7.0,,117.0,23.4
2,3.0,Indianapolis Colts,5.0,11.0,6.0,,,,1.0,,...,1.0,4.0,,14.0,14.0,13.0,14.0,,163.0,32.6
3,4.0,Arizona Cardinals,5.0,4.0,6.0,,,1.0,,,...,,0.0,,11.0,11.0,8.0,10.0,1.0,103.0,20.6
4,5.0,Minnesota Vikings,5.0,4.0,8.0,,,1.0,1.0,,...,2.0,3.0,,11.0,11.0,8.0,9.0,,123.0,24.6


In [117]:
team_advanced_defense.columns = ["Tm", "G", "Att_allowed_passing", 
                                 "Cmp_allowed_passing", "Yds_allowed_passing", 
                                 "TD_allowed_passing", "DADOT", "Air_yards_Cmp", "YAC_allowed", 
                                 "Bltz", "Bltz%", "Hrry", "Hrry%", "QBKD", "QBKD%", "Sk", "Prss", "Prss%", "MTkl"]

team_advanced_defense_1 = team_advanced_defense
team_advanced_defense_1.head()

Unnamed: 0,Tm,G,Att_allowed_passing,Cmp_allowed_passing,Yds_allowed_passing,TD_allowed_passing,DADOT,Air_yards_Cmp,YAC_allowed,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl
0,Atlanta Falcons,4,104,60,540,5,9.8,416,186,46,37.1%,5,4.0%,9,8.7%,10,24,19.4%,12
1,Buffalo Bills,5,133,86,770,6,6.8,450,386,40,25.2%,20,12.6%,6,4.5%,13,39,24.5%,23
2,Carolina Panthers,5,153,100,1022,7,7.8,568,472,29,17.2%,2,1.2%,6,3.9%,5,13,7.7%,26
3,Chicago Bears,4,114,84,860,10,6.8,444,442,21,17.1%,8,6.5%,12,10.5%,5,25,20.3%,34
4,Cincinnati Bengals,5,177,120,1295,12,6.4,518,832,32,16.6%,13,6.7%,11,6.2%,10,34,17.6%,49


In [118]:
team_defense_statistics.columns = ["Rk_team_defense", "Tm", "G", "PA", 
                                   "Yds_allowed", "Ply_allowed", "Y/P_allowed", 
                                   "Takeaways", "FL_forced", "1stD_allowed", "Cmp_allowed_passing", 
                                   "Att_allowed_passing", "Yds_allowed_passing", "TDs_allowed_passing", 
                                   "Int", "NY/A_allowed_passing", "1stD_allowed_passing", "Rushing_att_allowed", 
                                   "Rushing_Yds_allowed", "Rushing_Tds_allowed", "Y/A_rushing_allowed", "1stD_allowed_rushing", 
                                   "Pen_defense", "Pen_yds_defense", "1stPy_defense", "Sc%_allowed", "TO%", "EXP_allowed"]

team_defense_statistics_1 = team_defense_statistics.drop(index=0).reset_index(drop=True)
print(team_defense_statistics_1.head())

  Rk_team_defense                  Tm  G  PA Yds_allowed Ply_allowed  \
0               1      Houston Texans  5  61        1329         281   
1               2      Denver Broncos  5  84        1443         300   
2               3   Green Bay Packers  4  84        1133         254   
3               4     Atlanta Falcons  4  86         976         208   
4               5  Indianapolis Colts  5  89        1575         300   

  Y/P_allowed Takeaways FL_forced 1stD_allowed  ... Rushing_Yds_allowed  \
0         4.7         6         1           82  ...                 453   
1         4.8         4         3           87  ...                 442   
2         4.5         2         0           72  ...                 310   
3         4.7         6         3           58  ...                 436   
4         5.3         8         2          102  ...                 490   

  Rushing_Tds_allowed Y/A_rushing_allowed 1stD_allowed_rushing Pen_defense  \
0                   5                 

In [119]:
team_offense.columns = ["Rk_team_offense", "Tm", "G", "PF", "Yds", "Ply", 
                        "Y/P", "TO_allowed", "FL_allowed", "1stD", "Cmp_passing", 
                        "Att_passing", "Passing_Yds", "Passing_TDs", "Int_allowed", "NY/A_passing", 
                        "1stD_passing", "Rushing_Att", "Rushing_Yds", "RushingTDs", "Rushing_Y/A", 
                        "Rushing_1stD", "Pen_offense","Pen_Yds_offense", "1stPy_offense", "Sc%", "TO%_allowed", "EXP"]

team_offense_1 = team_offense.drop(index=0).reset_index(drop=True)
print(team_offense_1.head())

  Rk_team_offense                  Tm  G   PF   Yds  Ply  Y/P TO_allowed  \
0               1       Detroit Lions  5  174  1825  306    6          3   
1               2  Indianapolis Colts  5  163  1906  303  6.3          3   
2               3       Buffalo Bills  5  153  1979  324  6.1          4   
3               4      Dallas Cowboys  5  151  2033  332  6.1          6   
4               5    Seattle Seahawks  5  146  1792  286  6.3          8   

  FL_allowed 1stD  ... Rushing_Yds RushingTDs Rushing_Y/A Rushing_1stD  \
0          1  104  ...         674          8         4.4           37   
1          1  116  ...         641         11         4.4           39   
2          2  122  ...         772          9         4.8           43   
3          2  125  ...         672          7         5.4           38   
4          5  104  ...         565          6           4           33   

  Pen_offense Pen_Yds_offense 1stPy_offense   Sc% TO%_allowed    EXP  
0          32             2

In [128]:
dfs = [
conversions_1, conversions_against_1, drive_averages_1,
drive_averages_against_1, kick_and_punt_returns_against_1, kick_and_punt_returns_1,
kicking_1, passing_against_1, passing_1, punting_1, punting_against_1,
rushing_against_1, rushing_1, scoring_defense_1, scoring_offense_1, 
team_advanced_defense_1, team_defense_statistics_1, team_offense_1
]

# for i, df in enumerate(dfs):
#     print(f"Dataset {i+1} G dtype:", df["G"].dtype)

# for df in dfs:
#     df["G"] = df["G"].astype(str)

for df in dfs:
    if "G" in df.columns:
        df.drop(columns=["G"], inplace=True)

merged_nfl_data = functools.reduce(
    lambda left, right: pd.merge(left, right, on="Tm", how="outer"),
    dfs
)

merged_nfl_data



Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1.0,San Francisco 49ers,70.0,32.0,45.7%,8.0,6.0,75.0%,19.0,8.0,...,426.0,0.0,3.1,25.0,32.0,268.0,10.0,41.5,15.1,26.95
1,2.0,Los Angeles Chargers,69.0,32.0,46.4%,5.0,3.0,60.0%,13.0,5.0,...,584.0,2.0,4.9,30.0,39.0,299.0,11.0,38.0,12.0,8.65
2,3.0,New York Giants,81.0,32.0,39.5%,14.0,6.0,42.9%,22.0,9.0,...,757.0,7.0,4.2,54.0,49.0,470.0,17.0,36.1,13.1,8.46
3,4.0,Green Bay Packers,54.0,29.0,53.7%,3.0,2.0,66.7%,17.0,12.0,...,458.0,4.0,3.8,25.0,35.0,245.0,11.0,45.2,4.8,40.93
4,5.0,Miami Dolphins,60.0,28.0,46.7%,7.0,4.0,57.1%,13.0,10.0,...,411.0,2.0,4.2,19.0,34.0,241.0,2.0,39.5,11.6,23.65
5,6.0,Indianapolis Colts,58.0,27.0,46.6%,8.0,6.0,75.0%,25.0,15.0,...,641.0,11.0,4.4,39.0,39.0,301.0,14.0,63.6,6.8,79.78
6,7.0,Arizona Cardinals,68.0,27.0,39.7%,4.0,4.0,100.0%,17.0,10.0,...,591.0,4.0,4.8,26.0,41.0,273.0,9.0,36.0,10.0,12.64
7,8.0,Las Vegas Raiders,67.0,27.0,40.3%,9.0,4.0,44.4%,14.0,5.0,...,563.0,2.0,4.4,29.0,31.0,279.0,8.0,31.5,18.5,-10.45
8,9.0,Tampa Bay Buccaneers,65.0,27.0,41.5%,5.0,3.0,60.0%,17.0,8.0,...,552.0,3.0,4.0,29.0,35.0,306.0,16.0,47.2,3.8,41.03
9,10.0,Kansas City Chiefs,65.0,27.0,41.5%,10.0,9.0,90.0%,19.0,12.0,...,600.0,6.0,4.8,42.0,42.0,350.0,6.0,51.1,4.3,46.25


In [130]:
merged_nfl_data = merged_nfl_data[
    ~merged_nfl_data["Tm"].isin(["League Total", "Avg Team", "Avg Tm/G"])
]

merged_nfl_data

Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1,San Francisco 49ers,70,32,45.7%,8,6,75.0%,19,8,...,426,0,3.1,25,32,268,10,41.5,15.1,26.95
1,2,Los Angeles Chargers,69,32,46.4%,5,3,60.0%,13,5,...,584,2,4.9,30,39,299,11,38.0,12.0,8.65
2,3,New York Giants,81,32,39.5%,14,6,42.9%,22,9,...,757,7,4.2,54,49,470,17,36.1,13.1,8.46
3,4,Green Bay Packers,54,29,53.7%,3,2,66.7%,17,12,...,458,4,3.8,25,35,245,11,45.2,4.8,40.93
4,5,Miami Dolphins,60,28,46.7%,7,4,57.1%,13,10,...,411,2,4.2,19,34,241,2,39.5,11.6,23.65
5,6,Indianapolis Colts,58,27,46.6%,8,6,75.0%,25,15,...,641,11,4.4,39,39,301,14,63.6,6.8,79.78
6,7,Arizona Cardinals,68,27,39.7%,4,4,100.0%,17,10,...,591,4,4.8,26,41,273,9,36.0,10.0,12.64
7,8,Las Vegas Raiders,67,27,40.3%,9,4,44.4%,14,5,...,563,2,4.4,29,31,279,8,31.5,18.5,-10.45
8,9,Tampa Bay Buccaneers,65,27,41.5%,5,3,60.0%,17,8,...,552,3,4.0,29,35,306,16,47.2,3.8,41.03
9,10,Kansas City Chiefs,65,27,41.5%,10,9,90.0%,19,12,...,600,6,4.8,42,42,350,6,51.1,4.3,46.25


In [134]:
merged_nfl_data.columns[merged_nfl_data.columns.duplicated()]

Index(['NY/A_passing_allowed'], dtype='object')

In [135]:
merged_nfl_data.columns.value_counts()

NY/A_passing_allowed    2
Rk_conversions          1
Pnt_against             1
FGM_allowed             1
FGA_allowed             1
                       ..
EXP_passing_defense     1
Rk_passing              1
Cmp                     1
Passing_Att             1
EXP                     1
Name: count, Length: 255, dtype: int64

In [136]:
merged_nfl_data = merged_nfl_data.loc[:, ~merged_nfl_data.columns.duplicated()]

In [137]:
merged_nfl_data

Unnamed: 0,Rk_conversions,Tm,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,...,Rushing_Yds,RushingTDs,Rushing_Y/A,Rushing_1stD,Pen_offense,Pen_Yds_offense,1stPy_offense,Sc%_y,TO%_allowed,EXP
0,1,San Francisco 49ers,70,32,45.7%,8,6,75.0%,19,8,...,426,0,3.1,25,32,268,10,41.5,15.1,26.95
1,2,Los Angeles Chargers,69,32,46.4%,5,3,60.0%,13,5,...,584,2,4.9,30,39,299,11,38.0,12.0,8.65
2,3,New York Giants,81,32,39.5%,14,6,42.9%,22,9,...,757,7,4.2,54,49,470,17,36.1,13.1,8.46
3,4,Green Bay Packers,54,29,53.7%,3,2,66.7%,17,12,...,458,4,3.8,25,35,245,11,45.2,4.8,40.93
4,5,Miami Dolphins,60,28,46.7%,7,4,57.1%,13,10,...,411,2,4.2,19,34,241,2,39.5,11.6,23.65
5,6,Indianapolis Colts,58,27,46.6%,8,6,75.0%,25,15,...,641,11,4.4,39,39,301,14,63.6,6.8,79.78
6,7,Arizona Cardinals,68,27,39.7%,4,4,100.0%,17,10,...,591,4,4.8,26,41,273,9,36.0,10.0,12.64
7,8,Las Vegas Raiders,67,27,40.3%,9,4,44.4%,14,5,...,563,2,4.4,29,31,279,8,31.5,18.5,-10.45
8,9,Tampa Bay Buccaneers,65,27,41.5%,5,3,60.0%,17,8,...,552,3,4.0,29,35,306,16,47.2,3.8,41.03
9,10,Kansas City Chiefs,65,27,41.5%,10,9,90.0%,19,12,...,600,6,4.8,42,42,350,6,51.1,4.3,46.25


In [None]:
merged_nfl_data.to_csv("/Users/stephenmcdevitt/DS-440-Capstone/datasets/mergeddata.csv")

In [100]:
# datasets = [
#     conversions, conversions_against,
#     drive_averages, drive_averages_against,
#     kick_and_punt_returns_against, kick_and_punt_returns,
#     kicking, passing_against, passing,
#     punting, punting_against, rushing_against,
#     rushing, scoring_defense, scoring_offense,
#     team_advanced_defense, team_defense_statistics, team_offense
# ]

# dataset_names = ["conversions", "conversions_against",
#     "drive_averages", "drive_averages_against",
#     "kick_and_punt_returns_against", "kick_and_punt_returns",
#     "kicking", "passing_against", "passing",
#     "punting", "punting_against", "rushing_against",
#     "rushing", "scoring_defense", "scoring_offense",
#     "team_advanced_defense", "team_defense_statistics", "team_offense"]

# for name, df in zip(dataset_names, datasets):
#     if "Tm" not in df.columns or "G" not in df.columns:
#         print(f"⚠️ {name} is missing one of the merge columns! Columns: {df.columns.tolist()}")




# merged_df = functools.reduce(lambda left, right: pd.merge(left, right, on=["Tm" ,"G"], how="outer"), dfs)

In [None]:
"""
import base64
import requests

# Replace these with your own values
token = "YOUR_GITHUB_ACCESS_TOKEN"
username = "YOUR_GITHUB_USERNAME"
repo = "YOUR_REPO_NAME"

# Convert DataFrames to CSV strings
csv_conversions = conversions_1.to_csv(index=False)
csv_conversions_against = conversions_against_1.to_csv(index=False)

# Helper function to upload to GitHub
def upload_to_github(filename, content, message):
    url = f"https://api.github.com/repos/{username}/{repo}/contents/{filename}"
    data = {
        "message": message,
        "content": base64.b64encode(content.encode()).decode()
    }
    headers = {"Authorization": f"token {token}"}
    response = requests.put(url, json=data, headers=headers)
    print(response.json())

# Upload both files
upload_to_github("conversions.csv", csv_conversions, "Add conversions table")
upload_to_github("conversions_against.csv", csv_conversions_against, "Add conversions_against table")
"""