In [7]:
import pandas as pd
import ast

fpi_path = "../Data/Raw/2025/ratings/fpi.csv"
df_fpi = pd.read_csv(fpi_path)

for col in ["resumeRanks", "efficiencies"]:
    df_fpi[col] = df_fpi[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith("{") else {})

resume_df = df_fpi["resumeRanks"].apply(pd.Series).add_prefix("resume_")
eff_df = df_fpi["efficiencies"].apply(pd.Series).add_prefix("eff_")

df_fpi = pd.concat([df_fpi.drop(columns=["resumeRanks", "efficiencies"]), resume_df, eff_df], axis=1)

save_path = "../Data/Processed/2025/ratings/fpi_cleaned.csv"
df_fpi.to_csv(save_path, index=False)
print(f"Cleaned FPI saved to {save_path}")
df_fpi.head()

Cleaned FPI saved to ../Data/Processed/2025/ratings/fpi_cleaned.csv


Unnamed: 0,year,team,conference,fpi,resume_gameControl,resume_remainingStrengthOfSchedule,resume_strengthOfSchedule,resume_averageWinProbability,resume_fpi,resume_strengthOfRecord,eff_specialTeams,eff_defense,eff_offense,eff_overall
0,2025,Notre Dame,FBS Independents,21.993,9,67,11,23,4,28,58.07,73.484,87.213,87.095
1,2025,Oklahoma State,Big 12,-13.542,126,36,36,129,117,122,59.277,19.421,18.323,15.08
2,2025,Kansas State,Big 12,7.722,53,30,54,62,43,83,45.81,62.94,51.688,58.792
3,2025,Baylor,Big 12,6.115,77,47,65,84,46,42,53.436,37.201,71.785,56.97
4,2025,Iowa State,Big 12,9.15,50,58,51,63,34,35,50.147,62.4,64.732,66.761


In [8]:
from src.data_cleaning import flatten_nested_column

year = 2025
df_ppa = pd.read_csv(f"../Data/Raw/{year}/ratings/ppa_team.csv")

# Flatten both offense and defense columns
df_ppa = flatten_nested_column(df_ppa, "offense", prefix="offense")
df_ppa = flatten_nested_column(df_ppa, "defense", prefix="defense")

df_ppa.to_csv(f"../Data/Processed/{year}/ratings/ppa_team_cleaned.csv", index=False)
print("Cleaned PPA data saved")
df_ppa.head()

Cleaned PPA data saved


Unnamed: 0,season,conference,team,offense_cumulative_rushing,offense_cumulative_passing,offense_cumulative_total,offense_thirdDown,offense_secondDown,offense_firstDown,offense_rushing,...,offense_overall,defense_cumulative_rushing,defense_cumulative_passing,defense_cumulative_total,defense_thirdDown,defense_secondDown,defense_firstDown,defense_rushing,defense_passing,defense_overall
0,2025,Mountain West,Air Force,97.2,69.95,167.1,0.74,0.42,0.22,0.32,...,0.41,53.5,1261.1,179.6,1.0,0.56,0.23,0.31,0.67,0.5
1,2025,Mid-American,Akron,-2.8,29.05,20.0,0.25,0.01,-0.08,-0.01,...,0.04,46.2,700.2,116.3,0.57,0.43,0.05,0.19,0.3,0.24
2,2025,SEC,Alabama,12.2,124.91,137.2,0.76,0.22,0.15,0.06,...,0.33,25.9,207.6,46.0,0.49,0.23,-0.12,0.14,0.15,0.14
3,2025,Sun Belt,App State,20.9,18.8,37.9,0.14,0.17,-0.01,0.11,...,0.09,-12.7,618.2,46.7,0.42,0.21,-0.11,-0.08,0.25,0.11
4,2025,Big 12,Arizona,32.8,30.8,63.6,0.42,0.19,-0.06,0.18,...,0.16,11.1,-25.0,8.3,0.22,0.08,-0.21,0.06,-0.01,0.02


In [9]:
year = 2025
df_sp = pd.read_csv(f"../Data/Raw/{year}/ratings/sp.csv")

# Flatten both offense and defense columns
df_sp = flatten_nested_column(df_sp, "offense", prefix="offense")
df_sp = flatten_nested_column(df_sp, "defense", prefix="defense")
df_sp = flatten_nested_column(df_sp, "specialTeams", prefix="specialTeams")

df_sp.to_csv(f"../Data/Processed/{year}/ratings/sp_team_cleaned.csv", index=False)
print("Cleaned SP data saved")
df_ppa.head()

Cleaned SP data saved


Unnamed: 0,season,conference,team,offense_cumulative_rushing,offense_cumulative_passing,offense_cumulative_total,offense_thirdDown,offense_secondDown,offense_firstDown,offense_rushing,...,offense_overall,defense_cumulative_rushing,defense_cumulative_passing,defense_cumulative_total,defense_thirdDown,defense_secondDown,defense_firstDown,defense_rushing,defense_passing,defense_overall
0,2025,Mountain West,Air Force,97.2,69.95,167.1,0.74,0.42,0.22,0.32,...,0.41,53.5,1261.1,179.6,1.0,0.56,0.23,0.31,0.67,0.5
1,2025,Mid-American,Akron,-2.8,29.05,20.0,0.25,0.01,-0.08,-0.01,...,0.04,46.2,700.2,116.3,0.57,0.43,0.05,0.19,0.3,0.24
2,2025,SEC,Alabama,12.2,124.91,137.2,0.76,0.22,0.15,0.06,...,0.33,25.9,207.6,46.0,0.49,0.23,-0.12,0.14,0.15,0.14
3,2025,Sun Belt,App State,20.9,18.8,37.9,0.14,0.17,-0.01,0.11,...,0.09,-12.7,618.2,46.7,0.42,0.21,-0.11,-0.08,0.25,0.11
4,2025,Big 12,Arizona,32.8,30.8,63.6,0.42,0.19,-0.06,0.18,...,0.16,11.1,-25.0,8.3,0.22,0.08,-0.21,0.06,-0.01,0.02


In [15]:
year = 2025
df_records = pd.read_csv(f"../Data/Raw/{year}/games/records.csv")
df_records = df_records[df_records["classification"] == "fbs"]

# Flatten both offense and defense columns
df_records = flatten_nested_column(df_records, "total", prefix="total")
df_records = flatten_nested_column(df_records, "conferenceGames", prefix="conference")
df_records = flatten_nested_column(df_records, "homeGames", prefix="home")
df_records = flatten_nested_column(df_records, "awayGames", prefix="away")
df_records = flatten_nested_column(df_records, "neutralSiteGames", prefix="neutralSite")
df_records = flatten_nested_column(df_records, "regularSeason", prefix="regularSeason")
df_records = flatten_nested_column(df_records, "postseason", prefix="postseason")

df_records.to_csv(f"../Data/Processed/{year}/games/records.csv", index=False)
print("Cleaned Records data saved")
df_records.head()

Cleaned Records data saved


Unnamed: 0,year,teamId,team,classification,conference,division,expectedWins,total_games,total_wins,total_losses,...,neutralSite_losses,neutralSite_ties,regularSeason_games,regularSeason_wins,regularSeason_losses,regularSeason_ties,postseason_games,postseason_wins,postseason_losses,postseason_ties
5,2025,252,BYU,fbs,Big 12,,5.470165,6,6,0,...,0,0,6,6,0,0,0,0,0,0
6,2025,275,Wisconsin,fbs,Big Ten,,2.255898,6,2,4,...,0,0,6,2,4,0,0,0,0,0
7,2025,2433,UL Monroe,fbs,Sun Belt,West,3.066166,6,3,3,...,0,0,6,3,3,0,0,0,0,0
16,2025,48,Delaware,fbs,Conference USA,,2.28879,5,3,2,...,0,0,5,3,2,0,0,0,0,0
21,2025,324,Coastal Carolina,fbs,Sun Belt,East,2.849074,6,3,3,...,0,0,6,3,3,0,0,0,0,0


In [17]:
year = 2025
df_playerUsage = pd.read_csv(f"../Data/Raw/{year}/recruiting/player_usage.csv")

# Flatten both offense and defense columns
df_playerUsage = flatten_nested_column(df_playerUsage, "usage", prefix="usage")

df_playerUsage.to_csv(f"../Data/Processed/{year}/recruiting/player_usage.csv", index=False)
print("Cleaned Player Usage data saved")
df_playerUsage.head()

Cleaned Player Usage data saved


Unnamed: 0,season,id,name,position,team,conference,usage_passingDowns,usage_standardDowns,usage_thirdDown,usage_secondDown,usage_firstDown,usage_rush,usage_pass,usage_overall
0,2025,550577,Jordan Brown,WR,Fresno State,MWC,0.059,0.027,0.063,0.027,0.035,0.0,0.071,0.037
1,2025,3116099,Monte Harrison,WR,Arkansas,SEC,0.0,0.02,0.0,0.0,0.028,0.0,0.026,0.013
2,2025,3149911,Jordan Jackson,WR,Arkansas-Pine Bluff,,0.091,0.05,0.076,0.039,0.083,0.0,0.12,0.063
3,2025,3949586,Jake Taylor,TE,Duke,ACC,0.0,0.043,0.091,0.0,0.03,0.0,0.063,0.03
4,2025,4241676,Kordell Rodgers,TE,Texas Southern,,0.055,0.023,0.106,0.024,0.009,0.0,0.089,0.032


In [18]:
year = 2025
df_recruits = pd.read_csv(f"../Data/Raw/{year}/recruiting/recruits.csv")

# Flatten both offense and defense columns
df_recruits = flatten_nested_column(df_recruits, "hometownInfo", prefix="hometownInfo")

df_recruits.to_csv(f"../Data/Processed/{year}/recruiting/recruits.csv", index=False)
print("Cleaned Player Usage data saved")
df_recruits.head()

Cleaned Player Usage data saved


Unnamed: 0,id,athleteId,recruitType,year,ranking,name,school,committedTo,position,height,weight,stars,rating,city,stateProvince,country,hometownInfo_fipsCode,hometownInfo_longitude,hometownInfo_latitude
0,106192,5141741.0,HighSchool,2025,1,Bryce Underwood,Belleville,Michigan,QB,75.5,205,5,0.9998,Belleville,MI,USA,17163,-83.485211,42.204841
1,106193,5141629.0,HighSchool,2025,2,Keelon Russell,Duncanville,Alabama,QB,75.0,175,5,0.9997,Duncanville,TX,USA,48113,-96.908337,32.6518
2,106194,5141464.0,HighSchool,2025,3,Elijah Griffin,Savannah Christian Prep,Georgia,DL,77.0,300,5,0.9992,Savannah,GA,USA,13051,-81.091177,32.080926
3,106195,5141586.0,HighSchool,2025,4,Dakorien Moore,Duncanville,Oregon,WR,71.0,182,5,0.9981,Duncanville,TX,USA,48113,-96.908337,32.6518
4,106196,5141430.0,HighSchool,2025,5,Michael Fasusi,Lewisville,Oklahoma,OT,77.0,295,5,0.9977,Lewisville,TX,USA,48121,-96.994174,33.046233
