## Simplified Data Cleaning Pipeline for FBRef

In [3]:
from pathlib import Path
import sys
sys.path.append("..")

from src.preprocessing.clean_fbref_data import (
    load_fbref_season_data,
    drop_matches_column,
    find_players_in_multiple_seasons,
    add_age_from_latest_season,
    save_to_interim
)

# Team being processed and the folder its raw FBRef files are loaded from.
team_name = "Valencia CF"
raw_base = Path("..") / "data" / "raw" / team_name / "fbref"

# Load the three seasons in chronological order. Each result is used below as
# a mapping that contains at least a "df_player_stats_<season>" entry.
data_2223, data_2324, data_2425 = (
    load_fbref_season_data(season, raw_base)
    for season in ("2223", "2324", "2425")
)

# Merge the per-season mappings into a single one; on a key clash the later
# season's entry wins.
all_data = dict(data_2223)
all_data.update(data_2324)
all_data.update(data_2425)

# Run the column clean-up over the merged mapping (presumably drops a
# "matches" column from every table -- confirm in clean_fbref_data).
all_data = drop_matches_column(all_data)

# Optional continuity tracking: pair each season's player-stats table with its
# season code. These tables are read from the per-season mappings, not from
# the cleaned `all_data`.
season_stats = [
    (season_tables[f"df_player_stats_{season}"], season)
    for season, season_tables in (
        ("2223", data_2223),
        ("2324", data_2324),
        ("2425", data_2425),
    )
]

# Find players present in more than one season, then enrich the result using
# the most recent (2425) player-stats table.
multi_season_players = add_age_from_latest_season(
    find_players_in_multiple_seasons(season_stats),
    data_2425["df_player_stats_2425"],
)

# Persist the cleaned tables for this team.
save_to_interim(all_data, team_name)

Saved: ../data/interim/Valencia CF/fbref/df_player_stats_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_shooting_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_passing_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_passing_types_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_gca_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_defense_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_possession_2223.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_stats_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_shooting_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_passing_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_passing_types_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_gca_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_defense_2324.csv
Saved: ../data/interim/Valencia CF/fbref/df_player_possession_2324.csv
Saved: ../data/interim/Valencia CF/fbref