In [None]:
import pandas as pd
import numpy as np

In [None]:
# Known name variants mapped to the spelling used throughout this notebook.
name_fixes = {
    "Corey Taylor": "Corey Blackett-Taylor",
    "Craig Carl Curran": "Craig Curran",
    "Chris Edwards": "Christian Edwards",
    "Dave Nugent": "David Nugent",
    "Dylan Mottley Henry": "Dylan Mottley-Henry",
    "Ian Thomas-Moore": "Ian Moore",
    "John-Louis Akpa Akpro": "Jean-Louis Akpa Akpro",
    "John Morrissey": "Johnny Morrissey",
    "Jonathon Margetts": "Johnny Margetts",
    "Joseph Maguire": "Joe Maguire",
    "Kaylden Brown": "Kayleden Brown",
    "Lewis Sinnot": "Lewis Sinnott",
    "Lateef Elford Alliyu": "Lateef Elford-Alliyu",
    "Michael Jackson": "Mike Jackson",
    "Richard Sutton": "Ritchie Sutton",
    "Robert Taylor": "Rob Taylor",
    "Steven O'Leary": "Stephen O'Leary"
}


def fix_name(name):
    """Return the preferred spelling of a player name.

    Looks `name` up in `name_fixes`; a name with no entry is returned unchanged.
    """
    return name_fixes.get(name, name)

In [None]:
# Load the managers table; both date columns are parsed as datetimes.
managers_df = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/output/managers.csv",
    parse_dates=["date_from", "date_to"],
)

managers_df.tail(3)

In [None]:
# Save the managers table locally, without the DataFrame index.
managers_df.to_csv("data/managers.csv", index=False)

# Create `results_df`

In [None]:
def get_manager(game_date):
    """Return the manager name(s) in post on `game_date`.

    Selects the rows of `managers_df` whose `date_from`..`date_to` range
    contains `game_date` (both ends inclusive) and joins their
    `manager_name` values with " & ". Returns "" when no row matches.

    NOTE(review): a row with a missing `date_to` never satisfies the upper
    bound, so it is never matched - confirm the source data has none.
    """
    in_post = managers_df.date_from.le(game_date) & managers_df.date_to.ge(game_date)

    return " & ".join(managers_df.loc[in_post, "manager_name"])

# Load all results, parsing the game date as a datetime
results = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/update-results/main/data/results_df.csv",
    parse_dates=["game_date"],
)

# Look up the manager(s) in post on each game date
results["manager"] = results.game_date.apply(get_manager)

# Derive W/D/L from goals for vs. goals against
for outcome_code, outcome_mask in (
    ("W", results.goals_for > results.goals_against),
    ("D", results.goals_for == results.goals_against),
    ("L", results.goals_for < results.goals_against),
):
    results.loc[outcome_mask, "outcome"] = outcome_code

# Normalise the "lay-off(s)" / "lay-Off(s)" variants to "lay-Offs"
results["competition"] = results.competition.str.replace(r"lay-[oO]ffs?", "lay-Offs", regex=True)

results.head(1)

In [None]:
# Load ranking and points per game date; `ranking` is renamed to `league_pos`
league_positions = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/league-position-tool/main/docs/input/results_mini.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "ranking", "pts"]]
    .rename(columns={"ranking": "league_pos"})
    .sort_values("game_date")
    .reset_index(drop=True)
)

league_positions.head(3)

In [None]:
# Attach league position and points to each result; games with no match keep NaN
results = results.merge(league_positions, how="left", on="game_date")

results.head(1)

In [None]:
# Load the extra cup-game details; `extra_time` is renamed to `aet`
cup_game_details = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/11v11-extra-details/cup_details.csv",
    parse_dates=["game_date"],
).rename(columns={"extra_time": "aet"})

# Manual correction for the game on 1984-05-18: clear `cup_leg` and strip the
# " (1st leg)" suffix from `cup_stage`.
# regex=False makes the parentheses literal on every pandas version; with
# regex=True they would form a capture group and the suffix would not match.
is_1984_05_18 = cup_game_details.game_date == pd.Timestamp("1984-05-18")
cup_game_details.loc[is_1984_05_18, "cup_leg"] = np.nan
cup_game_details.loc[is_1984_05_18, "cup_stage"] = (
    cup_game_details.loc[is_1984_05_18, "cup_stage"].str.replace(" (1st leg)", "", regex=False)
)

# Flag three games as having gone to extra time.
# The dates are converted to datetimes first: matching a datetime column
# against strings with isin() is deprecated in pandas 2.2.
aet_dates = pd.to_datetime(["2014-10-07", "2014-12-09", "1995-11-08"])
cup_game_details.loc[cup_game_details.game_date.isin(aet_dates), "aet"] = 1

# Kick-off times use "." between hours and minutes; swap it for ":" (literal
# replacement) and parse as a 12-hour clock time
cup_game_details["ko_time"] = pd.to_datetime(
    cup_game_details.ko_time.str.replace(".", ":", regex=False),
    format="%I:%M%p",
).dt.time

# Store 0 as missing for the replay and extra-time flags
cup_game_details["cup_replay"] = cup_game_details.cup_replay.replace(0, np.nan)
cup_game_details["aet"] = cup_game_details.aet.replace(0, np.nan)

cup_game_details.head(3)

In [None]:
def outcome_decider(row):
    """Return how a cup tie was decided, or NaN if no decider is recorded.

    Checks the row's outcome fields in priority order and returns the label
    of the first one that is not missing:
    `pen_outcome` -> "pens", `away_goal_outcome` -> "away_goals",
    `gg_outcome` -> "golden_goal", `agg_outcome` -> "agg".
    The game on 1975-08-26 is special-cased to "replay" instead of "agg".

    Missing values are detected with `pd.notna`, so any NaN/None/NA counts
    as missing, not only the `np.nan` singleton.
    """
    if pd.notna(row["pen_outcome"]):
        return "pens"
    if pd.notna(row["away_goal_outcome"]):
        return "away_goals"
    if pd.notna(row["gg_outcome"]):
        return "golden_goal"
    if pd.notna(row["agg_outcome"]):
        if row["game_date"] == pd.to_datetime("1975-08-26"):
            return "replay"
        return "agg"
    return np.nan
    
def get_cup_outcome(row):
    """Return the first non-missing tie outcome on the row, or NaN.

    Fields are checked in priority order: `pen_outcome`,
    `away_goal_outcome`, `gg_outcome`, `agg_outcome`.

    Missing values are detected with `pd.notna`, so any NaN/None/NA counts
    as missing, not only the `np.nan` singleton.
    """
    for field in ("pen_outcome", "away_goal_outcome", "gg_outcome", "agg_outcome"):
        if pd.notna(row[field]):
            return row[field]
    return np.nan
    
def get_outcome_desc(row):
    """Return a readable description of how a cup tie was decided, or NaN.

    Uses the same priority order as the other outcome helpers: penalties
    (prefixed with the aggregate score when there is one), away goals,
    golden goal, then aggregate. The game on 1975-08-26 is described as
    "Game replayed". "W"/"L" codes are spelled out as "Won"/"Lost".

    Missing values are detected with `pd.notna`, so any NaN/None/NA counts
    as missing, not only the `np.nan` singleton.
    """
    def spell_out(text):
        # Expand the outcome codes; both replacements keep the trailing space
        # so the word never runs into the score that follows it.
        return text.replace("W ", "Won ").replace("L ", "Lost ")

    if pd.notna(row["pen_outcome"]):
        pens = row["pen_outcome"] + " " + row["pen_score"] + " on pens"
        if pd.notna(row["agg_outcome"]):
            return spell_out(row["agg_score"] + " on agg. " + pens)
        return spell_out(pens)
    if pd.notna(row["away_goal_outcome"]):
        return spell_out(row["away_goal_outcome"] + " on away goals rule")
    if pd.notna(row["gg_outcome"]):
        return spell_out(row["gg_outcome"] + " on golden goal")
    if pd.notna(row["agg_outcome"]):
        if row["game_date"] == pd.to_datetime("1975-08-26"):
            return "Game replayed"
        return spell_out(row["agg_outcome"] + " " + row["agg_score"] + " on agg")
    return np.nan
    
# Add the three derived outcome columns, one row-wise helper per column
for new_col, row_func in (
    ("decider", outcome_decider),
    ("cup_outcome", get_cup_outcome),
    ("outcome_desc", get_outcome_desc),
):
    cup_game_details[new_col] = cup_game_details.apply(row_func, axis=1)

In [None]:
def calc_game_length(row):
    """Return the length of a game in minutes.

    * 116 when a golden-goal outcome is recorded
      (NOTE(review): presumably a fixed stand-in for the real finishing
      minute - confirm)
    * 120 when the `aet` flag equals 1
    * 90 otherwise

    A golden-goal outcome is detected with `pd.notna`, so any NaN/None/NA
    counts as missing, not only the `np.nan` singleton.
    """
    if pd.notna(row["gg_outcome"]):
        return 116
    # gg_outcome is known to be missing here, so only the aet flag matters
    if row["aet"] == 1:
        return 120
    return 90

# Select and order the output columns (`ssn_game_no` becomes `game_no`), then
# attach the cup-game details by game date
results_df = (
    results
    .rename(columns={"ssn_game_no": "game_no"})
    [[
        "season", "game_date", "game_no", "opposition", "venue", "score",
        "outcome", "goals_for", "goals_against", "goal_diff", "game_type",
        "competition", "generic_comp", "ssn_comp_game_no", "league_tier",
        "league_pos", "pts", "attendance", "weekday", "manager",
    ]]
    .merge(cup_game_details, how="left", on="game_date")
)

# Length of each game in minutes
results_df["game_length"] = results_df.apply(calc_game_length, axis=1)

results_df.to_csv("data/results.csv", index=False)

# Create `goals_df`

In [None]:
# Load the Soccerbase player appearances, dropping the `sb_` prefix from the id columns
sb_match_apps = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/update-player-stats/main/data/players_df.csv",
    parse_dates=["game_date"],
).rename(columns={"sb_game_id": "game_id", "sb_player_id": "player_id"})

# Strip "tpg" from the game ids and store them as integers
sb_match_apps["game_id"] = sb_match_apps.game_id.str.replace("tpg", "").astype(int)

# Apply the known name corrections
sb_match_apps["player_name"] = sb_match_apps.player_name.apply(fix_name)

In [None]:
# One row per Soccerbase game: id, date and season, in date order
sb_game_ids = (
    sb_match_apps[["game_id", "game_date", "season"]]
    .drop_duplicates()
    .sort_values(by=["game_date"])
    .reset_index(drop=True)
)

sb_game_ids.head(3)

In [None]:
# One row per distinct (player id, player name) pair
sb_player_ids = (
    sb_match_apps[["player_id", "player_name"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
sb_player_ids["player_name"] = sb_player_ids.player_name.apply(fix_name)

sb_player_ids.head(3)

In [None]:
# Game date, season and game number (from `ssn_game_no`) for every result, in date order
game_nos = (
    results[["game_date", "season", "ssn_game_no"]]
    .rename(columns={"ssn_game_no": "game_no"})
    .sort_values(["game_date"])
    .reset_index(drop=True)
)

game_nos.head(3)

### Import goal details scraped from Soccerbase

**Seasons:** 1996/97 - 2022/23

**Fields:**
- `player_name`
- `goal_min` (renamed from `minute`)
- `penalty` (0/1)
- `own_goal` (0/1)
- `game_date` _(after join)_

In [None]:
sb_goals = (
    # Import Soccerbase goal details
    pd.read_csv("https://raw.githubusercontent.com/petebrown/scrape-goals/main/data/goals.csv")
    # Keep only rows whose goal_type is "for"
    .loc[lambda goals: goals["goal_type"] == "for"]
    # Fix player names
    .assign(player_name=lambda goals: goals.player_name.apply(fix_name))
    # Add game dates via a join on the Soccerbase game id
    .merge(sb_game_ids, on="game_id", how="left")
    .sort_values(["game_date", "minute"])
    # Reduce to the columns shared with the other goal sources
    [["game_date", "player_name", "minute", "penalty", "own_goal"]]
    .rename(columns={"minute": "goal_min"})
)

sb_goals.head(3)

### Import goals from Complete Record

**Seasons:** 1921/22 - 1995/96 (after filter)

**Fields:**

* `game_date`
* `player_name`
* `goals_scored` (no. of goals scored by player in game)

In [None]:
# Import Complete Record scorers, add game dates, and convert to a list of
# records (one per player per game, with a `goals_scored` count)
all_goals = (
    pd.read_csv("https://raw.githubusercontent.com/petebrown/complete-record/main/output/scorers-long.csv")
    .merge(game_nos, how="left", on=["season", "game_no"])
    [["season", "game_date", "player_name", "goals_scored"]]
    .to_dict("records")
)

# Expand to one row per goal: each record is repeated `goals_scored` times
cr_goals = pd.DataFrame(
    [goal for goal in all_goals for _ in range(goal["goals_scored"])]
).drop(columns="goals_scored")

# Add own_goal column to match sb_goals: 1 where the scorer is recorded as "OG", else 0
cr_goals["own_goal"] = (cr_goals.player_name == "OG").astype(int)

# Keep only seasons before 1996/97, then drop the season column
cr_goals = cr_goals[cr_goals.season < "1996"].drop(columns="season")

cr_goals.head(3)

### Import manually collected goal minutes

In [None]:
cr_goal_mins = (
    # Import manually collected goal minute data, ordered by date and minute
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/seasons/manual/goal_mins.csv",
        parse_dates=["game_date"],
    )
    .sort_values(["game_date", "goal_min"])
    # Amend penalty column to 0/1: 1 where the value is "pen", otherwise 0
    .assign(penalty=lambda mins: mins.penalty.eq("pen").astype(int))
    # Number each player's goals within a game, e.g. 1-3 for a hat-trick
    .assign(pl_goal_no=lambda mins: mins.groupby(["player_name", "game_date"]).cumcount() + 1)
)

cr_goal_mins.head(3)

### Join manually collected goal minutes to Complete Record goal data

In [None]:
cr_goals = (
    cr_goals
    # For the join: number each player's goals within a game, e.g. 1-3 for a hat-trick
    .assign(pl_goal_no=lambda goals: goals.groupby(["player_name", "game_date"]).cumcount() + 1)
    # Attach the manually collected goal minutes, then drop the temporary key
    .merge(cr_goal_mins, how="left", on=["game_date", "player_name", "pl_goal_no"])
    .drop(columns=["pl_goal_no"])
)

cr_goals.head(3)

### Import FA Trophy goals

**Fields:**

* `game_date`
* `player_name`
* `goal_min` (renamed from `minute`)
* `penalty` (0/1)
* `own_goal` (0/1)

In [None]:
# Load FA Trophy goals; `minute` is renamed to `goal_min` to match the other goal tables
fa_trophy_goals = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_goals.csv",
    parse_dates=["game_date"],
).rename(columns={"minute": "goal_min"})

fa_trophy_goals.head(3)

In [None]:
# Stack the Complete Record, Soccerbase and FA Trophy goals, order them by
# date and minute, and keep the shared columns
goals_df = (
    pd.concat([cr_goals, sb_goals, fa_trophy_goals], axis=0)
    .sort_values(by=["game_date", "goal_min"])
    [["game_date", "player_name", "goal_min", "penalty", "own_goal"]]
)

goals_df.head(3)

#### Save goals to CSV

In [None]:
# Save the combined goals table, without the DataFrame index
goals_df.to_csv("data/goals.csv", index=False)

# Create `sub_mins_df`

In [None]:
# Distinct (game date, season) pairs in date order
season_dates = (
    results[["game_date", "season"]]
    .drop_duplicates()
    .sort_values("game_date")
    .reset_index(drop=True)
)

season_dates.head(3)

In [None]:
# Number each season's game dates 1..n in date order.
# NOTE(review): this rebinds `game_nos`, which an earlier cell built from
# `results.ssn_game_no`; later cells use this version.
game_nos = season_dates.assign(
    game_no=lambda dates: dates.sort_values("game_date").groupby("season").cumcount() + 1
)

game_nos.head(3)

In [None]:
# Preview the game-id lookup table that is joined to the Soccerbase events below
sb_game_ids.head(3)

In [None]:
# Preview the player-id lookup table that is joined to the Soccerbase events below
sb_player_ids.head(3)

In [None]:
# Import the Soccerbase subs and red card CSV, then add game dates and player names
sb_sub_mins = (
    pd.read_csv("https://raw.githubusercontent.com/petebrown/scrape-events/main/data/subs-and-reds.csv")
    .merge(sb_game_ids, how="left", on="game_id")
    .merge(sb_player_ids, how="left", on="player_id")
    .sort_values("game_date")
    [["game_date", "player_name", "min_on", "min_off"]]
    .reset_index(drop=True)
)

# Import manual fixes to Soccerbase sub data
sb_sub_min_fixes = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/seasons/manual/sb_sub_fixes.csv",
    parse_dates=["game_date"],
)[["game_date", "player_name", "min_on", "min_off"]]

# Fix known error with Luke Norris sub v. Barrow: blank his min_off
sb_sub_mins.loc[
    (sb_sub_mins.game_date == "2023-08-05") & (sb_sub_mins.player_name == "Luke Norris"),
    "min_off",
] = np.nan

# Filter out sendings off, i.e. rows with neither a min_on nor a min_off
sb_sub_mins = sb_sub_mins.dropna(subset=["min_on", "min_off"], how="all")

# Filter out records for dates with manual fixes
sb_sub_mins = sb_sub_mins[~sb_sub_mins.game_date.isin(sb_sub_min_fixes.game_date.unique())]

# Append manual fixes to Soccerbase sub data
sb_sub_mins = (
    pd.concat([sb_sub_mins, sb_sub_min_fixes])
    .sort_values("game_date")
    .reset_index(drop=True)
)

sb_sub_mins.head(3)

In [None]:
# Load Complete Record sub minutes; "0" is read as missing
cr_sub_mins = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/output/cr_subs_and_reds.csv",
    parse_dates=["game_date"],
    na_values=["0"],
)[["game_date", "player_name", "min_off", "min_on"]]

cr_sub_mins.head(3)

In [None]:
# Load FA Trophy appearances and keep rows with a positive min_off or min_on
fa_trophy_sub_mins = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_fixtures.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "player_name", "min_off", "min_on"]]
    .query("min_off > 0 or min_on > 0")
)

fa_trophy_sub_mins.head(3)

In [None]:
# Stack the three sub-minute sources and order them by game date
sub_mins_df = (
    pd.concat([cr_sub_mins, sb_sub_mins, fa_trophy_sub_mins])
    .sort_values("game_date")
    .reset_index(drop=True)
)

sub_mins_df.head(3)

# Create `shirt_nos_df`

In [None]:
# Save the combined sub minutes, without the DataFrame index
sub_mins_df.to_csv("data/sub_mins.csv", index=False)

In [None]:
# Load the squad-number table
shirt_nos_df = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/squad_nos/squad_nos.csv"
)

shirt_nos_df.head(3)

# Create `red_cards_df`

In [None]:
# Load the manually collected red cards
cr_red_cards = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/seasons/manual/cards_red.csv",
    parse_dates=["game_date"],
)

cr_red_cards.head(3)

In [None]:
# Load the Soccerbase subs and red card CSV again, adding game dates and player names
sb_red_cards = (
    pd.read_csv("https://raw.githubusercontent.com/petebrown/scrape-events/main/data/subs-and-reds.csv")
    .merge(sb_game_ids, how="left", on="game_id")
    .merge(sb_player_ids, how="left", on="player_id")
    [["game_date", "player_name", "min_so"]]
    .sort_values("game_date")
)

# Keep only rows that record a sending-off minute
sb_red_cards = sb_red_cards[sb_red_cards.min_so.notna()].reset_index(drop=True)

sb_red_cards.head(3)

In [None]:
# Load FA Trophy cards and keep the rows flagged as red cards
fa_trophy_reds = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_cards.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "player_name", "red_card", "min_so"]]
    .query("red_card == 1")
    .drop(columns="red_card")
    .reset_index(drop=True)
)

fa_trophy_reds.head(3)

In [None]:
# Stack the three red-card sources and order them by date and minute
red_cards_df = (
    pd.concat([cr_red_cards, sb_red_cards, fa_trophy_reds], axis=0)
    .sort_values(["game_date", "min_so"])
    .reset_index(drop=True)
)

red_cards_df.head(3)

#### Save red_cards to CSV

In [None]:
# Save the combined red cards, without the DataFrame index
red_cards_df.to_csv("data/red_cards.csv", index=False)

# Create `yellow_cards_df`

In [None]:
# Load the manually collected yellow cards
cr_yellows = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/seasons/manual/cards_yellow.csv",
    parse_dates=["game_date"],
)

cr_yellows.head(3)

In [None]:
# Soccerbase appearances with at least one yellow card, in date order
sb_yellows = (
    sb_match_apps[["game_date", "player_name", "yellow_cards"]]
    .query("yellow_cards > 0")
    .sort_values(by="game_date")
)

sb_yellows.head(3)

In [None]:
# Load FA Trophy cards and keep the rows flagged as yellow cards; the flag
# column is renamed to match sb_yellows
fa_trophy_yellows = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_cards.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "player_name", "yellow_card"]]
    .query("yellow_card == 1")
    .rename(columns={"yellow_card": "yellow_cards"})
)

fa_trophy_yellows.head(3)

In [None]:
# Stack the three yellow-card sources, order by date and drop the count column
yellow_cards_df = (
    pd.concat([cr_yellows, sb_yellows, fa_trophy_yellows], axis=0)
    .sort_values(by="game_date")
    .drop(columns=["yellow_cards"])
    .reset_index(drop=True)
)

yellow_cards_df.head(3)

#### Save yellow_cards to CSV

In [None]:
# Save the combined yellow cards, without the DataFrame index
yellow_cards_df.to_csv("data/yellow_cards.csv", index=False)

# Create `player_apps_df`

In [None]:
# Load the player-season records from player_ssns_9798.csv
pl_ssns_9798 = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/output/player_ssns_9798.csv"
)
pl_ssns_9798.head(3)

In [None]:
# Load the player-season records from player_ssns_9899.csv
pl_ssns_9899 = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/output/player_ssns_9899.csv"
)
pl_ssns_9899.head(3)

In [None]:
# Load the main player-season records from player_ssns.csv
pl_ssns = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/output/player_ssns.csv"
)
pl_ssns.head(3)

In [None]:
# Stack the three player-season tables and order them by surname, forename and `ssn`
cr_pl_ssns = (
    pd.concat([pl_ssns, pl_ssns_9798, pl_ssns_9899], axis=0)
    .sort_values(by=["surname", "forename", "ssn"])
    .reset_index(drop=True)
)
cr_pl_ssns.head(3)

In [None]:
# Lookup from (season, disam_name) to player_name
cr_pl_ssns_prepped = (
    cr_pl_ssns[["season", "disam_name", "player_name"]]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Load Complete Record appearances, add game dates, and swap the name in the
# appearances file for the `player_name` from the lookup (matched on `disam_name`)
cr_player_apps = (
    pd.read_csv("https://raw.githubusercontent.com/petebrown/complete-record/main/output/apps_long.csv")
    .merge(game_nos, how="left", on=["season", "game_no"])
    .merge(
        cr_pl_ssns_prepped,
        how="left",
        left_on=["season", "player_name"],
        right_on=["season", "disam_name"],
    )
    .rename(columns={"player_name_y": "player_name"})
    [["game_date", "player_name", "shirt_no", "role"]]
)

cr_player_apps.head(3)

In [None]:
# Load FA Trophy appearances with the same columns as cr_player_apps
fa_trophy_player_apps = pd.read_csv(
    "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_fixtures.csv",
    parse_dates=["game_date"],
)[["game_date", "player_name", "shirt_no", "role"]]

fa_trophy_player_apps.head(3)

In [None]:
def get_squad_no(season, player_name, game_date):
    """Return a player's squad number for a season, or None if not found.

    Janoi Donacien in 2014/15 is special-cased: 19 before 2015-03-07 and 12
    from that date on. Everyone else is looked up in `shirt_nos_df` by
    `season` and `player_name`, taking the first matching `squad_no`.

    Only the expected "nothing to return" cases yield None: no matching row,
    or a `game_date` that cannot be compared with a Timestamp. Other errors
    (e.g. a column missing from `shirt_nos_df`) are raised rather than being
    silently turned into None.
    """
    if season == "2014/15" and player_name == "Janoi Donacien":
        switch_date = pd.Timestamp("2015-03-07")
        try:
            if game_date < switch_date:
                return 19
            if game_date >= switch_date:
                return 12
        except TypeError:
            # game_date is not comparable with a Timestamp
            return None
        # Neither comparison held (e.g. NaT): fall through to the table lookup

    matches = shirt_nos_df.loc[
        (shirt_nos_df.season == season) & (shirt_nos_df.player_name == player_name),
        "squad_no",
    ]
    if matches.empty:
        return None
    return matches.iloc[0]

# Soccerbase appearances, excluding 1996/97 - 1998/99
sb_player_apps = sb_match_apps.loc[
    ~sb_match_apps.season.isin(["1996/97", "1997/98", "1998/99"]),
    ["game_date", "player_name"],
].copy()

# The lookup runs over every row of sb_match_apps; the assignment aligns on
# the index, so only the rows kept in sb_player_apps receive a value
sb_player_apps["shirt_no"] = sb_match_apps.apply(
    lambda app: get_squad_no(app.season, app.player_name, app.game_date),
    axis=1,
)

In [None]:
def get_role(game_date, player_name):
    """Classify an appearance as "sub" or "starter".

    Returns "sub" when `sub_mins_df` has at least one row for this player
    and date with a non-missing `min_on`; otherwise returns "starter".
    """
    came_on = (
        sub_mins_df.min_on.notna()
        & (sub_mins_df.game_date == game_date)
        & (sub_mins_df.player_name == player_name)
    )

    return "sub" if came_on.any() else "starter"

# Label every Soccerbase appearance as "starter" or "sub"
sb_player_apps["role"] = sb_player_apps.apply(
    lambda app: get_role(app.game_date, app.player_name),
    axis=1,
)

sb_player_apps.head(3)

In [None]:
# Stack the three appearance sources and order them by date, role and shirt number
player_apps = (
    pd.concat([cr_player_apps, sb_player_apps, fa_trophy_player_apps], axis=0)
    .sort_values(by=["game_date", "role", "shirt_no"])
    .reset_index(drop=True)
)

player_apps.head(3)

#### Save player_apps to CSV

In [None]:
# Save the combined appearances, without the DataFrame index
player_apps.to_csv("data/players_apps.csv", index=False)

# Create `subs_df`

In [None]:
# Load Complete Record appearances again, add game dates and the name from
# the lookup, and keep the substitution columns
cr_subs = (
    pd.read_csv("https://raw.githubusercontent.com/petebrown/complete-record/main/output/apps_long.csv")
    .merge(game_nos, how="left", on=["season", "game_no"])
    .merge(
        cr_pl_ssns_prepped,
        how="left",
        left_on=["season", "player_name"],
        right_on=["season", "disam_name"],
    )
    .rename(columns={"player_name_y": "player_name"})
    .sort_values(["game_date", "shirt_no"])
    [["game_date", "shirt_no", "player_name", "on_for", "off_for"]]
)

# Keep only rows that have an on_for or an off_for value
cr_subs = cr_subs.dropna(subset=["on_for", "off_for"], how="all")
cr_subs.head(3)

In [None]:
# Soccerbase sub minutes for dates that have no Complete Record subs, shown
# for the game on 2023-08-05 as a check
sb_subs = sb_sub_mins.loc[~sb_sub_mins.game_date.isin(cr_subs.game_date)].copy()
sb_subs.query("game_date == '2023-08-05'")

In [None]:
# Soccerbase sub minutes for dates that have no Complete Record subs, with
# `sub_min` set to the lower of min_on and min_off
sb_subs = (
    sb_sub_mins
    .loc[~sb_sub_mins.game_date.isin(cr_subs.game_date)]
    .assign(sub_min=lambda subs: subs[["min_on", "min_off"]].min(axis=1))
)

# Split into players coming on and players going off
sb_subs_on = sb_subs.dropna(subset=["min_on"])
sb_subs_off = sb_subs.dropna(subset=["min_off"])

sb_subs_on.head(3)

In [None]:
# (game date, minute) pairs where exactly one player came on
sb_subs_on_unique = (
    sb_subs_on
    .groupby(["game_date", "min_on"])
    .size()
    .reset_index(name="count")
    .query("count == 1")
    .drop(columns=["count"])
)

# Keep the subs-on rows for those pairs and add shirt numbers from player_apps
sb_subs_on_unique = (
    sb_subs_on
    .merge(sb_subs_on_unique, on=["game_date", "min_on"], how="inner")
    .merge(player_apps, on=["game_date", "player_name"], how="inner")
    [["game_date", "shirt_no", "player_name", "min_on"]]
)

sb_subs_on_unique.head(3)

In [None]:
# (game date, minute) pairs where exactly one player went off
sb_subs_off_unique = (
    sb_subs_off
    .groupby(["game_date", "min_off"])
    .size()
    .reset_index(name="count")
    .query("count == 1")
    .drop(columns=["count"])
)

# Keep the subs-off rows for those pairs and add shirt numbers from player_apps
sb_subs_off_unique = (
    sb_subs_off
    .merge(sb_subs_off_unique, on=["game_date", "min_off"], how="inner")
    .merge(player_apps, on=["game_date", "player_name"], how="inner")
    [["game_date", "shirt_no", "player_name", "min_off"]]
)

sb_subs_off_unique.tail(10)

In [None]:
# Pair each player coming on with the player going off in the same minute of
# the same game. One row per player coming on: `on_for` is the shirt number
# of the player who went off.
sb_subs_1 = (
    sb_subs_on_unique
    .merge(
        sb_subs_off_unique,
        how="inner",
        left_on=["game_date", "min_on"],
        right_on=["game_date", "min_off"],
    )
    .rename(columns={
        "shirt_no_x": "shirt_no",
        "player_name_x": "player_name",
        "shirt_no_y": "on_for",
    })
    [["game_date", "shirt_no", "player_name", "min_on", "on_for"]]
)

sb_subs_1.head(3)

In [None]:
# Same pairing as above, from the other side. One row per player going off:
# `off_for` is the shirt number of the player who came on.
sb_subs_2 = (
    sb_subs_on_unique
    .merge(
        sb_subs_off_unique,
        how="inner",
        left_on=["game_date", "min_on"],
        right_on=["game_date", "min_off"],
    )
    .rename(columns={
        "shirt_no_y": "shirt_no",
        "player_name_y": "player_name",
        "shirt_no_x": "off_for",
    })
    [["game_date", "shirt_no", "player_name", "min_off", "off_for"]]
)

sb_subs_2.head(3)

In [None]:
# Stack both sides of each substitution and keep the columns shared with cr_subs
sb_subs = (
    pd.concat([sb_subs_1, sb_subs_2], axis=0)
    .sort_values("game_date")
    .reset_index(drop=True)
    [["game_date", "shirt_no", "player_name", "on_for", "off_for"]]
)

sb_subs.head(3)

In [None]:
# Import manual sub fixes, in date order
sb_sub_fixes = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/seasons/manual/sb_sub_fixes.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "shirt_no", "player_name", "on_for", "off_for"]]
    .sort_values(["game_date"])
)

sb_sub_fixes.head(3)

In [None]:
# Load FA Trophy appearances and keep rows with a positive on_for or off_for
fa_trophy_subs = (
    pd.read_csv(
        "https://raw.githubusercontent.com/petebrown/complete-record/main/fa-trophy/fa_trophy_fixtures.csv",
        parse_dates=["game_date"],
    )
    [["game_date", "shirt_no", "player_name", "on_for", "off_for"]]
    .query("on_for > 0 or off_for > 0")
)

fa_trophy_subs.head(3)

In [None]:
# Stack the three substitution sources, dropping any date covered by the manual fixes
subs_df = (
    pd.concat([cr_subs, sb_subs, fa_trophy_subs], axis=0)
    .loc[lambda subs: ~subs.game_date.isin(sb_sub_fixes.game_date)]
)

# Append the manual fixes and order by game date
subs_df = (
    pd.concat([subs_df, sb_sub_fixes], axis=0)
    .sort_values("game_date")
    .reset_index(drop=True)
    [["game_date", "shirt_no", "player_name", "on_for", "off_for"]]
)

subs_df.head(3)

#### Save subs_df to CSV

In [None]:
# Save the combined substitutions, without the DataFrame index
subs_df.to_csv("data/subs.csv", index=False)

In [None]:
# Game length in minutes per game date, for the minutes-played calculation
game_lengths = results_df.loc[:, ["game_date", "game_length"]]

game_lengths.head(3)

In [None]:
def calc_mins_played(row):
    """Return the minutes a player spent on the pitch in one game.

    Reads `role`, `min_on`, `min_off`, `min_so` and `game_length` from
    `row`. The player's time ends at the sending-off minute if there is
    one, otherwise at the minute they were subbed off, otherwise at the end
    of the game. A "starter" is on from minute 0 and a "sub" from `min_on`.

    Returns the string "OTHER" when `role` is neither "starter" nor "sub".

    A substitute who is sent off without being subbed off is counted up to
    `min_so`, not to the end of the game. Missing values are detected with
    `pd.isna`, so None and NA are handled as well as NaN.
    """
    role = row["role"]
    min_on = row["min_on"]
    min_off = row["min_off"]
    min_so = row["min_so"]
    game_length = row["game_length"]

    if role not in ("starter", "sub"):
        return "OTHER"

    # When the player left the pitch: a sending off takes priority over a substitution
    if not pd.isna(min_so):
        left_at = min_so
    elif not pd.isna(min_off):
        left_at = min_off
    else:
        left_at = game_length

    if role == "starter":
        return left_at
    # Subbed on: only count the time after coming on
    return left_at - min_on
    

# One row per appearance, with sub minutes, sending-off minute and game length attached
player_events = (
    player_apps
    .merge(sub_mins_df, how="left", on=["game_date", "player_name"])
    .merge(red_cards_df, how="left", on=["game_date", "player_name"])
    .merge(game_lengths, how="left", on=["game_date"])
)

# Minutes on the pitch for each appearance
player_events["mins_played"] = player_events.apply(calc_mins_played, axis=1)

player_events.head()

In [None]:
# Data check: show the last 10 appearances with role "sub" but no min_on recorded
player_events[(player_events.role == "sub") & (np.isnan(player_events.min_on))].tail(10)