In [2]:
library(tidyverse)

# Master results table, downloaded from the update-results repository.
# Column-type messages from readr are suppressed.
results_df <- read_csv(
    "https://raw.githubusercontent.com/petebrown/update-results/main/data/results_df.csv",
    show_col_types = FALSE
)

# Read one season's CSV from ./csv/ and tag every row with its season label
# and its position in the file.
#
# year: numeric start year of the season (1921 reads "./csv/1921-22.csv").
#
# Returns the file's contents with two extra columns: `season` (formatted
# like "1921/22") and `game_no` (the row number within the file). Columns
# whose names start with "..." are dropped (presumably readr's placeholder
# names for unnamed columns -- confirm against the CSVs).
read_ssn <- function(year) {
    end_yy <- substr(as.character(year + 1), 3, 4)
    csv_path <- paste0("./csv/", year, "-", end_yy, ".csv")
    season_label <- paste0(year, "/", end_yy)

    season_raw <- read_csv(csv_path, show_col_types = FALSE)
    season_tagged <- mutate(
        season_raw,
        season = season_label,
        game_no = row_number()
    )

    select(season_tagged, -starts_with("..."))
}

# Load each season's sheet; the argument is the season's start year.
s21_22 <- read_ssn(year = 1921)
s45_46 <- read_ssn(year = 1945)
s53_54 <- read_ssn(year = 1953)
s68_69 <- read_ssn(year = 1968)
s85_86 <- read_ssn(year = 1985)
s86_87 <- read_ssn(year = 1986)
s87_88 <- read_ssn(year = 1987)
s88_89 <- read_ssn(year = 1988)
s89_90 <- read_ssn(year = 1989)
s90_91 <- read_ssn(year = 1990)
s91_92 <- read_ssn(year = 1991)
s92_93 <- read_ssn(year = 1992)
s93_94 <- read_ssn(year = 1993)
s94_95 <- read_ssn(year = 1994)
s95_96 <- read_ssn(year = 1995)
s96_97 <- read_ssn(year = 1996)
s97_98 <- read_ssn(year = 1997)
s98_99 <- read_ssn(year = 1998)

In [3]:
# Turn one season's wide sheet (one row per game, one column per player)
# into a long table with one row per player appearance.
#
# ssn_df: a season table carrying `season` and `game_no` columns, as
#         produced by read_ssn(); every remaining column is treated as a
#         player column.
#
# Reads the global `results_df` to attach `competition` to each game.
#
# Returns a tibble with season, competition, game_no, player_name,
# appearance (the raw cell text), off_for, shirt_no and role, sorted by
# game_no then shirt_no.
df_to_long <- function(ssn_df) {
    # Match details for every game; ssn_game_no is renamed so it lines up
    # with the season sheet's join key.
    match_info <- results_df %>%
        select(
            season,
            game_no = ssn_game_no,
            game_date,
            competition,
            venue,
            opposition,
            outcome,
            score,
            attendance
        )

    # Attach the season sheet, then coerce everything except game_no to
    # character so the player columns share one type when pivoted.
    joined <- match_info %>%
        inner_join(ssn_df, by = c("season", "game_no")) %>%
        arrange(game_date) %>%
        mutate(across(-game_no, as.character)) %>%
        select(-c(game_date, venue, opposition, outcome, score, attendance))

    # One row per (game, player); blank cells mean the player did not appear.
    appearances <- joined %>%
        pivot_longer(
            cols = -c("season", "competition", "game_no"),
            names_to = "player_name",
            values_to = "appearance"
        ) %>%
        filter(!is.na(appearance))

    appearances %>%
        mutate(
            # A marker on the cell gives the shirt number the player went
            # off for: "*" -> 12, "#" -> 13, "+" -> 14.
            off_for = as.numeric(case_when(
                str_detect(appearance, "\\*") ~ "12",
                str_detect(appearance, "#") ~ "13",
                str_detect(appearance, "\\+") ~ "14",
                .default = NA_character_
            )),
            # The shirt number is whatever remains once markers are removed.
            shirt_no = as.numeric(str_remove_all(appearance, "\\*|\\+|\\#")),
            role = ifelse(shirt_no <= 11, "starter", "sub")
        ) %>%
        arrange(game_no, shirt_no)
}

# Reshape every loaded season into long (one row per appearance) form.
s21_22_long <- df_to_long(s21_22)
s45_46_long <- df_to_long(s45_46)
s53_54_long <- df_to_long(s53_54)
s68_69_long <- df_to_long(s68_69)
s85_86_long <- df_to_long(s85_86)
s86_87_long <- df_to_long(s86_87)
s87_88_long <- df_to_long(s87_88)
s88_89_long <- df_to_long(s88_89)
s89_90_long <- df_to_long(s89_90)
s90_91_long <- df_to_long(s90_91)
s91_92_long <- df_to_long(s91_92)
s92_93_long <- df_to_long(s92_93)
s93_94_long <- df_to_long(s93_94)
s94_95_long <- df_to_long(s94_95)
s95_96_long <- df_to_long(s95_96)
s96_97_long <- df_to_long(s96_97)
s97_98_long <- df_to_long(s97_98)
s98_99_long <- df_to_long(s98_99)

# Stack all seasons in chronological order. 1953/54, 1968/69 and 1985/86 are
# included here: they were converted above but previously left out of the
# combined table, so their appearances never reached the output.
long_ssns <- rbind(
    s21_22_long,
    s45_46_long,
    s53_54_long,
    s68_69_long,
    s85_86_long,
    s86_87_long,
    s87_88_long,
    s88_89_long,
    s89_90_long,
    s90_91_long,
    s91_92_long,
    s92_93_long,
    s93_94_long,
    s94_95_long,
    s95_96_long,
    s96_97_long,
    s97_98_long,
    s98_99_long
)

In [4]:
# Build the final appearance table: one row per player appearance with
#   shirt_no - the player's shirt number,
#   off_for  - shirt number of the substitute who replaced the player,
#   on_for   - shirt number of the player this substitute replaced.
#
# on_for is derived by self-joining long_ssns: a row whose shirt_no equals
# another row's off_for (same season and game) is the substitute who came on
# for that other row's player. Manual corrections for specific 1997/98 and
# 1998/99 games are applied afterwards.
final_ssn_df <- long_ssns %>%
    left_join(
        # Only players who actually went off can supply an on_for value.
        # Rows with a missing off_for are excluded so that they cannot be
        # matched NA-to-NA against rows whose shirt number failed to parse.
        long_ssns %>%
            filter(!is.na(off_for)) %>%
            select(
                season,
                game_no,
                shirt_no,
                off_for
            ),
        by = c(
            "season" = "season",
            "game_no" = "game_no",
            "shirt_no" = "off_for"
        )
    ) %>%
    # The right-hand table's shirt_no is the shirt of the replaced player.
    rename(
        on_for = shirt_no.y
    ) %>%
    select(
        season,
        game_no,
        player_name,
        shirt_no,
        role,
        on_for,
        off_for
    ) %>%
    mutate(
        # Substitutes whose shirt number is overridden to 15 or 16.
        shirt_no = case_when(
            season == "1997/98" & game_no == 31 & player_name == "Mahon" ~ 15,
            season == "1997/98" & game_no == 31 & player_name == "Hill" ~ 16,
            season == "1997/98" & game_no == 38 & player_name == "Mellon" ~ 15,
            season == "1998/99" & game_no == 4 & player_name == "Koumas" ~ 15,
            season == "1998/99" & game_no == 9 & player_name == "Mahon" ~ 15,
            season == "1998/99" & game_no == 11 & player_name == "Jones G" ~ 15,
            season == "1998/99" & game_no == 19 & player_name == "Challinor" ~ 15,
            season == "1998/99" & game_no == 19 & player_name == "Parkinson" ~ 16,
            season == "1998/99" & game_no == 32 & player_name == "Jones L" ~ 16,
            .default = shirt_no
        ),
        # Point the replaced players at the corrected substitute numbers.
        off_for = case_when(
            season == "1997/98" & game_no == 31 & off_for == 12 ~ 15,
            season == "1997/98" & game_no == 31 & off_for == 13 ~ 16,
            season == "1997/98" & game_no == 38 & off_for == 13 ~ 15,
            season == "1998/99" & game_no == 4 & shirt_no == 7 & player_name == "Mahon" ~ 15,
            season == "1998/99" & game_no == 9 & shirt_no == 11 & player_name == "Mellon" ~ 15,
            season == "1998/99" & game_no == 11 & shirt_no == 8 & player_name == "Santos" ~ 15,
            season == "1998/99" & game_no == 19 & shirt_no == 3 & player_name == "Thompson" ~ 15,
            season == "1998/99" & game_no == 19 & shirt_no == 7 & player_name == "Koumas" ~ 16,
            season == "1998/99" & game_no == 32 & shirt_no == 10 & player_name == "Mellon" ~ 16,
            .default = off_for
        ),
        # on_for must hold the shirt of the player who was replaced, i.e.
        # the shirt_no used in the matching off_for correction above (e.g.
        # Koumas, no. 15, came on for Mahon, no. 7). These entries previously
        # repeated the substitute's own number.
        on_for = case_when(
            season == "1998/99" & game_no == 4 & shirt_no == 15 & player_name == "Koumas" ~ 7,
            season == "1998/99" & game_no == 9 & shirt_no == 15 & player_name == "Mahon" ~ 11,
            season == "1998/99" & game_no == 11 & shirt_no == 15 & player_name == "Jones G" ~ 8,
            season == "1998/99" & game_no == 19 & shirt_no == 15 & player_name == "Challinor" ~ 3,
            season == "1998/99" & game_no == 19 & shirt_no == 16 & player_name == "Parkinson" ~ 7,
            season == "1998/99" & game_no == 32 & shirt_no == 16 & player_name == "Jones L" ~ 10,
            .default = on_for
        )
    )

In [5]:
# Export the long-format appearance table; missing values are written as
# empty cells.
write_csv(
    final_ssn_df,
    "../output/apps_long.csv",
    na = ""
)