# `gls_with_games.csv`

In [5]:
library(tidyverse)

df <- read_csv("../output/gls_with_games.csv", show_col_types = FALSE)

head(df, 5)

season,game_no,player_name,goals_scored,game_date,opposition,venue,score,goals_for,goals_against,generic_comp,game_type,league_tier,manager
<chr>,<dbl>,<chr>,<dbl>,<date>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>
1921/22,1,Tom Stuart,1,1921-08-27,Crewe Alexandra,H,4-1,4,1,Football League,League,3,Bert Cooke
1921/22,1,Charles Milnes,1,1921-08-27,Crewe Alexandra,H,4-1,4,1,Football League,League,3,Bert Cooke
1921/22,1,Fred Groves,1,1921-08-27,Crewe Alexandra,H,4-1,4,1,Football League,League,3,Bert Cooke
1921/22,1,John Ford,1,1921-08-27,Crewe Alexandra,H,4-1,4,1,Football League,League,3,Bert Cooke
1921/22,2,John Prentice,1,1921-09-03,Crewe Alexandra,A,1-1,1,1,Football League,League,3,Bert Cooke


### View Goalscorers in wide form

In [2]:
df %>%
    replace(is.na(.), 0) %>%
    mutate(
        generic_comp = case_when(
            .default = generic_comp,
            generic_comp %in% c("Non-League", "Football League") ~ "League"
        ),
        generic_comp = factor(generic_comp, levels = c(
            "League",
            "FA Cup",
            "League Cup",
            "Associate Members' Cup",
            "Full Members' Cup",
            "Anglo-Italian Cup"
        ))
    ) %>%
    group_by(
        player_name,
        generic_comp
    ) %>%
    summarise(
        total_goals = sum(goals_scored),
        .groups = "drop"
    ) %>%
    pivot_wider(
        names_from = generic_comp,
        values_from = total_goals
    ) %>% rowwise(player_name) %>% mutate(Total = sum(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    arrange(
        desc(Total), desc(League), desc("FA Cup"), desc("League Cup"), desc("Associate Members' Cup"), desc("Full Members' Cup"), desc("Anglo_Italian Cup")
    ) %>%
    ungroup() %>% slice_max(
        order_by = Total,
        n = 20
    )

player_name,League,FA Cup,League Cup,Associate Members' Cup,Anglo-Italian Cup,Full Members' Cup,Total
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Ian Muir,142,14.0,6.0,18.0,,,180
John Aldridge,138,4.0,22.0,,3.0,7.0,174
OG,114,4.0,1.0,,,,119
Bunny Bell,102,11.0,,,,,113
Fred Urmson,94,13.0,,,,,107
Barry Dyson,100,,6.0,,,,106
Harold Atkinson,91,13.0,,,,,104
Keith Williams,88,8.0,1.0,,,,97
James Norwood,88,2.0,,,,,90
Ronnie Moore,78,3.0,4.0,1.0,,,86


### Sort seasonal top scorers by proportion of club's league goals they scored

In [3]:
df %>%
    filter(
        game_type == "League"
    ) %>%
    group_by(
        season,
        player_name
    ) %>%
    summarise(
        goals_scored = sum(goals_scored),
        .groups = "drop"
    ) %>%
    ungroup() %>%
    group_by(
        season
    ) %>%
    mutate(
        pc_gls = round((goals_scored / sum(goals_scored)) * 100, 1)
    ) %>%
    arrange(
        desc(pc_gls),
        season,
        player_name
    ) %>%
    group_by(season) %>%
    slice_max(
        goals_scored,
        n = 3
    ) %>%
    ungroup() %>%
    arrange(
        desc(pc_gls), desc(goals_scored), season, player_name
    ) %>%
    head(10)

season,player_name,goals_scored,pc_gls
<chr>,<chr>,<dbl>,<dbl>
2018/19,James Norwood,29,46.8
1924/25,Dixie Dean,27,45.8
1987/88,Ian Muir,27,44.3
1953/54,Cyril Done,25,42.4
1995/96,John Aldridge,27,42.2
1933/34,Bunny Bell,34,40.5
1991/92,John Aldridge,22,39.3
1975/76,Ronnie Moore,34,38.2
1984/85,John Clayton,31,37.3
1949/50,Bill Bainbridge,19,37.3


In [4]:
mans_10 <- df %>%
    group_by(manager) %>%
    summarise(
        games = n()
    ) %>%
    filter(games >= 10)

df %>%
    mutate(
        generic_comp = case_when(
            .default = generic_comp,
            generic_comp %in% c("Non-League", "Football League") ~ "League"
        ),
        generic_comp = factor(generic_comp, levels = c(
            "League",
            "FA Cup",
            "League Cup",
            "Associate Members' Cup",
            "Full Members' Cup",
            "Anglo-Italian Cup"
        ))
     ) %>%
    filter(
        generic_comp == "League",
        manager %in% mans_10$manager
    ) %>%
    group_by(
        manager,
        player_name
    ) %>%
    summarise(
        goals_scored = sum(goals_scored),
        .groups = "drop"
    ) %>%
    ungroup() %>%
    group_by(
        manager
    ) %>%
    mutate(
        pc_gls = round((goals_scored / sum(goals_scored)) * 100, 1)
    ) %>%
    arrange(
        desc(pc_gls),
        manager,
        player_name
    ) %>%
    slice_max(
        pc_gls,
        n = 1
    ) %>%
    arrange(desc(pc_gls))

manager,player_name,goals_scored,pc_gls
<chr>,<chr>,<dbl>,<dbl>
Gary Brabin,James Norwood,24,33.3
John Barnes,Ian Thomas-Moore,3,33.3
Jack Carr,Bunny Bell,33,28.9
Keith Hill,James Vaughan,11,28.9
Bill Ridding,Billy Eden,4,28.6
Rob Edwards,Abdulai Bell-Baggie,3,27.3
Rob Edwards,Cole Stockton,3,27.3
Frank Worthington,Ian Muir,29,26.6
Ian Dawes,James Vaughan,5,26.3
Peter Farrell,Keith Williams,67,25.6
