---
title: Data extraction
description: We extract the datasets using the worldfootballR library from FBref and ...
---

In [22]:
# Make sure every package this notebook relies on is installed, then attach it.
# requireNamespace() only checks availability (without attaching), so the
# library() call below is the single place where attaching happens and it
# stops with an error if the installation did not succeed.
for (pkg in c("worldfootballR", "readr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

### Collecting match results

In [23]:
# Request men's first-tier match results for the five country codes, for
# every season ending between 2018 and 2022.
country <- c("ENG", "ESP", "ITA", "GER", "FRA")
year <- seq(2018, 2022, by = 1)
match_result <- fb_match_results(
  country = country,
  gender = "M",
  season_end_year = year,
  tier = "1st"
)

In [24]:
# Columns retained from the raw match results.
columns_to_keep <- c(
  "Competition_Name", "Country", "Season_End_Year", "Date",
  "Home", "HomeGoals", "Away", "AwayGoals"
)

# Subset to those columns and relabel them in one step; the new names are
# given in the same order as columns_to_keep.
match_result <- setNames(
  match_result[, columns_to_keep],
  c("League", "Country", "Season", "Date",
    "Home", "HomeGoals", "Away", "AwayGoals")
)

# Preview the first rows.
head(match_result)

Unnamed: 0_level_0,League,Country,Season,Date,Home,HomeGoals,Away,AwayGoals
Unnamed: 0_level_1,<chr>,<chr>,<int>,<date>,<chr>,<dbl>,<chr>,<dbl>
1,Premier League,ENG,2018,2017-08-11,Arsenal,4,Leicester City,3
2,Premier League,ENG,2018,2017-08-12,Watford,3,Liverpool,3
3,Premier League,ENG,2018,2017-08-12,Crystal Palace,0,Huddersfield,3
4,Premier League,ENG,2018,2017-08-12,West Brom,1,Bournemouth,0
5,Premier League,ENG,2018,2017-08-12,Chelsea,2,Burnley,3
6,Premier League,ENG,2018,2017-08-12,Everton,1,Stoke City,0


In [26]:
# Saving the data
# The match results go to their own file: writing them to
# "data/extracted_head_coach.csv" would collide with the head coach export,
# which uses that path and would overwrite them.
write_csv(match_result, "data/extracted_match_result.csv")

### Collecting head coach data

In [27]:
# Countries whose league team URLs are collected.
country <- c("England", "Spain", "Italy", "Germany", "France")

# Query the team URLs once per country and flatten the per-country results
# into a single vector (instead of growing a vector inside a loop).
# NOTE(review): only start_year = 2018 is queried here, while the match
# results are fetched for season_end_year 2018-2022 -- confirm that teams
# appearing only in the other seasons are intentionally left out.
teams_url <- unlist(lapply(country, function(country_name) {
  tm_league_team_urls(country_name = country_name, start_year = 2018)
}))

In [28]:
# Fetch the "Manager" staff history for every collected team URL, then show
# the distinct values of the league column that came back.
head_coach <- tm_team_staff_history(
  team_urls = teams_url,
  staff_role = "Manager"
)
unique(head_coach[["league"]])

In [29]:
# Columns retained from the staff history.
columns_to_keep <- c(
  "team_name", "league", "country", "staff_name", "appointed", "end_date",
  "days_in_post", "matches", "wins", "draws", "losses"
)

# Subset to those columns and relabel them; the new names are listed in the
# same order as columns_to_keep.
head_coach <- setNames(
  head_coach[, columns_to_keep],
  c("Team", "League", "Country", "HeadCoach", "Appointed", "EndDate",
    "Tenure", "Matches", "Wins", "Draws", "Losses")
)

# Keep only the rows whose league is one of the leagues of interest.
league_of_interests <- c(
  "Premier League", "LaLiga", "Ligue 1", "Bundesliga", "Serie A"
)
in_scope <- head_coach[["League"]] %in% league_of_interests
head_coach <- head_coach[in_scope, ]

# Preview the first rows.
head(head_coach)

Unnamed: 0_level_0,Team,League,Country,HeadCoach,Appointed,EndDate,Tenure,Matches,Wins,Draws,Losses
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<date>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Manchester City,Premier League,England,Pep Guardiola,2016-07-01,,2838,461,341,56,64
2,Manchester City,Premier League,England,Manuel Pellegrini,2013-07-01,2016-06-30,1095,166,101,27,38
3,Manchester City,Premier League,England,Roberto Mancini,2009-12-19,2013-05-13,1241,191,113,38,40
4,Manchester City,Premier League,England,Mark Hughes,2008-06-04,2009-12-19,563,77,37,15,25
5,Manchester City,Premier League,England,Sven-Göran Eriksson,2007-07-06,2008-06-02,332,45,19,11,15
6,Manchester City,Premier League,England,Stuart Pearce,2005-03-11,2007-05-14,794,97,34,19,44


In [31]:
# Write the filtered head coach table to disk as CSV.
readr::write_csv(head_coach, "data/extracted_head_coach.csv")