## Event Links Notebook

This notebook uses the events.csv file to generate a list of links to the event results pages, and then saves these to a csv file. 

### Library Imports

In [12]:
library(tidyverse)
library(sjmisc)
library(stringr)
library(glue)
library(purrr)

### Generating Links

In [13]:
# Load the events table from events.csv (column-type message suppressed)
events_df <- read_csv("events.csv", show_col_types = FALSE)
events_df

[1m[22mNew names:
[36m•[39m `` -> `...1`


...1,Event,Link,Date,Participants,Location,City
<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>
1,Tauranga Marathon,https://taurangamarathon.nz,18/09/2022,1570,Tauranga,Tauranga
2,Devonport Half Marathon,https://devonporthalfmarathon.co.nz,2/10/2022,1268,Devonport,Auckland
3,Run Orewa,https://runorewa.nz,16/10/2022,834,Orewa,Auckland
4,Corporate Challenge Wellington,https://corporatechallenge.co.nz,11/11/2020,393,Wellington,Wellington
5,Corporate Challenge Christchurch,https://corporatechallenge.co.nz,16/02/2022,767,Christchurch,Christchurch
6,Corporate Challenge Auckland,https://corporatechallenge.co.nz,18/11/2020,938,Auckland,Auckland
7,Run The Point,https://runthepoint.nz,20/02/2022,771,Hobsonville,Auckland
8,Omaha Half Marathon,https://omahahalfmarathon.co.nz,27/03/2022,1689,Omaha,Auckland
9,Run Devonport,https://rundevonport.nz,7/02/2022,768,Devonport,Auckland
10,Coatesville Half Marathon,https://coatesvillehalfmarathon.co.nz,13/02/2022,1270,Coatesville,Auckland


In [14]:
# Show each column's name, type and first few values
glimpse(events_df)

Rows: 13
Columns: 7
$ ...1         [3m[90m<dbl>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
$ Event        [3m[90m<chr>[39m[23m "Tauranga Marathon", "Devonport Half Marathon", "Run Orew…
$ Link         [3m[90m<chr>[39m[23m "https://taurangamarathon.nz", "https://devonporthalfmara…
$ Date         [3m[90m<chr>[39m[23m "18/09/2022", "2/10/2022", "16/10/2022", "11/11/2020", "1…
$ Participants [3m[90m<dbl>[39m[23m 1570, 1268, 834, 393, 767, 938, 771, 1689, 768, 1270, 135…
$ Location     [3m[90m<chr>[39m[23m "Tauranga", "Devonport", "Orewa", "Wellington", "Christch…
$ City         [3m[90m<chr>[39m[23m "Tauranga", "Auckland", "Auckland", "Wellington", "Christ…


In [15]:
# The unnamed first CSV column was auto-named `...1` on import; call it Event_Id
events_df <- rename(events_df, Event_Id = `...1`)

In [16]:
 # Build the results-page link(s) for every event, plus a parallel list of event ids.
#
# Outputs (both lists are index-aligned):
#   links_list    - element 1 is a NULL placeholder, followed by one link per race
#   event_id_list - element 1 is a NULL placeholder, followed by the matching Event_Id
#
# NOTE: the original code sized these lists with `length(2*nrow(events_df))`, which is
# always 1 (the length of a scalar), not 2 * nrow. The single leading NULL that this
# produced is kept on purpose, written explicitly, because the next cell removes it
# with `[-1]`.
links_list <- list(NULL)
event_id_list <- list(NULL)

# Root of every results URL (constant, so defined once outside the loop)
base_url <- "https://results.runningevents.co.nz/list"

# seq_len() is used instead of 1:nrow() so that an empty table yields zero iterations
for (i in seq_len(nrow(events_df))) {

    # Extract the row values as plain scalars to make the code tidier
    event_id <- events_df[["Event_Id"]][[i]]
    event <- events_df[["Event"]][[i]]
    date <- events_df[["Date"]][[i]]
    location <- events_df[["Location"]][[i]]

    # Year portion of the date (its last four characters)
    year <- str_sub(date, start = -4)

    # The branch order matters: "Run" is tested before "Marathon"
    if (str_contains(event, "Run")) {
        # Events with "Run" in their name have 15km and 10km races;
        # the URL slug is the event name, lowercased with spaces removed
        event_name <- tolower(str_replace_all(event, pattern = " ", replacement = ""))
        distances <- c("15km", "10km")
    } else if (str_contains(event, "Marathon")) {
        # Events with "Marathon" in their name have 21km and 10km races;
        # the URL slug is the lowercased location
        event_name <- tolower(location)
        distances <- c("21km", "10km")
    } else if (str_contains(event, "Corporate Challenge")) {
        # Events with "Corporate Challenge" in their name only have 5km races;
        # the URL slug is "cc" followed by the lowercased location
        event_name <- paste0("cc", tolower(location))
        distances <- "5km"
    } else {
        # Events matching none of the patterns produce no links
        next
    }

    # Append one link per race distance, keeping the event id list in step
    for (distance in distances) {
        links_list <- append(links_list, glue("{base_url}/{event_name}/{year}/{distance}"))
        event_id_list <- append(event_id_list, event_id)
    }
}

In [18]:
# Assemble the links table in a single pipeline.
# Element 1 of both lists is a NULL placeholder (the lists were only ever appended to),
# so it is dropped with [-1] before use.
event_links_df <- links_list[-1] %>%
  do.call(what = rbind) %>%                                  # stack the links into a one-column matrix
  as.data.frame() %>%
  rename("Link" = 1) %>%                                     # name the single column
  add_column(Event_Id = event_id_list[-1], .before = 1) %>%  # ids go in as a list-column first
  unnest(Event_Id)                                           # flatten the list-column to a plain column

event_links_df

Event_Id,Link
<dbl>,<chr>
1,https://results.runningevents.co.nz/list/tauranga/2022/21km
1,https://results.runningevents.co.nz/list/tauranga/2022/10km
2,https://results.runningevents.co.nz/list/devonport/2022/21km
2,https://results.runningevents.co.nz/list/devonport/2022/10km
3,https://results.runningevents.co.nz/list/runorewa/2022/15km
3,https://results.runningevents.co.nz/list/runorewa/2022/10km
4,https://results.runningevents.co.nz/list/ccwellington/2020/5km
5,https://results.runningevents.co.nz/list/ccchristchurch/2022/5km
6,https://results.runningevents.co.nz/list/ccauckland/2020/5km
7,https://results.runningevents.co.nz/list/runthepoint/2022/15km


In [19]:
# Save as CSV file 
#write.csv(event_links_df, "event_links.csv")