In [6]:
# Libraries
library(xml2)
library(rvest)
library(tidyverse)
library(magrittr)
library(purrr)
library(glue)
library(stringr)
library(tidytext)
library(textdata)
library(dplyr)
library(httr)
library(jsonlite)
library(data.table)

In [59]:
# Import datasets
results <- read.csv(file = 'results.csv')
events <- read.csv(file = 'events.csv')

# Only consider half marathon races
results <- results[results$Distance == 21, ]

# Join tables
events <- events %>% rename('Event_Id' = 'X') #change name of events id col name so they match in both tables for joining
results <- left_join(events, results, by = 'Event_Id')

# Convert date to time
results$Time <- as.POSIXct(results$Time, format = "%M:%S") #convert to datetime
results$Time <- as.ITime(results$Time) #convert to just time

# Boxplot of half marathon speeds per each event 
jpeg(file="boxplot_event.jpeg", res=100, width=1200, height=800) #saves result to jpeg
results %>%
ggplot(mapping = aes(x = Time, y = Event)) + # plot
geom_boxplot() +
xlab("Time") + 
ylab("Event") +
theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5), 
      axis.title.x = element_text(size = 15, face = "bold"), 
      axis.title.y = element_text(size = 15, face = "bold"),
      axis.text.x = element_text(size=13),
      axis.text.y = element_text(size=13))
dev.off()

# Histogram of half marathon times depending on gender
female_times <- results[results$Gender == 'Female',] # select just female times
female_times <- female_times$Time #select times col
male_times <- results[results$Gender == 'Male',] # select just male times
male_times <- male_times$Time #select times col

jpeg(file="hist_gender.jpeg", res=100, width=1200, height=800) #saves result to jpeg
hist(male_times, col='skyblue', border=F, breaks=100, xlab='Time (minutes)', main='Histogram of half marathon times in relation to gender.', xlim=c(60, 240), ylim=c(0, 100), xaxt='n', yaxt='n') #plots male
hist(female_times, add=T,col=scales::alpha('red',.5), border=F, breaks=100) #plots female
legend("topright", inset=.05, title="Gender", c("Female","Male"), fill=c('skyblue', col=scales::alpha('red',.5)))
axis(side=1, at=seq(60,240, 20), labels=seq(60,240, 20))
axis(side=2, at=seq(0, 100, 20), labels=seq(0, 100, 20))
dev.off()

Don't know how to automatically pick scale for object of type ITime. Defaulting to continuous.

“Removed 7 rows containing non-finite values (stat_boxplot).”
