# Olympic Medal Analysis

This Jupyter notebook uses R to perform a comprehensive analysis of the all-time Olympic medals dataset.

It covers:
- Fetching the dataset from GitHub
- Aggregating medal counts by NOC
- Merging with 2016 population & GDP-per-capita data from the World Bank
- Exploratory visualizations
- Correlation analysis & heatmap
- Regression modeling


In [None]:
# Install & load necessary R packages
#
# The availability check must use `quietly = TRUE`: in requireNamespace() the
# `quietly` parameter comes after `...`, so an abbreviated `quiet = TRUE` is
# not matched to it and is forwarded to loadNamespace() instead.
required_pkgs <- c(
  "tidyverse", "WDI", "countrycode", "maps", "viridis", "corrplot"
)

# TRUE for every package whose namespace can be loaded.
is_available <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)

# Install only what is actually missing.
missing_pkgs <- required_pkgs[!is_available]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(tidyverse)
library(WDI)
library(countrycode)
library(maps)
library(viridis)
library(corrplot)


In [None]:
# Fetch the TidyTuesday Olympics dataset
olympics <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-07-27/olympics.csv"
)

# Aggregate all-time medal counts by NOC.
#
# The dataset is athlete-level: each member of a medal-winning team has their
# own row, so counting rows would credit a NOC once per team member. Collapse
# to one row per NOC / games / event / medal before counting, so a team medal
# is counted once.
# NOTE(review): two athletes from the same NOC tying for the same medal in
# the same event would also collapse to one medal here.
#
# Rows with a missing `medal` are kept through distinct() on purpose so NOCs
# that never won a medal still appear with zero counts.
medals <- olympics %>%
  filter(!is.na(noc)) %>%
  distinct(noc, games, event, medal) %>%
  group_by(noc) %>%
  summarise(
    gold   = sum(medal == "Gold", na.rm = TRUE),
    silver = sum(medal == "Silver", na.rm = TRUE),
    bronze = sum(medal == "Bronze", na.rm = TRUE),
    total  = gold + silver + bronze,
    .groups = "drop"
  )

head(medals)

In [None]:
# Fetch 2016 population & GDP per capita from the World Bank.
# Named indicators become the column names `pop` and `gdp_pc` in the result.
wb_ind <- c(pop = "SP.POP.TOTL", gdp_pc = "NY.GDP.PCAP.KD")

# `noc` holds IOC country codes, which differ from ISO codes for many
# countries (e.g. GER vs DEU, NED vs NLD). WDI() expects ISO codes, so build
# an explicit IOC -> ISO-2 lookup and use it both to query the World Bank and
# to map the results back to the original NOC codes.
# NOCs with no ISO match (countrycode() warns about them) are dropped from the
# lookup and end up with NA population / GDP after the merge.
noc_lookup <- tibble(noc = unique(medals$noc)) %>%
  mutate(iso2c = countrycode(noc, "ioc", "iso2c")) %>%
  filter(!is.na(iso2c))

wb_data <- WDI(
  country   = unique(noc_lookup$iso2c),
  indicator = wb_ind,
  start     = 2016,
  end       = 2016
) %>%
  select(iso2c, population = pop, gdp_pc) %>%
  inner_join(noc_lookup, by = "iso2c") %>%
  select(noc, population, gdp_pc)

# Merge with medals.
# Drop any population / gdp_pc columns left over from a previous run of this
# cell so re-running it does not create `.x` / `.y` duplicates.
medals <- medals %>%
  select(-any_of(c("population", "gdp_pc"))) %>%
  left_join(wb_data, by = "noc")
head(medals)