# Genre Collaboration Network (2000-2023)

Network visualization from BigQuery co-occurrence data.

In [None]:
# Load packages
library(tidyverse)   # read_csv, dplyr verbs, tibble, purrr::map and ggplot2 used below
library(igraph)      # graph construction, layouts and centrality (always called as igraph::)
library(GGally)      # ggnet2() network plots
library(network)     # network objects and the %v% vertex-attribute operator
library(sna)         # NOTE(review): never called directly below; presumably required by ggnet2 - confirm
library(intergraph)  # asNetwork(): igraph -> network conversion
library(patchwork)   # wrap_plots() / plot_annotation() for the multi-panel figure

# Make theme_minimal() the default ggplot2 theme for every plot in this notebook
theme_set(theme_minimal())

## Load Data

In [None]:
# Read the all-time genre co-occurrence export and reshape it into an edge
# list with columns from / to / weight. Pairs seen fewer than twice and
# self-pairs (a genre paired with itself) are discarded.
edges_all <- read_csv(
  '../../data/sql_query_out/QUERY 2_ Genre Co-Occurrence Network (All Time).csv',
  show_col_types = FALSE
) %>%
  rename(weight = co_occurrence_count, from = genre_1, to = genre_2) %>%
  filter(weight >= 2) %>%
  filter(from != to)

# Report how many edges survived the filters
cat(sprintf('%d genre connections loaded\n', nrow(edges_all)))

# Preview: the ten heaviest connections
head(arrange(edges_all, desc(weight)), 10)

In [None]:
# Read the sub-genre -> main-genre lookup and normalise the main-genre
# label (surrounding whitespace removed, lower-cased) so later joins and
# counts are not split by formatting differences.
genre_mapping <- read_csv(
  '../../data/sql_query_out/QUERY 5_ Genre to Main Genre Mapping (For Node Coloring).csv',
  show_col_types = FALSE
) %>%
  mutate(primary_main_genre = primary_main_genre %>% trimws() %>% tolower())

# Report how many sub-genres have a mapping row
cat(sprintf('%d subgenres mapped\n', nrow(genre_mapping)))

# Preview: the ten main genres with the most sub-genres
head(count(genre_mapping, primary_main_genre, sort = TRUE), 10)

## Build Network

In [None]:
# Build node list with macro genre mapping
# One row per genre that appears on either end of an edge.
all_genres <- unique(c(edges_all$from, edges_all$to))

macro_mapping <- read_csv('../../data/cleaned/genre_macro_mapping.csv', show_col_types = FALSE)

# Attach main genre / artist count (QUERY 5 mapping) and macro genre
# (cleaned mapping); genres missing from either lookup get neutral defaults.
nodes <- tibble(name = all_genres) %>%
  left_join(genre_mapping %>% select(sub_genre, primary_main_genre, artist_count),
            by = c('name' = 'sub_genre')) %>%
  left_join(macro_mapping, by = c('name' = 'micro_genre')) %>%
  mutate(
    primary_main_genre = ifelse(is.na(primary_main_genre), 'other', primary_main_genre),
    artist_count = ifelse(is.na(artist_count), 1, artist_count),
    macro_genre = ifelse(is.na(macro_genre), 'OTHER', macro_genre)
  )

# Guard against join fan-out: if either mapping file lists a genre more than
# once, the joins above duplicate that genre's row, and
# igraph::graph_from_data_frame() refuses duplicate vertex names. Keep the
# first match per genre and say so instead of failing later.
n_duplicate_rows <- sum(duplicated(nodes$name))
if (n_duplicate_rows > 0) {
  warning(
    sprintf('%d duplicate node rows dropped (a mapping file lists some genres more than once)',
            n_duplicate_rows),
    call. = FALSE
  )
  nodes <- nodes %>% distinct(name, .keep_all = TRUE)
}

cat(sprintf('%d nodes, %d edges\n', nrow(nodes), nrow(edges_all)))

# Preview: the fifteen genres with the most artists
nodes %>% arrange(desc(artist_count)) %>% select(name, macro_genre, artist_count) %>% head(15)

## Network Setup

In [ ]:
# 16 macro genre colors (named vector: macro genre label -> hex colour)
macro_genre_colors <- c(
  POP           = '#FF6B6B',
  `HIP HOP`     = '#2E7D32',
  COUNTRY       = '#FFD93D',
  ROCK          = '#4A90D9',
  `R&B`         = '#F38181',
  ELECTRONIC    = '#AA96DA',
  LATIN         = '#8D6E63',
  METAL         = '#37474F',
  JAZZ          = '#FF8C42',
  BLUES         = '#1E88E5',
  FOLK          = '#A5D6A7',
  CLASSICAL     = '#CE93D8',
  REGGAE        = '#FFEE58',
  `NEW AGE`     = '#80DEEA',
  `AVANT-GARDE` = '#BCAAA4',
  OTHER         = '#9E9E9E'
)

# Undirected igraph object; vertices follow the row order of `nodes`
g <- igraph::graph_from_data_frame(d = edges_all, directed = FALSE, vertices = nodes)

# Same graph as a `network` object, with the node columns copied onto the
# vertices as attributes
net <- g %>% intergraph::asNetwork()
net %v% "macro_genre" <- nodes$macro_genre
net %v% "artist_count" <- nodes$artist_count
net %v% "genre_name" <- nodes$name

# Fruchterman-Reingold layout, seeded for reproducibility and scaled by 1.5.
# NOTE(review): igraph documents FR `weights` as attraction multipliers
# (heavier edge = vertices drawn closer), so passing 1/weight attracts
# strongly co-occurring genres less than weakly co-occurring ones - confirm
# this inversion is intended.
set.seed(42)
layout <- igraph::layout_with_fr(
  g,
  weights = 1 / igraph::E(g)$weight,
  niter = 5000
)
layout <- layout * 1.5

## Hub Genres

In [None]:
# Calculate centrality
# One row per genre (same order as `nodes`, which is the vertex order of `g`):
#   degree      - number of distinct partner genres
#   strength    - sum of co-occurrence weights over all incident edges
#   betweenness - shortest-path betweenness; weights are inverted so that a
#                 high co-occurrence count acts as a short distance
hub_metrics <- tibble(
  genre = nodes$name,
  main_genre = nodes$macro_genre,
  artists = nodes$artist_count,
  degree = igraph::degree(g),
  strength = igraph::strength(g),
  betweenness = igraph::betweenness(g, weights = 1/igraph::E(g)$weight)
) %>%
  arrange(desc(strength))

head(hub_metrics, 20)

# Bar chart of the 25 strongest hubs, coloured by macro genre
p_hubs <- ggplot(head(hub_metrics, 25), aes(x = reorder(genre, strength), y = strength, fill = main_genre)) +
  geom_col() +
  coord_flip() +
  labs(title = 'Top 25 Hub Genres', x = NULL, y = 'Collaboration Strength', fill = 'Genre') +
  scale_fill_manual(values = macro_genre_colors) +
  theme_minimal()

# The output folder is only created by the export cell at the end of the
# notebook; create it here so this save does not fail on a fresh checkout.
dir.create('../../outputs/genre_network', showWarnings = FALSE, recursive = TRUE)

ggsave('../../outputs/genre_network/genre_hubs.png', p_hubs, width = 12, height = 10, dpi = 300, bg = 'white')

## Yearly Networks

In [None]:
# Create network for a given year
#
# Reads the per-year co-occurrence export, builds an undirected genre graph
# and draws it with ggnet2 (node colour = macro genre, node size = artist
# count, title = the year).
#
# Arguments:
#   year_val   - year to plot; substituted into the export file name via %d.
#   node_table - node attributes with columns name, artist_count and
#                macro_genre; defaults to the notebook-level `nodes`.
#   palette    - named colour vector keyed by macro genre; defaults to the
#                notebook-level `macro_genre_colors`.
#
# Returns a ggplot object, or NULL when the year's file does not exist or
# contains no edge that passes the filters.
create_year_network <- function(year_val,
                                node_table = nodes,
                                palette = macro_genre_colors) {
  file_path <- sprintf('../../data/sql_query_out/QUERY 4_ Export Individual Year Networks (Example for %d).csv', year_val)
  if (!file.exists(file_path)) return(NULL)

  # Same cleaning as the all-time edge list: drop self-pairs and weak pairs
  edges_yr <- read_csv(file_path, show_col_types = FALSE) %>%
    rename(from = genre_1, to = genre_2, weight = co_occurrence_count) %>%
    filter(from != to, weight >= 2)

  if (nrow(edges_yr) == 0) return(NULL)

  genres_yr <- unique(c(edges_yr$from, edges_yr$to))
  nodes_yr <- node_table %>% filter(name %in% genres_yr)

  # graph_from_data_frame() errors when an edge endpoint is missing from the
  # vertex table. A genre can pass the yearly filter without being in the
  # supplied node table, so append such genres with the same defaults the
  # all-time node table uses for unmapped genres.
  missing_genres <- setdiff(genres_yr, nodes_yr$name)
  if (length(missing_genres) > 0) {
    nodes_yr <- bind_rows(
      nodes_yr,
      tibble(
        name = missing_genres,
        primary_main_genre = 'other',
        artist_count = 1,
        macro_genre = 'OTHER'
      )
    )
  }

  g_yr <- igraph::graph_from_data_frame(edges_yr, directed = FALSE, vertices = nodes_yr)
  net_yr <- intergraph::asNetwork(g_yr)

  # Vertex order follows nodes_yr, so the columns can be assigned directly
  net_yr %v% "genre_name" <- nodes_yr$name
  net_yr %v% "artist_count" <- nodes_yr$artist_count
  net_yr %v% "macro_genre" <- nodes_yr$macro_genre

  # Fixed seed so every year is laid out reproducibly.
  # NOTE(review): igraph documents FR `weights` as attraction multipliers
  # (heavier = closer); 1/weight therefore attracts strongly co-occurring
  # genres less than weak ones - confirm this inversion is intended.
  set.seed(42)
  layout_yr <- igraph::layout_with_fr(g_yr, weights = 1/igraph::E(g_yr)$weight, niter = 1000) * 1.3

  ggnet2(net_yr, mode = layout_yr,
         node.color = "macro_genre", node.size = "artist_count", node.alpha = 0.9,
         edge.alpha = 0.18, edge.color = "gray30",
         label = TRUE, label.size = 3, label.alpha = 1,
         palette = palette, legend.position = "none") +
    scale_x_continuous(expand = expansion(mult = 0.15)) +
    scale_y_continuous(expand = expansion(mult = 0.15)) +
    labs(title = year_val) +
    theme_void() +
    # theme_void() is a complete theme and replaces the theme ggnet2 set,
    # including its legend.position = "none"; re-apply it so the colour and
    # size legends stay hidden.
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
    )
}

# Key years
# Build one network plot per milestone year; years without a usable export
# come back as NULL and are dropped before the panels are combined.
key_years <- c(2000, 2005, 2010, 2015, 2020, 2023)
yearly_plots <- map(key_years, create_year_network)
yearly_plots <- yearly_plots[!vapply(yearly_plots, is.null, logical(1))]

if (length(yearly_plots) > 0) {
  # Three panels per row under a shared title
  combined <- wrap_plots(yearly_plots, ncol = 3) +
    plot_annotation(title = 'Genre Network Evolution (2000-2023)')

  # The output folder is only created by the export cell that runs after
  # this one; create it here so the save works on a fresh checkout.
  dir.create('../../outputs/genre_network', showWarnings = FALSE, recursive = TRUE)

  ggsave('../../outputs/genre_network/genre_network_evolution_key_years.png', combined,
         width = 28, height = 19, dpi = 300, bg = 'white')
}

## Export Yearly Snapshots

In [None]:
# Render one PNG per year (2000-2023) to serve as animation frames.
# The snapshot folder is created first, including any missing parents.
dir.create(
  '../../outputs/genre_network/genre_snapshots_yearly',
  showWarnings = FALSE,
  recursive = TRUE
)

for (yr in 2000:2023) {
  p <- create_year_network(yr)

  # No export (or no usable edges) for this year: skip it
  if (is.null(p)) next

  ggsave(
    filename = sprintf('../../outputs/genre_network/genre_snapshots_yearly/network_%d.png', yr),
    plot = p,
    width = 10,
    height = 10,
    dpi = 150,
    bg = 'white'
  )

  # Progress indicator: print each year as soon as its frame is written
  cat(sprintf('%d ', yr))
}
cat('\nDone\n')