# Genre Collaboration Network (2000-2023)

Network visualization from BigQuery co-occurrence data.

In [123]:
# Load packages
# Data import, wrangling, string helpers and ggplot2 plotting
library(tidyverse)
# Graph construction, layouts and centrality measures
library(igraph)
# NOTE(review): GGally is attached but not referenced in the cells shown here.
library(GGally)
# `network` objects and the %v% vertex-attribute operator
library(network)
# NOTE(review): sna is attached after igraph and both export functions such as
# degree() and betweenness(); later cells call the igraph versions with an
# explicit igraph:: prefix, presumably to avoid the masking.
library(sna)
# Converts igraph graphs to `network` objects (asNetwork)
library(intergraph)
# Combines ggplot panels (wrap_plots, plot_annotation)
library(patchwork)

# Make the minimal theme the default for every ggplot created below
theme_set(theme_minimal())

## Load Data

In [124]:
# Read the all-time genre co-occurrence export and reshape it into an edge
# list (from, to, weight). Self-loops and pairs with a co-occurrence count
# below 2 are dropped.
edges_all <- read_csv(
  '../../data/sql_query_out/QUERY 2_ Genre Co-Occurrence Network (All Time).csv',
  show_col_types = FALSE
)
edges_all <- rename(
  edges_all,
  from = genre_1,
  to = genre_2,
  weight = co_occurrence_count
)
edges_all <- filter(edges_all, from != to, weight >= 2)

cat(sprintf('%d genre connections loaded\n', nrow(edges_all)))

# Preview the ten strongest connections
head(arrange(edges_all, desc(weight)), 10)

8597 genre connections loaded


from,to,weight
<chr>,<chr>,<dbl>
hip hop,rap,287
pop rap,rap,273
dance pop,pop,268
mellow gold,soft rock,243
album rock,rock,239
pop rap,southern hip hop,237
r&b,urban contemporary,224
hip hop,pop rap,218
rap,southern hip hop,213
indie rock,modern rock,212


In [125]:
# Read the sub-genre -> main-genre lookup and normalise the main-genre label
# (surrounding whitespace stripped, lower-cased).
genre_mapping <- read_csv(
  '../../data/sql_query_out/QUERY 5_ Genre to Main Genre Mapping (For Node Coloring).csv',
  show_col_types = FALSE
)
genre_mapping <- mutate(
  genre_mapping,
  primary_main_genre = tolower(trimws(primary_main_genre))
)

cat(sprintf('%d subgenres mapped\n', nrow(genre_mapping)))

# Ten main genres with the most sub-genres assigned to them
head(count(genre_mapping, primary_main_genre, sort = TRUE), 10)

1562 subgenres mapped


primary_main_genre,n
<chr>,<int>
alternative metal,70
alternative rock,44
alternative dance,43
album rock,36
dance pop,29
alternative country,22
latin,22
anthem worship,19
big room,18
chamber pop,17


## Build Network

In [126]:
# Build the node table: one row per genre that appears at either end of an edge
all_genres <- union(edges_all$from, edges_all$to)

# Macro-genre assignment for each micro genre (regex + manual assignments)
macro_mapping <- read_csv('../../data/cleaned/genre_network_mapping.csv', show_col_types = FALSE)

# Attach artist counts and macro genres by genre name
nodes <- left_join(
  tibble(name = all_genres),
  select(genre_mapping, sub_genre, artist_count),
  by = c('name' = 'sub_genre')
)
nodes <- left_join(nodes, macro_mapping, by = c('name' = 'micro_genre'))

# Genres without a match fall back to one artist and the 'OTHER' bucket
nodes <- mutate(
  nodes,
  artist_count = coalesce(artist_count, 1),
  macro_genre = coalesce(macro_genre, 'OTHER')
)

cat(sprintf('%d nodes, %d edges\n', nrow(nodes), nrow(edges_all)))
cat('\nMacro genre distribution:\n')
print(count(nodes, macro_genre, sort = TRUE), n = 20)

957 nodes, 8597 edges

Macro genre distribution:
# A tibble: 16 x 2
   macro_genre     n
   <chr>       <int>
 1 ROCK          239
 2 POP           129
 3 ELECTRONIC     93
 4 METAL          86
 5 HIP HOP        80
 6 R&B            62
 7 JAZZ           45
 8 LATIN          43
 9 OTHER          39
10 CLASSICAL      36
11 FOLK           33
12 COUNTRY        32
13 BLUES          17
14 REGGAE         11
15 NEW AGE         8
16 AVANT-GARDE     4


## Network Setup

In [127]:
# Colour palette: one hex colour per macro genre (16 entries, names must match
# the values of nodes$macro_genre, including the 'OTHER' fallback)
macro_genre_colors <- c(
  'POP' = '#f180a6ff',
  'ELECTRONIC' = '#d31f8eff',
  'R&B' = '#FF0800',
  'HIP HOP' = '#9D2A3A',
  'REGGAE' = '#FFB627',
  'LATIN' = '#E07B00',
  'BLUES' = '#1565C0',
  'JAZZ' = '#5C9CE6',
  'ROCK' = '#2832C2',
  'METAL' = '#1F456E',
  'FOLK' = '#6B8E4E',
  'COUNTRY' = '#7bac21ff',
  'CLASSICAL' = '#7B2D8E',
  'NEW AGE' = '#B57EDC',
  'AVANT-GARDE' = '#4A235A',
  'OTHER' = '#757575'
)

# Create igraph (undirected; the columns of `nodes` become vertex attributes)
g <- igraph::graph_from_data_frame(edges_all, directed = FALSE, vertices = nodes)

# Convert to network object and set the vertex attributes explicitly
net <- intergraph::asNetwork(g)
net %v% "genre_name" <- nodes$name
net %v% "artist_count" <- nodes$artist_count
net %v% "macro_genre" <- nodes$macro_genre

# Layout
# Fruchterman-Reingold multiplies the attraction along an edge by its weight,
# so the raw co-occurrence count is passed: genres that co-occur often are
# pulled closer together. (The reciprocal 1/weight is only appropriate where
# weights act as distances, e.g. betweenness.)
set.seed(42)
layout <- igraph::layout_with_fr(g, weights = igraph::E(g)$weight, niter = 5000) * 1.5

## Hub Genres

In [128]:
# Calculate centrality
# - degree:      number of distinct genres a genre is connected to
# - strength:    sum of the co-occurrence weights on its edges
# - betweenness: shortest-path based, so 1/weight is used to make strong
#                connections count as short distances
hub_metrics <- tibble(
  genre = nodes$name,
  main_genre = nodes$macro_genre,
  artists = nodes$artist_count,
  degree = igraph::degree(g),
  strength = igraph::strength(g),
  betweenness = igraph::betweenness(g, weights = 1/igraph::E(g)$weight)
) %>%
  arrange(desc(strength))

head(hub_metrics, 20)

# Bar chart of the 25 genres with the highest strength
p_hubs <- ggplot(head(hub_metrics, 25), aes(x = reorder(genre, strength), y = strength, fill = main_genre)) +
  geom_col() +
  coord_flip() +
  labs(title = 'Top 25 Hub Genres', x = NULL, y = 'Collaboration Strength', fill = 'Genre') +
  scale_fill_manual(values = macro_genre_colors) +
  theme_minimal()

# This is the first cell that writes to the output folder, so make sure it
# exists; otherwise ggsave() fails on a fresh checkout.
dir.create('../../outputs/genre_network', showWarnings = FALSE, recursive = TRUE)
ggsave('../../outputs/genre_network/genre_hubs.png', p_hubs, width = 12, height = 10, dpi = 300, bg = 'white')

genre,main_genre,artists,degree,strength,betweenness
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
rock,ROCK,234,172,4062,162032.25
folk rock,ROCK,89,105,2846,45627.0
singer-songwriter,FOLK,54,140,2740,14171.0
modern rock,ROCK,62,153,2723,59555.43
mellow gold,POP,145,96,2591,33109.0
indie rock,ROCK,69,128,2517,16454.51
folk-pop,POP,46,145,2427,28311.01
classic rock,ROCK,172,95,2390,7482.0
hip hop,HIP HOP,94,79,2330,31245.0
rap,HIP HOP,52,71,2217,6171.5


In [129]:
# Full network visualization - FR layout with center scaling and collision detection
library(ggraph)

set.seed(42)

# Make sure the output folder exists before anything is saved into it
dir.create('../../outputs/genre_network', showWarnings = FALSE, recursive = TRUE)

# Label only the nodes at or above this artist-count quantile. The plot
# subtitle is derived from the same value so the two cannot drift apart
# (0.75 -> the top 25% are labeled).
label_quantile <- 0.75
label_threshold <- quantile(nodes$artist_count, label_quantile)
nodes_for_plot <- nodes %>%
  mutate(
    # Wrap text - replace spaces with newlines for multi-word genres
    label = ifelse(artist_count >= label_threshold,
                   str_replace_all(name, " ", "\n"),
                   NA_character_)
  )

# Create fresh graph carrying the label column as a vertex attribute
g_full <- igraph::graph_from_data_frame(edges_all, directed = FALSE, vertices = nodes_for_plot)

# Calculate layout first, then scale it to spread nodes.
# Fruchterman-Reingold multiplies edge attraction by the weight, so the raw
# co-occurrence count is used: frequently co-occurring genres end up closer.
layout_fr <- create_layout(g_full, layout = 'fr',
                           weights = igraph::E(g_full)$weight, niter = 10000)

# Scale coordinates outward from center
center_x <- mean(layout_fr$x)
center_y <- mean(layout_fr$y)
layout_fr$x <- center_x + (layout_fr$x - center_x) * 3.5
layout_fr$y <- center_y + (layout_fr$y - center_y) * 3.5

# Collision detection - push overlapping nodes apart
# Node radius proportional to artist_count, mapped onto [min_size, max_size]
min_size <- 2
max_size <- 18
count_span <- max(layout_fr$artist_count) - min(layout_fr$artist_count)
if (count_span > 0) {
  layout_fr$radius <- min_size + (max_size - min_size) *
    (layout_fr$artist_count - min(layout_fr$artist_count)) / count_span
} else {
  # All nodes have the same artist count: avoid 0/0 and use the smallest radius
  layout_fr$radius <- min_size
}

# Iterative collision resolution.
# Work on plain vectors instead of assigning into the data frame element by
# element; the arithmetic and visiting order are unchanged, only the
# per-assignment data-frame overhead is removed.
pos_x <- layout_fr$x
pos_y <- layout_fr$y
radii <- layout_fr$radius
n_nodes <- length(pos_x)

for (iter in 1:50) {
  moved <- FALSE
  for (i in seq_len(n_nodes - 1)) {
    for (j in (i + 1):n_nodes) {
      dx <- pos_x[j] - pos_x[i]
      dy <- pos_y[j] - pos_y[i]
      dist <- sqrt(dx^2 + dy^2)
      min_dist <- (radii[i] + radii[j]) * 0.35  # scale factor for plot units

      if (dist < min_dist && dist > 0) {
        # Push both nodes apart along the line joining them, half the overlap each
        overlap <- min_dist - dist
        pushx <- (dx / dist) * overlap * 0.5
        pushy <- (dy / dist) * overlap * 0.5

        pos_x[i] <- pos_x[i] - pushx
        pos_y[i] <- pos_y[i] - pushy
        pos_x[j] <- pos_x[j] + pushx
        pos_y[j] <- pos_y[j] + pushy
        moved <- TRUE
      }
    }
  }
  # Stop early once a full pass finds no overlapping pair
  if (!moved) break
}
layout_fr$x <- pos_x
layout_fr$y <- pos_y
cat(sprintf("Collision resolution: %d iterations\n", iter))

p_full <- ggraph(layout_fr) +
  geom_edge_link(alpha = 0.05, color = "gray60") +
  geom_node_point(aes(color = macro_genre, size = artist_count), alpha = 0.8) +
  geom_node_text(aes(label = label), size = 1.2, color = "#ffffffff", fontface = "bold", lineheight = 0.8) +
  scale_color_manual(values = macro_genre_colors, name = "Genre") +
  scale_size_continuous(range = c(2, 18), name = "Artists") +
  labs(title = "Billboard Genre Network (2000-2023)",
       subtitle = sprintf("%d nodes, %d edges | Top %.0f%% labeled",
                          igraph::vcount(g_full), igraph::ecount(g_full),
                          100 * (1 - label_quantile))) +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12, color = "gray40"),
    legend.position = "right",
    plot.margin = margin(20, 20, 20, 20)
  ) +
  coord_fixed(ratio = 1)

ggsave('../../outputs/genre_network/genre_network_full.png', p_full,
       width = 28, height = 24, dpi = 300, bg = 'white')

cat(sprintf("Saved genre_network_full.png (%d nodes, %d edges)\n",
            igraph::vcount(g_full), igraph::ecount(g_full)))

Collision resolution: 50 iterations


"[1m[22mRemoved 709 rows containing missing values or values outside the scale range
(`geom_text()`)."


Saved genre_network_full.png (957 nodes, 8597 edges)


## Yearly Networks

In [130]:
# Create network for a given year - FR layout with "pop" fixed at center
library(ggraph)

#' Build the genre co-occurrence network plot for a single year
#'
#' Reads that year's exported edge list, keeps non-self edges with a
#' co-occurrence count of at least 2, lays the graph out with
#' Fruchterman-Reingold and translates the layout so the "pop" node sits at
#' the origin. Despite the function name, the layout is force-directed, not
#' circular.
#'
#' Relies on the notebook-level objects `nodes` (node attributes) and
#' `macro_genre_colors` (colour palette).
#'
#' @param year_val Year to plot; used to build the input file name and as the
#'   panel title.
#' @return A ggraph/ggplot object, or `NULL` when the year's file is missing or
#'   has no edges left after filtering.
create_year_network_circular <- function(year_val) {
  file_path <- sprintf('../../data/sql_query_out/QUERY 4_ Export Individual Year Networks (Example for %d).csv', year_val)
  if (!file.exists(file_path)) return(NULL)

  edges_yr <- read_csv(file_path, show_col_types = FALSE) %>%
    rename(from = genre_1, to = genre_2, weight = co_occurrence_count) %>%
    filter(from != to, weight >= 2)

  if (nrow(edges_yr) == 0) return(NULL)

  genres_yr <- unique(c(edges_yr$from, edges_yr$to))
  nodes_yr <- nodes %>% filter(name %in% genres_yr)

  # graph_from_data_frame() errors when an edge endpoint is absent from the
  # vertex table, so give genres unknown to `nodes` the same defaults that are
  # used when `nodes` is built (one artist, 'OTHER').
  missing_genres <- setdiff(genres_yr, nodes_yr$name)
  if (length(missing_genres) > 0) {
    nodes_yr <- bind_rows(
      nodes_yr,
      tibble(name = missing_genres, artist_count = 1, macro_genre = 'OTHER')
    )
  }

  # Ensure "pop" is included (as an isolated node if it has no edges this year)
  if (!"pop" %in% nodes_yr$name) {
    pop_node <- nodes %>% filter(name == "pop")
    if (nrow(pop_node) > 0) {
      nodes_yr <- bind_rows(nodes_yr, pop_node)
    }
  }

  # Create graph; every column of nodes_yr (artist_count, macro_genre, ...)
  # is attached as a vertex attribute, so no separate assignment is needed
  g_yr <- igraph::graph_from_data_frame(edges_yr, directed = FALSE, vertices = nodes_yr)

  # Calculate FR layout. The weight multiplies the attraction along an edge,
  # so the raw co-occurrence count pulls frequently co-occurring genres together.
  set.seed(42)
  layout_mat <- igraph::layout_with_fr(g_yr, weights = igraph::E(g_yr)$weight, niter = 5000)

  # Find "pop" and translate the whole layout so it sits at the origin
  pop_idx <- which(igraph::V(g_yr)$name == "pop")
  if (length(pop_idx) > 0) {
    pop_x <- layout_mat[pop_idx, 1]
    pop_y <- layout_mat[pop_idx, 2]
    layout_mat[, 1] <- layout_mat[, 1] - pop_x
    layout_mat[, 2] <- layout_mat[, 2] - pop_y
  }

  # Scale layout for better spread
  layout_mat <- layout_mat * 2

  # Create ggraph layout from the precomputed coordinates
  layout_df <- create_layout(g_yr, layout = 'manual', x = layout_mat[, 1], y = layout_mat[, 2])

  # Label nodes at or above the 70th percentile of artist_count, plus "pop";
  # spaces become newlines so multi-word names wrap inside the node
  label_threshold <- quantile(layout_df$artist_count, 0.7, na.rm = TRUE)
  layout_df$label <- ifelse(
    layout_df$artist_count >= label_threshold | layout_df$name == "pop",
    str_replace_all(layout_df$name, " ", "\n"),
    NA_character_
  )

  ggraph(layout_df) +
    geom_edge_link(alpha = 0.1, color = "gray50") +
    geom_node_point(aes(color = macro_genre, size = artist_count), alpha = 0.85) +
    geom_node_text(aes(label = label), size = 2, color = "#333333", fontface = "bold", lineheight = 0.8) +
    scale_color_manual(values = macro_genre_colors, name = "Genre") +
    scale_size_continuous(range = c(2, 12), name = "Artists") +
    labs(title = as.character(year_val)) +
    theme_void() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
      legend.position = "none"
    ) +
    coord_fixed(ratio = 1)
}

# Build one panel per key year and drop the years that produced no plot
key_years <- c(2000, 2005, 2010, 2015, 2020, 2023)
yearly_plots <- discard(map(key_years, create_year_network_circular), is.null)

# Only assemble and save the figure when at least one year could be drawn
if (length(yearly_plots) > 0) {
  combined <- wrap_plots(yearly_plots, ncol = 3) +
    plot_annotation(
      title = 'Genre Network Evolution (2000-2023)',
      subtitle = 'Force-directed layout with POP anchored at center',
      theme = theme(
        plot.title = element_text(hjust = 0.5, size = 20, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 14, color = "gray40")
      )
    )

  ggsave(
    '../../outputs/genre_network/genre_network_evolution_key_years.png',
    combined,
    width = 28, height = 19, dpi = 300, bg = 'white'
  )
  cat("Saved genre_network_evolution_key_years.png\n")
}

"[1m[22mRemoved 31 rows containing missing values or values outside the scale range
(`geom_text()`)."
"[1m[22mRemoved 31 rows containing missing values or values outside the scale range
(`geom_text()`)."
"[1m[22mRemoved 25 rows containing missing values or values outside the scale range
(`geom_text()`)."
"[1m[22mRemoved 27 rows containing missing values or values outside the scale range
(`geom_text()`)."
"[1m[22mRemoved 17 rows containing missing values or values outside the scale range
(`geom_text()`)."
"[1m[22mRemoved 12 rows containing missing values or values outside the scale range
(`geom_text()`)."


Saved genre_network_evolution_key_years.png


## Export Data

In [131]:
# Export one PNG per year (2000-2023) for animation, using the force-directed
# yearly plot with "pop" at the center. Years without a plot are skipped.
snapshot_dir <- '../../outputs/genre_network/genre_snapshots_yearly'
dir.create(snapshot_dir, showWarnings = FALSE, recursive = TRUE)

for (yr in 2000:2023) {
  p <- create_year_network_circular(yr)
  if (is.null(p)) next

  ggsave(
    file.path(snapshot_dir, sprintf('network_%d.png', yr)),
    p,
    width = 12, height = 12, dpi = 150, bg = 'white'
  )
  # Progress marker: print each year as soon as its file is written
  cat(sprintf('%d ', yr))
}
cat('\nDone - Circular layout snapshots exported\n')

"[1m[22mRemoved 31 rows containing missing values or values outside the scale range
(`geom_text()`)."


2000 

"[1m[22mRemoved 35 rows containing missing values or values outside the scale range
(`geom_text()`)."


2001 

"[1m[22mRemoved 35 rows containing missing values or values outside the scale range
(`geom_text()`)."


2002 

"[1m[22mRemoved 33 rows containing missing values or values outside the scale range
(`geom_text()`)."


2003 

"[1m[22mRemoved 31 rows containing missing values or values outside the scale range
(`geom_text()`)."


2004 

"[1m[22mRemoved 31 rows containing missing values or values outside the scale range
(`geom_text()`)."


2005 

"[1m[22mRemoved 33 rows containing missing values or values outside the scale range
(`geom_text()`)."


2006 

"[1m[22mRemoved 26 rows containing missing values or values outside the scale range
(`geom_text()`)."


2007 

"[1m[22mRemoved 25 rows containing missing values or values outside the scale range
(`geom_text()`)."


2008 

"[1m[22mRemoved 28 rows containing missing values or values outside the scale range
(`geom_text()`)."


2009 

"[1m[22mRemoved 25 rows containing missing values or values outside the scale range
(`geom_text()`)."


2010 

"[1m[22mRemoved 28 rows containing missing values or values outside the scale range
(`geom_text()`)."


2011 

"[1m[22mRemoved 23 rows containing missing values or values outside the scale range
(`geom_text()`)."


2012 

"[1m[22mRemoved 29 rows containing missing values or values outside the scale range
(`geom_text()`)."


2013 

"[1m[22mRemoved 34 rows containing missing values or values outside the scale range
(`geom_text()`)."


2014 

"[1m[22mRemoved 27 rows containing missing values or values outside the scale range
(`geom_text()`)."


2015 

"[1m[22mRemoved 27 rows containing missing values or values outside the scale range
(`geom_text()`)."


2016 

"[1m[22mRemoved 23 rows containing missing values or values outside the scale range
(`geom_text()`)."


2017 

"[1m[22mRemoved 24 rows containing missing values or values outside the scale range
(`geom_text()`)."


2018 

"[1m[22mRemoved 20 rows containing missing values or values outside the scale range
(`geom_text()`)."


2019 

"[1m[22mRemoved 17 rows containing missing values or values outside the scale range
(`geom_text()`)."


2020 

"[1m[22mRemoved 16 rows containing missing values or values outside the scale range
(`geom_text()`)."


2021 

"[1m[22mRemoved 13 rows containing missing values or values outside the scale range
(`geom_text()`)."


2022 

"[1m[22mRemoved 12 rows containing missing values or values outside the scale range
(`geom_text()`)."


2023 
Done - Circular layout snapshots exported


## Temporal Edges Export for Gephi

Create a single edges file with a year column so edges can be toggled on/off by year in Gephi.

In [132]:
# Create temporal edges file with year column for Gephi dynamic filtering
# Each unique edge pair (from, to) will have 24 rows - one per year (2000-2023)
# Weight = 0 if no connection that year, otherwise the co-occurrence count

# Load all yearly data files and combine
years <- 2000:2023

yearly_edges_list <- map(years, function(yr) {
  file_path <- sprintf('../../data/sql_query_out/QUERY 4_ Export Individual Year Networks (Example for %d).csv', yr)
  # Years without an export are skipped (bind_rows() drops the NULLs)
  if (!file.exists(file_path)) return(NULL)

  read_csv(file_path, show_col_types = FALSE) %>%
    rename(from = genre_1, to = genre_2, weight = co_occurrence_count) %>%
    filter(from != to) %>%  # Remove self-loops
    mutate(year = yr)
})

yearly_edges <- bind_rows(yearly_edges_list)

# Fail with a clear message instead of a later "object 'from' not found"
# when none of the yearly files could be read
if (nrow(yearly_edges) == 0) {
  stop('No yearly edge records found in ../../data/sql_query_out/', call. = FALSE)
}
cat(sprintf('Loaded %d yearly edge records\n', nrow(yearly_edges)))

# Get all unique edge pairs from the all-time network (these are canonical pairs)
all_edge_pairs <- edges_all %>%
  select(from, to) %>%
  # Ensure consistent ordering (alphabetical) so (a, b) and (b, a) collapse
  mutate(
    genre_a = pmin(from, to),
    genre_b = pmax(from, to)
  ) %>%
  select(genre_a, genre_b) %>%
  distinct()

cat(sprintf('%d unique edge pairs in all-time network\n', nrow(all_edge_pairs)))

# Create full grid: all edge pairs x all years
temporal_grid <- expand_grid(
  all_edge_pairs,
  year = years
)

# Plain ASCII "x" is used in the message: the multiplication sign was printed
# as <U+00D7> when the console encoding could not represent it
cat(sprintf('Grid size: %d edge pairs x %d years = %d rows\n',
            nrow(all_edge_pairs), length(years), nrow(temporal_grid)))

# Normalize yearly edges to same ordering for matching; if both orientations
# of a pair occur in one year their counts are summed
yearly_edges_normalized <- yearly_edges %>%
  mutate(
    genre_a = pmin(from, to),
    genre_b = pmax(from, to)
  ) %>%
  group_by(genre_a, genre_b, year) %>%
  summarize(weight = sum(weight), .groups = 'drop')

# Join to get weights (0 if no connection that year). Yearly pairs that are
# not part of the all-time network are dropped by the left join.
temporal_edges <- temporal_grid %>%
  left_join(yearly_edges_normalized, by = c('genre_a', 'genre_b', 'year')) %>%
  mutate(weight = coalesce(weight, 0)) %>%
  rename(from = genre_a, to = genre_b)

# Summary stats
cat('\nTemporal edges summary:\n')
cat(sprintf('  Total rows: %d\n', nrow(temporal_edges)))
cat(sprintf('  Non-zero weights: %d (%.1f%%)\n',
            sum(temporal_edges$weight > 0),
            100 * sum(temporal_edges$weight > 0) / nrow(temporal_edges)))

# Show weight distribution by year
temporal_edges %>%
  group_by(year) %>%
  summarize(
    edges_active = sum(weight > 0),
    total_weight = sum(weight),
    .groups = 'drop'
  ) %>%
  print(n = length(years))

Loaded 8870 yearly edge records
8597 unique edge pairs in all-time network
Grid size: 8597 edge pairs × 24 years = 206328 rows

Temporal edges summary:
  Total rows: 206328
  Non-zero weights: 8035 (3.9%)
# A tibble: 24 x 3
    year edges_active total_weight
   <int>        <int>        <dbl>
 1  2000          348         1010
 2  2001          428         1258
 3  2002          417         1162
 4  2003          466         1511
 5  2004          451         1453
 6  2005          455         1598
 7  2006          383         1364
 8  2007          378         1331
 9  2008          347         1036
10  2009          331          942
11 … (output truncated)

In [133]:
# Export temporal edges - same format as genre_network_edges_gephi.csv but with year column
# Format: from,to,weight,year

temporal_edges_export <- temporal_edges %>%
  # Keep same column names as original: from, to, weight
  # Just add year column
  select(from, to, weight, year) %>%
  # Sort by edge pair then year
  arrange(from, to, year)

# Preview - show dance pop -> pop across all years
cat('Sample of temporal edges (dance pop <-> pop):\n')
temporal_edges_export %>%
  filter(from == 'dance pop', to == 'pop') %>%
  print(n = 24)

# Export
write_csv(temporal_edges_export, '../../outputs/genre_network/genre_network_edges_temporal_gephi.csv')

# Count edge pairs on the two columns themselves. Pasting them with a space
# would merge different pairs, because genre names contain spaces
# (e.g. "pop" + "rap rock" and "pop rap" + "rock" both paste to "pop rap rock").
n_edge_pairs <- nrow(distinct(temporal_edges_export, from, to))

cat(sprintf('\nExported: genre_network_edges_temporal_gephi.csv\n'))
# Plain ASCII "x" avoids the <U+00D7> escape on consoles without UTF-8 output
cat(sprintf('  %d rows (%d edge pairs x %d years)\n',
            nrow(temporal_edges_export),
            n_edge_pairs,
            length(unique(temporal_edges_export$year))))
cat(sprintf('  Columns: from, to, weight, year\n'))
cat(sprintf('  (Same format as genre_network_edges_gephi.csv + year column)\n'))

Sample of temporal edges (dance pop <-> pop):
# A tibble: 24 x 4
   from      to    weight  year
   <chr>     <chr>  <dbl> <int>
 1 dance pop pop       13  2000
 2 dance pop pop       19  2001
 3 dance pop pop       21  2002
 4 dance pop pop       17  2003
 5 dance pop pop       14  2004
 6 dance pop pop       27  2005
 7 dance pop pop       25  2006
 8 dance pop pop       34  2007
 9 dance pop pop       31  2008
10 dance pop pop       31  2009
11 dance pop pop       41  2010
12 dance pop pop       45  2011
13 dance pop pop       33  2012
14 dance pop pop       41  2013
15 dance pop pop       41  2014
16 dance p… (output truncated)

In [134]:
# Write the all-time edge list for Gephi (no year column), strongest
# connections first. Columns: from, to, weight
edges_export <- arrange(select(edges_all, from, to, weight), desc(weight))

write_csv(edges_export, '../../outputs/genre_network/genre_network_edges_gephi.csv')

cat('Exported: genre_network_edges_gephi.csv\n')
cat(sprintf('  %d edges\n', nrow(edges_export)))
cat('  Columns: from, to, weight\n\n')

# Write the node table for Gephi, adding each genre's primary main genre
# from the genre mapping. Columns: name, primary_main_genre, artist_count, macro_genre
nodes_export <- left_join(
  nodes,
  select(genre_mapping, sub_genre, primary_main_genre),
  by = c('name' = 'sub_genre')
)
nodes_export <- select(nodes_export, name, primary_main_genre, artist_count, macro_genre)

write_csv(nodes_export, '../../outputs/genre_network/genre_network_nodes_gephi.csv')

cat('Exported: genre_network_nodes_gephi.csv\n')
cat(sprintf('  %d nodes\n', nrow(nodes_export)))
cat('  Columns: name, primary_main_genre, artist_count, macro_genre\n')

Exported: genre_network_edges_gephi.csv
  8597 edges
  Columns: from, to, weight

Exported: genre_network_nodes_gephi.csv
  957 nodes
  Columns: name, primary_main_genre, artist_count, macro_genre
