# Data analysis

## Setup

In [None]:
# Load packages
library(readxl)
library(dplyr)
library(ggplot2)
library(stats)
library(stargazer)
library(purrr)
library(stringr)
library(tidyverse)
library(lubridate)
library(lessR)
library(fixest)
library(sandwich)
library(lmtest)
library(car)
library(effsize)


In [None]:
# Import the data
# Directory that holds the per-season CSV exports
base_directory <- "/Users/julienmbarki/Documents/Doctorat/Publications/Article 2/Data/Code/data_management/" #nolint

# Files to read: three seasons each of editorial and major playlists
file_names <- c(
    "editorial_playlists_23-24_final.csv",
    "editorial_playlists_22-23_final.csv",
    "editorial_playlists_21-22_final.csv",
    "major_playlists_23-24_final.csv",
    "major_playlists_22-23_final.csv",
    "major_playlists_21-22_final.csv"
)

# Read every file; the result is a list holding one data frame per file,
# in the same order as `file_names`
df_list <- lapply(file.path(base_directory, file_names), read_csv)

# Fold the list into a single data frame with successive full outer merges
# (merge() joins on the columns the two frames have in common)
df <- Reduce(
    function(left, right) merge(left, right, all = TRUE),
    df_list
)

# Export to CSV (write.csv also writes the row names as a first column)
write.csv(df, "df_final.csv")


## Data management

In [None]:
# Load the analysis data set from the working directory
# NOTE(review): this reads "df_final_ter.csv", whereas the Setup section
# writes "df_final.csv" — confirm which file is the intended input
df <- read.csv(file = "df_final_ter.csv")


### Main measure

In [None]:
# Extract numeric values
# Everything that is not a digit, a dot or a minus sign is stripped from the
# raw Stirling columns before converting what is left to numeric
df[["diversity_clean"]] <- as.numeric(
    gsub("[^[:digit:].-]", "", df[["stirling_index"]])
)
df[["diversity_clean"]]

df[["diversity_clean_2"]] <- as.numeric(
    gsub("[^[:digit:].-]", "", df[["stirling_index_2"]])
)
df[["diversity_clean_2"]]

# Scale values (z-scores computed by rescale() with kind = "z")
df[["diversity_norm"]] <- rescale(diversity_clean, df, kind = "z")
df[["diversity_norm"]]

df[["diversity_norm_2"]] <- rescale(diversity_clean_2, df, kind = "z")
df[["diversity_norm_2"]]

# Log values; 1 is added before taking the log
df[["diversity_log"]] <- log(1 + df[["diversity_clean"]])
df[["diversity_log"]]

df[["diversity_log_2"]] <- log(1 + df[["diversity_clean_2"]])
df[["diversity_log_2"]]


### Secondary measures

In [None]:
# HH-Index
# Standardise both HH-Index columns with rescale(kind = "z") and print them
df[["hhi_norm"]] <- rescale(hh_index, df, kind = "z")
df[["hhi_norm"]]

df[["hhi_norm_2"]] <- rescale(hh_index_2, df, kind = "z")
df[["hhi_norm_2"]]


In [None]:
# Distances
# Standardise the three distance columns with rescale(kind = "z") and
# print each result
df[["dist_norm"]] <- rescale(distances, df, kind = "z")
df[["dist_norm"]]

df[["dist_norm_2"]] <- rescale(distances_2, df, kind = "z")
df[["dist_norm_2"]]

df[["dist_norm_3"]] <- rescale(distances_3, df, kind = "z")
df[["dist_norm_3"]]


### Covariates

In [None]:
# Relevel factors
# Editorial type becomes a factor whose reference level is "genre"
df$editorial_type <- df$editorial_type %>%
    as.factor() %>%
    relevel(ref = "genre")
table(df$editorial_type)

# `curator` is a factor copy of playlist_type with "Editorial" as reference
df$curator <- df$playlist_type %>%
    as.factor() %>%
    relevel(ref = "Editorial")
table(df$curator)


In [None]:
# Log Followers
df[["log_followers"]] <- log(df[["playlist_followers"]])
df[["log_followers"]]

# Followers class
summary(df[["playlist_followers"]])

# Four right-closed bins: (-Inf, 25341], (25341, 98114], (98114, 260541],
# (260541, Inf). Missing follower counts stay NA.
# NOTE(review): the cut points are presumably the quartiles printed by
# summary() above — confirm they still match the current data.
follower_breaks <- c(-Inf, 25341, 98114, 260541, Inf)
follower_labels <- c("low", "mid_low", "mid_high", "high")
df[["followers_class"]] <- as.character(
    cut(
        df[["playlist_followers"]],
        breaks = follower_breaks,
        labels = follower_labels,
        right = TRUE
    )
)
table(df[["followers_class"]])


In [None]:
# Playlist dates
# Parse both date columns, then label a playlist "frontline" when the mean
# date of its tracks is later than the collection date minus 1.5 years;
# every other row is labelled "backline".
df <- df %>%
    mutate(
        collection_date = as.Date(collection_date),
        mean_track_date = as.Date(mean_track_date),
        playlist_date = case_when(
            # dyears(1.5) is not a whole number of days, so subtracting it
            # from a Date yields a date-time. Coerce the cutoff back to Date
            # so that the comparison is Date against Date.
            mean_track_date >
                as.Date(collection_date - dyears(1.5)) ~ "frontline",
            # Fallback: also catches rows whose comparison is NA
            # (e.g. a missing mean_track_date)
            TRUE ~ "backline"
        )
    )

# Factor with "frontline" as the reference level
df$playlist_date <- relevel(as.factor(df$playlist_date), ref = "frontline")
table(df$playlist_date)


## Descriptive stats

### Stats

In [None]:
# Number of unique playlists, overall and per editorial type
length(unique(df$playlist_id))

df %>%
    group_by(editorial_type) %>%
    summarize(n_distinct(playlist_id))

# Number of tracks per playlist, overall and by playlist type
summary(df$nb_tracks)
# na.rm = TRUE so that missing values do not turn the result into NA,
# matching the grouped summaries below
sd(df$nb_tracks, na.rm = TRUE)

# `count` is the number of rows in each group; groups are listed from the
# highest to the lowest mean number of tracks
df %>%
    group_by(playlist_type) %>%
    summarise(
        count = n(),
        mean_tracks = mean(nb_tracks, na.rm = TRUE),
        median_tracks = median(nb_tracks, na.rm = TRUE),
        min_tracks = min(nb_tracks, na.rm = TRUE),
        max_tracks = max(nb_tracks, na.rm = TRUE),
        sd_tracks = sd(nb_tracks, na.rm = TRUE)
    ) %>%
    arrange(desc(mean_tracks))


In [None]:
# Followers
summary(df$playlist_followers)
# na.rm = TRUE so that missing values do not turn the result into NA,
# matching the grouped summary below
sd(df$playlist_followers, na.rm = TRUE)

# Followers per playlist type; `count` is the number of rows in each group
# and groups are listed from the highest to the lowest mean
df %>%
    group_by(playlist_type) %>%
    summarise(
        count = n(),
        mean_followers = mean(playlist_followers, na.rm = TRUE),
        median_followers = median(playlist_followers, na.rm = TRUE),
        min_followers = min(playlist_followers, na.rm = TRUE),
        max_followers = max(playlist_followers, na.rm = TRUE),
        sd_followers = sd(playlist_followers, na.rm = TRUE)
    ) %>%
    arrange(desc(mean_followers))


In [None]:
# Number of clusters per playlist
# Overall distribution of both cluster counts
summary(df[["nb_clusters"]])
summary(df[["nb_clusters_2"]])

# Row count and mean of both cluster counts for each editorial type
summarise(
    group_by(df, editorial_type),
    count = n(),
    mean_clusters_1 = mean(nb_clusters, na.rm = TRUE),
    mean_clusters_2 = mean(nb_clusters_2, na.rm = TRUE)
)


In [None]:
# HH-Index
# Overall distribution of both HH-Index columns
summary(df[["hh_index"]])
summary(df[["hh_index_2"]])

# Row count and mean of both HH-Index columns for each editorial type
summarise(
    group_by(df, editorial_type),
    count = n(),
    mean_hhi_1 = mean(hh_index, na.rm = TRUE),
    mean_hhi_2 = mean(hh_index_2, na.rm = TRUE)
)


In [None]:
# Distances index
# Overall distribution of the first two distance columns
summary(df[["distances"]])
summary(df[["distances_2"]])

# Row count and mean of both distance columns for each editorial type
summarise(
    group_by(df, editorial_type),
    count = n(),
    mean_dist_1 = mean(distances, na.rm = TRUE),
    mean_dist_2 = mean(distances_2, na.rm = TRUE)
)


In [None]:
# Stirling index
# Overall distribution of both cleaned Stirling columns
summary(df[["diversity_clean"]])
summary(df[["diversity_clean_2"]])

# Row count and mean of both Stirling columns for each editorial type
summarise(
    group_by(df, editorial_type),
    count = n(),
    mean_div_1 = mean(diversity_clean, na.rm = TRUE),
    mean_div_2 = mean(diversity_clean_2, na.rm = TRUE)
)


### Plots

In [None]:
# Histogram (25 bins) of the first cleaned Stirling measure
axis_rule <- element_line(linewidth = 1, color = "black")

div_hist_1 <- ggplot(data = df, mapping = aes(x = diversity_clean)) +
  geom_histogram(bins = 25, fill = "lightblue", color = "black") +
  labs(
    x = expression(k*alpha*" Rao-Stirling"), #nolint
    y = "Frequency"
  ) +
  theme_minimal() +
  # Keep only the major horizontal grid lines and draw both axis lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    panel.grid.minor.y = element_blank(),
    axis.line.x = axis_rule,
    axis.line.y = axis_rule
  )
div_hist_1

# Save the plot
ggsave(
  "div_plot_1.png",
  plot = div_hist_1,
  width = 8,
  height = 8,
  dpi = 300
)


In [None]:
# Histogram (25 bins) of the second cleaned Stirling measure
div_hist_2 <- ggplot(
  df,
  aes(x = diversity_clean_2)
  ) +
  geom_histogram(
    bins = 25,
    fill = "lightblue",
    color = "black"
  ) +
  labs(
    # The second measure is labelled k-beta in the other plots and models of
    # this notebook; the axis previously repeated the k-alpha label
    x = expression(k*beta*" Rao-Stirling"), #nolint
    y = "Frequency"
  ) +
  theme_minimal() +
  # Keep only the major horizontal grid lines and draw both axis lines
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    panel.grid.minor.y = element_blank(),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black")
  )
div_hist_2

# Save the plot (passed explicitly instead of relying on the last plot drawn)
ggsave("div_plot_2.png", plot = div_hist_2, width = 8, height = 8, dpi = 300)


In [None]:
# Mean of the first Stirling measure for each collection date
# (missing values are ignored)
df_mean <- summarize(
  group_by(df, collection_date),
  mean_stirling = mean(diversity_clean, na.rm = TRUE)
)

# Per-date mean as a blue line, with a fitted linear trend in red
axis_rule <- element_line(linewidth = 1, color = "black")

trend_plot_1 <- ggplot(
  data = df_mean,
  mapping = aes(x = collection_date, y = mean_stirling)
) +
  geom_line(color = "blue", linewidth = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "red", linewidth = 1) +
  labs(
    x = "Collection Date",
    y = expression(k*alpha*" Rao-Stirling") #nolint
  ) +
  theme_minimal() +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    panel.grid.minor.y = element_blank(),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = axis_rule,
    axis.line.y = axis_rule,
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6
  )
trend_plot_1

# Save the plot
ggsave(
  "div_time_plot_1.png",
  plot = trend_plot_1,
  width = 8,
  height = 5,
  dpi = 300
)


In [None]:
# Time series plot of the second Stirling measure vs collection_date
# Mean per collection date; na.rm = TRUE so that one missing value does not
# turn a whole date's mean into NA (same treatment as the first measure)
df_mean <- df %>%
  group_by(collection_date) %>%
  summarize(mean_stirling = mean(diversity_clean_2, na.rm = TRUE))

# Per-date mean as a blue line, with a fitted linear trend in red
trend_plot_2 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_stirling
  )
  ) +
  geom_line(color = "blue", linewidth = 0.5) +  # Original time series
  geom_smooth(method = "lm", se = FALSE, color = "red", linewidth = 1) +
  labs(
    x = "Collection Date",
    y = expression(k*beta*" Rao-Stirling") #nolint
  ) +
  theme_minimal() +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    panel.grid.minor.y = element_blank(),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    # No trailing comma: an empty last argument is not accepted by every
    # ggplot2 version
    aspect.ratio = 0.6
  )
trend_plot_2

# Save the plot (passed explicitly instead of relying on the last plot drawn)
ggsave(
    "div_time_plot_2.png",
    plot = trend_plot_2,
    width = 8,
    height = 5,
    dpi = 300
)


#### Editorial type

In [None]:
# Time series plot of the first Stirling measure vs collection_date,
# one line per editorial type
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, editorial_type) %>%
  summarize(
    mean_stirling = mean(diversity_clean, na.rm = TRUE),
    .groups = "drop"
  )

# Thin line = per-date mean, thick line = linear trend, both per type
editorial_plot_1 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_stirling,
    color = editorial_type
  )
  ) +
  geom_line(linewidth = 0.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  labs(
    x = "Collection Date",
    y = expression(k*alpha*" Rao-Stirling") #nolint
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
editorial_plot_1

# Save the plot (passed explicitly instead of relying on the last plot drawn)
ggsave(
    "div_1_time_plot.png",
    plot = editorial_plot_1,
    width = 8,
    height = 5,
    dpi = 300
)


In [None]:
# Time series plot of the second Stirling measure vs collection_date,
# one line per editorial type
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, editorial_type) %>%
  summarize(
    mean_stirling = mean(diversity_clean_2, na.rm = TRUE),
    .groups = "drop"
  )

# Thin line = per-date mean, thick line = linear trend, both per type
editorial_plot_2 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_stirling,
    color = editorial_type
  )
  ) +
  geom_line(linewidth = 0.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  labs(
    x = "Collection Date",
    y = expression(k*beta*" Rao-Stirling") #nolint
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
editorial_plot_2

# Save the plot (passed explicitly instead of relying on the last plot drawn)
ggsave(
    "div_2_time_plot.png",
    plot = editorial_plot_2,
    width = 8,
    height = 5,
    dpi = 300
)


In [None]:
# Time series plot of distances_3 vs collection_date,
# one line per editorial type
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, editorial_type) %>%
  summarize(
    mean_distances = mean(distances_3, na.rm = TRUE),
    .groups = "drop"
  )

# Thin line = per-date mean, thick line = linear trend, both per type
editorial_plot_3 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_distances,
    color = editorial_type
  )
  ) +
  geom_line(linewidth = 0.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
  labs(
    x = "Collection Date",
    y = "Mean distance"
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
editorial_plot_3

# Save the plot (passed explicitly instead of relying on the last plot drawn)
ggsave(
    "div_3_time_plot.png",
    plot = editorial_plot_3,
    width = 8,
    height = 5,
    dpi = 300
)


#### Curator

In [None]:
# Time series plot of the first Stirling measure vs collection_date,
# one line per playlist type (curator)
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, playlist_type) %>%
  summarize(
    mean_stirling = mean(diversity_clean, na.rm = TRUE),
    .groups = "drop"
  )

curator_plot_1 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_stirling,
    color = playlist_type
  )
  ) +
  geom_line() +
  #geom_point() +
  labs(
    x = "Collection Date",
    y = expression(k*alpha*" Rao-Stirling") #nolint
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
curator_plot_1

# Saved under its own name: "div_1_time_plot.png" is already used by the
# editorial-type plot and would be overwritten
ggsave(
    "div_1_time_plot_curator.png",
    plot = curator_plot_1,
    width = 8,
    height = 5,
    dpi = 300
)


In [None]:
# Time series plot of the second Stirling measure vs collection_date,
# one line per playlist type (curator)
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, playlist_type) %>%
  summarize(
    mean_stirling = mean(diversity_clean_2, na.rm = TRUE),
    .groups = "drop"
  )

curator_plot_2 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_stirling,
    color = playlist_type
  )
  ) +
  geom_line() +
  #geom_point() +
  labs(
    x = "Collection Date",
    y = expression(k*beta*" Rao-Stirling") #nolint
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
curator_plot_2

# Saved under its own name: "div_2_time_plot.png" is already used by the
# editorial-type plot and would be overwritten
ggsave(
    "div_2_time_plot_curator.png",
    plot = curator_plot_2,
    width = 8,
    height = 5,
    dpi = 300
)


In [None]:
# Time series plot of distances_3 vs collection_date,
# one line per playlist type (curator)
# na.rm = TRUE so that one missing value does not turn a whole group's mean
# into NA; .groups = "drop" returns an ungrouped result
df_mean <- df %>%
  group_by(collection_date, playlist_type) %>%
  summarize(
    mean_distances = mean(distances_3, na.rm = TRUE),
    .groups = "drop"
  )

curator_plot_3 <- ggplot(
  df_mean,
  aes(
    x = collection_date,
    y = mean_distances,
    color = playlist_type
  )
  ) +
  geom_line() +
  #geom_point() +
  labs(
    x = "Collection Date",
    y = "Mean distance"
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "gray80"),
    axis.title.y = element_text(vjust = +2, size = 12),
    axis.title.x = element_text(vjust = 0.5, size = 12),
    axis.line.x = element_line(linewidth = 1, color = "black"),
    axis.line.y = element_line(linewidth = 1, color = "black"),
    # y tick labels are rotated by 90 degrees
    axis.text.y = element_text(
      angle = 90,
      color = "black",
      size = 11,
      face = 1,
      hjust = 0.5
    ),
    aspect.ratio = 0.6,
    legend.position = "bottom"
  )
curator_plot_3

# Saved under its own name: "div_3_time_plot.png" is already used by the
# editorial-type plot and would be overwritten
ggsave(
    "div_3_time_plot_curator.png",
    plot = curator_plot_3,
    width = 8,
    height = 5,
    dpi = 300
)


## Models

### FE static

In [None]:
# k alpha Rao-Stirling
# Baseline: standardised diversity on editorial type only.
# NOTE(review): standard errors are clustered on editorial_type, which is
# also the regressor of interest — confirm the number of clusters is adequate.
model_1 <- feols(
    fml = diversity_norm ~ editorial_type,
    data = df,
    cluster = ~editorial_type
)
summary(model_1)

# Same outcome with controls and collection_date fixed effects
model_2 <- feols(
    fml = diversity_norm ~
        editorial_type + nb_tracks + playlist_date + followers_class |
        collection_date,
    data = df,
    cluster = ~editorial_type
)
summary(model_2)


In [None]:
# k beta Rao-Stirling
# Baseline: second standardised diversity measure on editorial type only.
# The names model_1 / model_2 are also assigned in other cells of this
# notebook, so each run replaces the previous objects.
model_1 <- feols(
    fml = diversity_norm_2 ~ editorial_type,
    data = df,
    cluster = ~editorial_type
)
summary(model_1)

# Same outcome with controls and collection_date fixed effects
model_2 <- feols(
    fml = diversity_norm_2 ~
        editorial_type + nb_tracks + playlist_date + followers_class |
        collection_date,
    data = df,
    cluster = ~editorial_type
)
summary(model_2)


In [None]:
# Distance index
# Standardised distances_3 on editorial type with controls and
# collection_date fixed effects; standard errors clustered on editorial_type
model <- feols(
    fml = dist_norm_3 ~
        editorial_type + nb_tracks + playlist_date + followers_class |
        collection_date,
    data = df,
    cluster = ~editorial_type
)
summary(model)


### Time series

In [None]:
# Create a time index
# time_index: days elapsed since the earliest collection date
# (na.rm = TRUE so that a missing date does not make every value NA)
first_collection <- min(df$collection_date, na.rm = TRUE)
df$time_index <- as.numeric(df$collection_date - first_collection)
df$year_month <- format(as.Date(df$collection_date), "%Y-%m")

# month_index: calendar months elapsed since the first collection month,
# starting at 1. Counting calendar months, instead of ranking the year-month
# labels that happen to be present, keeps the spacing right when a month has
# no observations; the two agree whenever every month is present.
collection_lt <- as.POSIXlt(as.Date(df$collection_date))
absolute_month <- collection_lt$year * 12 + collection_lt$mon
df$month_index <- absolute_month - min(absolute_month, na.rm = TRUE) + 1

# k alpha Rao-Stirling
# Linear monthly trend with controls; the trend is allowed to differ by
# followers class. Standard errors clustered by playlist and collection date.
model_trend <- feols(
  diversity_norm ~ month_index + nb_tracks + playlist_date +
  followers_class * month_index,
  data = df, cluster = c("playlist_id", "collection_date")
  )
summary(model_trend)

# k beta Rao-Stirling
model_trend_2 <- feols(
  diversity_norm_2 ~ month_index + nb_tracks + playlist_date +
  followers_class * month_index,
  data = df, cluster = c("playlist_id", "collection_date")
  )
summary(model_trend_2)


In [None]:
# Evolution of Diversity Over Time with Playlist Type Interaction
# The monthly trend is interacted with editorial type and with followers
# class; standard errors are clustered by playlist and collection date.

# k alpha Rao-Stirling
model_trend <- feols(
  fml = diversity_norm ~
    month_index * editorial_type +
    nb_tracks + playlist_date + followers_class * month_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)

# k beta Rao-Stirling (replaces the model_trend object fitted just above)
model_trend <- feols(
  fml = diversity_norm_2 ~
    month_index * editorial_type +
    nb_tracks + playlist_date + followers_class * month_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)


In [None]:
# Distance index: trend in days (time_index) with a separate slope per
# editorial type and per followers class; note that editorial_type enters
# only through its interaction with time_index. Standard errors are
# clustered by playlist and collection date.
model_trend <- feols(
  fml = dist_norm_3 ~
    time_index + time_index:editorial_type +
    nb_tracks + playlist_date + followers_class * time_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)


In [23]:
# Quadratic model
# Evolution of diversity over time with a squared monthly term; the linear
# month_index term enters through the followers_class interaction.
# The square is written with I() explicitly, which is how the term is
# labelled in the estimation output.
model_trend <- feols(
  fml = diversity_norm ~
    I(month_index^2) +
    nb_tracks + playlist_date + followers_class * month_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)


OLS estimation, Dep. Var.: diversity_norm
Observations: 19,153 
Standard-errors: Clustered (playlist_id & collection_date) 
                                     Estimate Std. Error   t value  Pr(>|t|)
(Intercept)                          0.163945   0.131520  1.246538 0.2149749
I(month_index^2)                    -0.000038   0.000143 -0.268439 0.7888188
nb_tracks                           -0.006718   0.002214 -3.034747 0.0029476
playlist_datebackline                0.031457   0.103473  0.304014 0.7616391
followers_classlow                   0.352938   0.167108  2.112043 0.0367392
followers_classmid_high              0.332350   0.140835  2.359861 0.0198820
followers_classmid_low               0.465896   0.206004  2.261587 0.0255084
month_index                          0.007198   0.005979  1.203778 0.2310249
followers_classlow:month_index      -0.003855   0.004336 -0.889117 0.3757046
followers_classmid_high:month_index -0.006035   0.004222 -1.429539 0.1554263
followers_classmid_low:month_

### Secondary models

#### Playlist curator

In [None]:
# Editorial type interacted with curator, with controls and collection_date
# fixed effects. Standard errors are clustered on the editorial type x
# curator combination (`:` applied to the two factors).

# k alpha Rao-Stirling
model_1 <- feols(
    fml = diversity_norm ~
        editorial_type * curator + nb_tracks +
        playlist_date + followers_class | collection_date,
    data = df,
    cluster = with(df, editorial_type:curator)
)
summary(model_1)

# k beta Rao-Stirling
model_2 <- feols(
    fml = diversity_norm_2 ~
        editorial_type * curator + nb_tracks +
        playlist_date + followers_class | collection_date,
    data = df,
    cluster = with(df, editorial_type:curator)
)
summary(model_2)


In [26]:
# Monthly trend interacted with curator and with followers class; standard
# errors are clustered by playlist and collection date.

# k alpha Rao-Stirling
model_trend <- feols(
  fml = diversity_norm ~
    month_index * curator +
    nb_tracks + playlist_date + followers_class * month_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)

# k beta Rao-Stirling (replaces the model_trend object fitted just above)
model_trend <- feols(
  fml = diversity_norm_2 ~
    month_index * curator +
    nb_tracks + playlist_date + followers_class * month_index,
  data = df,
  cluster = ~ playlist_id + collection_date
)
summary(model_trend)


OLS estimation, Dep. Var.: diversity_norm
Observations: 19,153 
Standard-errors: Clustered (playlist_id & collection_date) 
                                     Estimate Std. Error   t value   Pr(>|t|)
(Intercept)                          0.124227   0.147849  0.840232 0.40243531
month_index                          0.005706   0.002393  2.384775 0.01864280
curatorMajor label                  -0.165284   0.286284 -0.577344 0.56477972
nb_tracks                           -0.005866   0.002687 -2.183158 0.03095445
playlist_datebackline                0.035032   0.102343  0.342304 0.73271587
followers_classlow                   0.479993   0.256365  1.872301 0.06357896
followers_classmid_high              0.340642   0.144344  2.359930 0.01987850
followers_classmid_low               0.548032   0.158488  3.457878 0.00075215
month_index:curatorMajor label      -0.008112   0.006669 -1.216316 0.22623184
month_index:followers_classlow       0.003798   0.008481  0.447768 0.65512123
month_index:follow

OLS estimation, Dep. Var.: diversity_norm_2
Observations: 19,153 
Standard-errors: Clustered (playlist_id & collection_date) 
                                     Estimate Std. Error   t value     Pr(>|t|)
(Intercept)                          0.707129   0.152504  4.636791 0.0000090171
month_index                          0.005674   0.002263  2.506681 0.0135137206
curatorMajor label                  -0.121370   0.351029 -0.345756 0.7301263759
nb_tracks                           -0.012134   0.002985 -4.065175 0.0000857447
playlist_datebackline               -0.060873   0.096262 -0.632362 0.5283432668
followers_classlow                   0.417495   0.314063  1.329334 0.1862388727
followers_classmid_high              0.325308   0.120540  2.698750 0.0079555612
followers_classmid_low               0.496062   0.216646  2.289736 0.0237692557
month_index:curatorMajor label      -0.004523   0.008174 -0.553275 0.5810959506
month_index:followers_classlow       0.004477   0.009747  0.459300 0.64684

#### Playlist dates

In [None]:
# Playlist dates
# Three OLS models with the same regressors and different outcomes, each
# reported next to its HC1 heteroskedasticity-robust coefficient table.
# NOTE(review): `playlist_level_data` is not created anywhere in the visible
# part of this notebook — confirm it exists before running this cell.

# Coefficient table for `fit` using the HC1 covariance matrix of that same fit
robust_hc1 <- function(fit) {
    coeftest(fit, vcov = vcovHC(fit, type = "HC1"))
}

# OLS model playlist dates and diversity 1
model <- lm(
    diversity_norm ~
    log_followers + nb_tracks +
    avg_track_popularity + avg_artist_popularity + playlist_date,
    data = playlist_level_data
)

# Robust standard errors using HC1
model_robust <- robust_hc1(model)
stargazer(model, model_robust, type = "text")

# OLS model playlist dates and diversity 2
model_2 <- lm(
    diversity_norm_2 ~
    log_followers + nb_tracks +
    avg_track_popularity + avg_artist_popularity + playlist_date,
    data = playlist_level_data
)

# Robust standard errors using HC1, computed from model_2 itself
# (the covariance matrix was previously taken from the first model)
model_robust_2 <- robust_hc1(model_2)
stargazer(model_2, model_robust_2, type = "text")

# OLS model playlist dates and distances
model_3 <- lm(
    dist_norm_3 ~
    log_followers + nb_tracks +
    avg_track_popularity + avg_artist_popularity + playlist_date,
    data = playlist_level_data
)

# Robust standard errors using HC1, computed from model_3 itself
# (the covariance matrix was previously taken from the first model)
model_robust_3 <- robust_hc1(model_3)
stargazer(model_3, model_robust_3, type = "text")
