## What visualizations do we want?
+ Heat map for the how set changed over time

In [1]:
# import packages
import pandas as pd
import json
import numpy as np
import datetime
from matplotlib import pyplot as plt

In [2]:
# import data
setlist_data = pd.read_csv("../data/processed/analysis_dataset.csv")

## individual setlists for our three of greater interest
dijon = pd.read_csv("../data/processed/salt_shed_artists/dijon_setlists.csv")
clairo = pd.read_csv("../data/processed/salt_shed_artists/clairo_setlists.csv")
mk_gee = pd.read_csv("../data/processed/salt_shed_artists/mkgee_setlists.csv")

In [3]:
# some data cleaning
setlist_data["monthly_listen_group"] = pd.Categorical(setlist_data["monthly_listen_group"], ordered=True, categories=["Low", "Medium", "High", "Very high"])

## Visuals

In [10]:
def heatmap(df, cols=3):
    df = df.copy()
    df["event_date"] =  pd.to_datetime(df["event_date"],format="%d-%m-%Y")
    max_month = df["event_date"].dt.month.max()
    if max_month in [4,6,9,11]:
        days=30
    elif (max_month == 2) & (df["event_date"].dt.year.max()%4==0):
        days=29
    elif (max_month == 2):
        days =28
    else:
        days = 31
    null_dates = pd.DataFrame({"event_date":pd.date_range(datetime.date(df["event_date"].dt.year.min(), df["event_date"].dt.month.min(), 1),
                                                    datetime.date(df["event_date"].dt.year.max(), df["event_date"].dt.month.max(), days),
                                                    freq="D"),
                            "notes":"No tour date"})
    heat_map = pd.concat([df[["event_date", "venue", "city", "base_song_pct"]],
                                null_dates]).drop_duplicates("event_date").sort_values("event_date")
    heat_map["dow"] = heat_map["event_date"].dt.dayofweek+2
    heat_map.loc[heat_map["dow"]==8, "dow"] = 1
    heat_map["month"] = heat_map["event_date"].dt.month
    heat_map["month_num"] = heat_map["month"].rank(method="dense")
    heat_map["row_position"] = np.ceil(heat_map["month_num"]/cols)
    heat_map["col_position"] = (heat_map["month_num"]-(heat_map["row_position"]-1)*cols)
    heat_map["diff"] = heat_map["event_date"].dt.day-heat_map["dow"]
    heat_map["row_offset"] = heat_map.groupby(["month_num"])["diff"].rank(method="dense")
    heat_map["x"] = heat_map["dow"]+((heat_map["col_position"]-1)*10)
    heat_map["y"] = -heat_map["row_offset"]-((heat_map["row_position"]-1)*10)
    heat_map["y_adj"] = heat_map["y"]-heat_map["y"].min()+1
    heat_map["pct_norm"] = 1
    heat_map.loc[heat_map["base_song_pct"].isna(),"pct_norm"] = 0
    heat_map.loc[heat_map["base_song_pct"]>heat_map["base_song_pct"].quantile(0.2),"pct_norm"] = 2
    heat_map.loc[heat_map["base_song_pct"]>heat_map["base_song_pct"].quantile(0.4),"pct_norm"] = 3
    heat_map.loc[heat_map["base_song_pct"]>heat_map["base_song_pct"].quantile(0.6),"pct_norm"] = 4
    heat_map.loc[heat_map["base_song_pct"]>heat_map["base_song_pct"].quantile(0.8),"pct_norm"] = 5
    return heat_map

def slope_chart(df, nights):
    '''
    Nights should be a list of dates of the shows of interest.
    '''
    slope = df[df["event_date"].isin(nights)].copy().reset_index()
    slope["rank"] = slope.groupby("id")["index"].rank(method="first")*-1
    slope = pd.pivot(slope,
                     columns="songs",
                     index="event_date",
                     values="rank").reset_index().rename(columns={"event_date":"night"})
    slope["night"] = pd.to_datetime(slope["night"], dayfirst=True)
    return slope

In [11]:
dijon_heat_map = heatmap(dijon, cols=2)

In [12]:
clairo_heat_map = heatmap(clairo, cols=2)

In [13]:
mk_gee_heat_map = heatmap(mk_gee, cols=2)

In [14]:
chicago_slope_dijon = slope_chart(dijon, ["07-12-2025", "08-12-2025"])

In [18]:
first_last_slope_dijon = slope_chart(dijon, ["25-10-2025", "12-12-2025"])

In [19]:
first_last_slope_dijon

songs,night,"""Nü Diamond",(Freak It),(Referee),Annie,Another Baby!,Automatic,Baby!,Big Mike’s,FIRE!,...,TV Blues,Talk Down,The Dress,Yamaha,alley-oop,coogie,jesse,loyal & marie,my man,rock n roll
0,2025-12-12,-21.0,,-17.0,-7.0,-2.0,-10.0,-16.0,-14.0,-11.0,...,-9.0,,-8.0,-12.0,-6.0,-4.0,,,,-5.0
1,2025-10-25,,-4.0,-8.0,,-2.0,-15.0,-1.0,,-6.0,...,,-10.0,-7.0,-5.0,,,-11.0,-13.0,-12.0,


In [15]:
chicago_slope_clairo = slope_chart(clairo, ["16-10-2024", "17-10-2024"])

In [12]:
chicago_slope_dijon = dijon[dijon["city"]=="Chicago"].copy().reset_index()
chicago_slope_dijon["rank"] = chicago_slope_dijon.groupby("id")["index"].rank(method="first")*-1
chicago_slope_dijon.loc[chicago_slope_dijon["id"]=="634c7ac3", "night"] = "12/7/2025"
chicago_slope_dijon.loc[chicago_slope_dijon["id"]=="1b4c6960", "night"] = "12/8/2025"
chicago_slope_dijon = pd.pivot(chicago_slope_dijon,
                                columns="songs",
                                index="night",
                                values="rank").reset_index()
chicago_slope_dijon.to_clipboard(index=False)

In [13]:
chicago_slope_clairo = clairo[clairo["city"]=="Chicago"].copy().reset_index()
chicago_slope_clairo["rank"] = chicago_slope_clairo.groupby("id")["index"].rank(method="first")*-1
chicago_slope_clairo.loc[chicago_slope_clairo["id"]=="4354333f", "night"] = "10/16/2024"
chicago_slope_clairo.loc[chicago_slope_clairo["id"]=="3b507cd8", "night"] = "10/17/2024"
chicago_slope_clairo = pd.pivot(chicago_slope_clairo,
                                columns="songs",
                                index="night",
                                values="rank").reset_index()
chicago_slope_clairo.to_clipboard(index=False)