# Coachella vs Normal Tour Analysis

Exploring the setlists of 4 artists' Coachella sets alongside the shows they played in the year before or after the Coachella weekends.

## Analysis Setup

### Imports and Configurations

In [None]:
# Standard imports to run concert_analytics

import sys
import os

# os.path.dirname("") is "", so this resolves to the directory two levels above
# the current working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(""), "../../"))
# Put the project root first on the import search path. Presumably this is what
# lets the project-local `postgres` module below be imported — confirm against
# the repository layout. It must run before that import.
sys.path.insert(0, PROJECT_ROOT)

import pandas as pd

from postgres import fetch_table,run_query

import logging

logging.basicConfig(level=logging.INFO)  # DEBUG < INFO < WARNING < ERROR < CRITICAL

pd.set_option("display.max_columns", None)  # Show all columns

# Project slug passed to export_chart, where it becomes the file-name prefix
# of every exported chart.
project_name = 'Coachella-vs-Normal-Tour'


### Gather Data into CSV and dataframe

In [None]:
# Fetch setlist and Spotify metadata, shorten long song labels, snapshot to CSV.

MAX_SONG_LABEL_LEN = 25  # labels longer than this are cut and suffixed with "..."


def _truncate_label(name, max_len=MAX_SONG_LABEL_LEN):
    """Shorten ``name`` to at most ``max_len`` characters, ending in "...".

    Strings of ``max_len`` characters or fewer are returned unchanged.
    Non-string values (None / NaN from a null database column) are passed
    through untouched instead of raising ``TypeError`` on ``len()``.
    """
    if not isinstance(name, str) or len(name) <= max_len:
        return name
    return name[: max_len - 3] + "..."


query="""
select 
	*
from 
	analytics_project.project_002_coachella_master_setlist_data
"""

df = run_query(query)

# Truncation happens before the export, so the CSV holds the shortened names too.
# Re-running this cell is safe: an already-truncated label is exactly
# MAX_SONG_LABEL_LEN characters long and is left alone.
df['track_song_name'] = df['track_song_name'].apply(_truncate_label)

# NOTE(review): "tool" in the file name looks like a typo for "tour"; it is left
# as-is because other consumers may read this exact path — confirm before renaming.
df.to_csv('coachella_vs_normal_tool_master_setlist_data.csv',index=False)
df.head()

### Plotting Imports

In [None]:
from plotnine import ggplot, aes, geom_col, labs, theme, element_text, element_rect, scale_x_datetime, element_blank,element_line

# Named colour palette (name -> hex code) used by the theme and the charts below.
gaffer_colors = {
    "lightAmpOrange": "#FB9E50",
    "floodPink": "#F986BA",
    "setlistBlue": "#3C7DC4",
    "spotRed": "#D64848",
    "ampOrange": "#F25C05",
    "lightBlue": "#A7ECF5",
    "encorePurple": "#5D4E8C",
    "stageGreen": "#33C27D",
    "clockYellow": "#F6D357",
    "spotlightCream": "#FAF3E0",
    "gafferGrey": "#777777",
    "backstageBlack": "#1C1C1C",
}

# Caption layer added to every chart to credit the data source.
default_labels = labs(caption="Source: setlist.fm sourced via Concert Analytics")

def gaffer_theme(fig_width=9,fig_height=16,panel_grid='x'):
    """Build the plotnine theme shared by the charts in this notebook.

    Parameters
    ----------
    fig_width, fig_height
        Passed to ``theme(figure_size=(fig_width, fig_height))``.
    panel_grid
        ``'x'`` draws ``panel_grid_major_x``, ``'y'`` draws
        ``panel_grid_major_y``, ``'both'`` draws both. Any other value leaves
        both major grids blank.

    Returns
    -------
    The configured ``theme`` object, ready to be added to a ``ggplot``.
    """
    mono_font = "Courier New"
    grey = gaffer_colors["gafferGrey"]
    cream = gaffer_colors["spotlightCream"]

    # Text styling common to the caption, the axis titles and the axis labels.
    grey_mono_text = {"family": mono_font, "size": 12, "color": grey}

    # One dashed line element, reused for whichever major grids are enabled.
    dashed_grid = element_line(color=grey, linetype="dashed", size=0.5)

    def _major_grid(axis):
        """Return the dashed grid line if ``axis`` is enabled, else a blank."""
        if panel_grid in (axis, "both"):
            return dashed_grid
        return element_blank()

    return theme(
        figure_size=(fig_width, fig_height),

        # Backgrounds: plot and panel share the cream fill, no border.
        plot_background=element_rect(fill=cream, color=None),
        panel_background=element_rect(fill=cream, color=None),
        panel_border=element_blank(),

        # Margins
        plot_margin_top=.03,
        plot_margin_bottom=.025,
        plot_margin_right=0.03,
        plot_margin_left=0.03,

        # Title and subtitle are hidden; only the caption is rendered.
        plot_title=element_blank(),
        plot_subtitle=element_blank(),
        plot_caption=element_text(
            ha='right',
            margin={'t': 30},  # spacing above caption
            **grey_mono_text,
        ),

        # Axis text; the y-axis title is suppressed.
        axis_title=element_text(weight='ultralight', **grey_mono_text),
        axis_text=element_text(**grey_mono_text),
        axis_title_y=element_blank(),

        # Major ticks are drawn in the background colour; minor ticks are off.
        axis_ticks=element_line(color=cream),
        axis_ticks_minor=element_blank(),

        # Grid
        panel_grid_major_y=_major_grid("y"),
        panel_grid_major_x=_major_grid("x"),
        panel_grid_minor=element_blank(),

        # Legend
        legend_title=element_blank(),
        legend_background=element_blank(),
        legend_key=element_blank(),
        legend_key_width=10,
        legend_position="top",
        legend_margin=1,
        legend_direction="vertical",
    )

def export_chart(plot,chart_number="000",viz_name="unnamed",project_name="no-project",width=16,height=9):
    """Save ``plot`` as an SVG under ``viz/``, creating the folder if needed.

    The output path is ``viz/{project_name}_{chart_number}_{viz_name}.svg``,
    relative to the current working directory.

    Parameters
    ----------
    plot
        Object exposing ``save(filename=..., format=..., width=..., height=..., dpi=...)``.
    chart_number, viz_name, project_name
        Pieces of the output file name.
    width, height
        Figure size in inches, forwarded to ``plot.save``.
    """
    filename=f"viz/{project_name}_{chart_number}_{viz_name}.svg"

    # Ensure the output folder exists so the export does not fail on a fresh
    # checkout where viz/ has not been created yet.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    plot.save(
        filename=filename,
        format="svg",
        width=width,      # inches
        height=height,    # inches
        dpi=300           # high-quality output
    )

## Analysis

### What's in Store for Us

#### Shows over time

In [None]:
# Chart 001: number of distinct shows per artist, stacked by Coachella period.
from IPython.display import display
from plotnine import *
from mizani.breaks import date_breaks
from mizani.formatters import date_format

chart_number = "001"
viz_name = "Overview-of-Shows"

# Category order applied to the period column before plotting.
coachella_analytics_period_order = ["After Coachella","Coachella", "Before Coachella"]

# One row per (artist, period) with the count of unique event ids.
shows_by_artist_period = df.groupby(["artist_display_name", "coachella_analytics_period"])
overview_df = shows_by_artist_period.agg(num_shows=("event_id", "nunique")).reset_index()

overview_df["coachella_analytics_period"] = pd.Categorical(
    overview_df["coachella_analytics_period"],
    categories=coachella_analytics_period_order,
    ordered=True,
)

# Fill colour for each period.
period_fill_colors = {
    "Before Coachella": gaffer_colors["lightBlue"],
    "Coachella": gaffer_colors["stageGreen"],
    "After Coachella": gaffer_colors["lightAmpOrange"],
}

overview_base = ggplot(
    overview_df,
    aes(x='artist_display_name', y='num_shows', fill='coachella_analytics_period'),
)

plot = (
    overview_base
    + geom_col(position="stack")
    + coord_flip()
    + labs(x="Artist", y="Number of Shows")
    + scale_fill_manual(values=period_fill_colors)
    + gaffer_theme(fig_width=16, fig_height=9, panel_grid="y")
    + default_labels
)

export_chart(plot,chart_number=chart_number,viz_name=viz_name,project_name=project_name)


#### Song Frequency vs. Coachella Feature

In [None]:
# Chart 002: per-artist song play counts, stacked by Coachella period.
# One SVG is exported per artist listed in `artists`.
from IPython.display import display
from plotnine import ggplot, aes, geom_bar, coord_flip, labs, scale_fill_manual 
from pandas.api.types import CategoricalDtype

chart_number = "002"
viz_name = "Overview-Of-Songs"


# Count how often each song was played (non-null song_name rows per group).
artist_song_counts_df = (
    df.groupby(["artist_name_hint","coachella_analytics_period","track_song_name"])
    .agg(
        frequency=("song_name", "count"),
    )
    .reset_index()
    # A single multi-key sort: most-played first, artist as the tie-breaker.
    # Two chained single-key sorts would discard the first one, because the
    # default single-column sort is not stable.
    .sort_values(["frequency", "artist_name_hint"], ascending=[False, True])
    .reset_index(drop=True)
)

# Category order applied to the period column before plotting.
coachella_analytics_period_order = ["After Coachella","Coachella", "Before Coachella"]

artist_song_counts_df["coachella_analytics_period"] = pd.Categorical(
    artist_song_counts_df["coachella_analytics_period"],
    categories=coachella_analytics_period_order,
    ordered=True
)

# Values of artist_name_hint to chart.
# NOTE(review): the notebook intro mentions 4 artists but only three hints are
# listed here — confirm whether one is missing.
artists = [
        'JapaneseBreakfast'
        , 'BillieEilish'
        , 'Turnstile'
        ]

for artist in artists:
    # Rows for this artist only; reset_index returns a new frame, so the
    # column assignment below does not touch artist_song_counts_df.
    artist_df = (
        artist_song_counts_df[artist_song_counts_df["artist_name_hint"] == artist]
        .reset_index(drop=True)
    )

    # Order songs by their total frequency across all periods (ascending).
    artist_df["track_song_name"] = pd.Categorical(
        artist_df["track_song_name"],
        categories=artist_df.groupby("track_song_name")["frequency"]
            .sum()
            .sort_values(ascending=True)
            .index,
        ordered=True
    )

    plot = (
        ggplot(artist_df, aes(x='track_song_name', y='frequency', fill='coachella_analytics_period'))
        + geom_col(position="stack")
        + coord_flip()
        + labs(
            x="Song Name",
            y="Song Frequency"
        )
        + scale_fill_manual(values={
            "Before Coachella": gaffer_colors["lightBlue"],
            "Coachella": gaffer_colors["stageGreen"],
            "After Coachella": gaffer_colors["lightAmpOrange"]
        })
        + gaffer_theme(fig_width=9,fig_height=12)
        + default_labels
    )

    # Prefix the artist so each export gets its own file name.
    viz_name_final = f"{artist}-{viz_name}"
    export_chart(plot,chart_number=chart_number,viz_name=viz_name_final,project_name=project_name,width=9,height=12)
    
