# Coachella vs Normal Tour Analysis

Comparing four artists' Coachella setlists with the setlists from their shows in the year before or after the Coachella weekends.

## Analysis Setup

### Imports and Configurations

In [None]:
# Standard imports to run concert_analytics

import sys
import os

# os.path.dirname("") is "", so this resolves "../../" against the current
# working directory, i.e. two levels above wherever the kernel was started.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(""), "../../"))
# Put that directory first on the import path. This must run before the
# `postgres` import below (presumably a project-local module living under
# PROJECT_ROOT -- confirm against the repository layout).
sys.path.insert(0, PROJECT_ROOT)

import pandas as pd

from postgres import fetch_table,run_query

import logging

logging.basicConfig(level=logging.INFO)  # DEBUG < INFO < WARNING < ERROR < CRITICAL

pd.set_option("display.max_columns", None)  # Show all columns


### Gather Data into CSV and dataframe

In [None]:
# Fetch setlist and Spotify metadata, shorten long track names for use as
# chart labels, and snapshot the result to CSV.

query="""
select 
	*
from 
	analytics_project.project_002_coachella_master_setlist_data
"""

df = run_query(query)

# Names longer than 25 characters are cut to their first 22 characters plus
# "..." (25 characters total). Non-string values such as None/NaN are passed
# through unchanged; calling len() on them would raise TypeError.
df['track_song_name'] = df['track_song_name'].apply(
    lambda x: x[:22] + "..." if isinstance(x, str) and len(x) > 25 else x
)

# NOTE: the CSV is written after truncation, so it contains the shortened names.
df.to_csv('coachella_vs_normal_tool_master_setlist_data.csv',index=False)
df.head()

### Plotting Imports

In [None]:
from plotnine import ggplot, aes, geom_col, labs, theme, element_text, element_rect, scale_x_datetime, element_blank,element_line

# Gaffer brand palette: colour name -> hex code.
gaffer_colors = {
    # Warm accents
    "lightAmpOrange": "#FB9E50",
    "floodPink": "#F986BA",
    "setlistBlue": "#3C7DC4",
    "spotRed": "#D64848",
    "ampOrange": "#F25C05",
    # NOTE(review): this key does not follow the camelCase pattern of the
    # others; kept verbatim because lookups elsewhere may rely on it.
    "lightBlue: Appendix": "#A7ECF5",
    "encorePurple": "#5D4E8C",
    "stageGreen": "#33C27D",
    "clockYellow": "#F6D357",
    # Neutrals (used by gaffer_theme for backgrounds, text and grid lines)
    "spotlightCream": "#FAF3E0",
    "gafferGrey": "#777777",
    "backstageBlack": "#1C1C1C",
}

def gaffer_theme(fig_width=9,fig_height=16):
    """Return the shared plotnine theme used for Gaffer charts.

    The theme paints plot and panel backgrounds in ``spotlightCream``, renders
    caption and axis text in grey Courier New, hides the plot title, subtitle
    and y-axis title, draws dashed vertical major grid lines only, and places
    a vertical, untitled legend at the top.

    Parameters
    ----------
    fig_width, fig_height : number
        Forwarded to the theme as ``figure_size=(fig_width, fig_height)``.
        The defaults give a 9 x 16 portrait figure.

    Returns
    -------
    plotnine.theme
        A theme object to be added to a ggplot.
    """
    axis_font = "Courier New"
    title_font = "Helvetica"  # not referenced: title and subtitle are blanked below

    # Palette lookups used repeatedly below.
    cream = gaffer_colors["spotlightCream"]
    grey = gaffer_colors["gafferGrey"]

    # Text settings shared by the caption, axis titles and axis tick labels.
    grey_mono = {"family": axis_font, "size": 12, "color": grey}

    return theme(
        figure_size=(fig_width, fig_height),

        # Backgrounds
        plot_background=element_rect(fill=cream, color=None),
        panel_background=element_rect(fill=cream, color=None),
        panel_border=element_blank(),

        # Margins
        plot_margin_top=.03,
        plot_margin_bottom=.025,
        plot_margin_right=0.03,
        plot_margin_left=0.03,

        # Titles, subtitles and caption
        plot_title=element_blank(),
        plot_subtitle=element_blank(),
        plot_caption=element_text(
            ha='right',
            margin={'t': 30},  # spacing above caption
            **grey_mono,
        ),

        # Axes
        axis_title=element_text(weight='ultralight', **grey_mono),
        axis_text=element_text(**grey_mono),
        axis_title_y=element_blank(),
        # Ticks are drawn in the background colour, so they are invisible
        # but still take up their space.
        axis_ticks=element_line(color=cream),
        panel_grid_major_x=element_line(
            color=grey,
            linetype="dashed",
            size=.5,
        ),
        axis_ticks_minor=element_blank(),
        panel_grid_major_y=element_blank(),
        panel_grid_minor=element_blank(),

        # Legend
        legend_title=element_blank(),
        legend_background=element_blank(),
        legend_key=element_blank(),
        legend_key_width=10,
        legend_position="top",
        legend_margin=1,
        legend_direction="vertical",
    )

## Analysis

### Song Frequency vs. Coachella Feature

In [None]:
from IPython.display import display
from plotnine import ggplot, aes, geom_bar, coord_flip, labs, scale_fill_manual 

# Keep rows that are flagged as Coachella sets, plus rows with a non-null
# days_before_first_coachella_date (presumably shows played before the
# artist's first Coachella date -- confirm against the source table).
coachella_and_prior_df = df[
    (df["days_before_first_coachella_date"].notna()) |
    (df["is_coachella"] == True)
]

# Count how often each song was played, per artist.
artist_song_counts_df = (
    coachella_and_prior_df.groupby(["artist_name_hint","track_song_name"])
    .agg(
        frequency=("song_name", "count"),
        played_at_coachella=("is_coachella", "any")  # Boolean: True if ever played at Coachella
    )
    .reset_index()
    # One multi-key sort: most-played songs first, with ties ordered by artist
    # then song name. Two chained single-key sorts would discard the first
    # sort and leave the order of ties unspecified.
    .sort_values(
        ["frequency", "artist_name_hint", "track_song_name"],
        ascending=[False, True, True],
    )
    .reset_index(drop=True)
)

artists = ['TameImpala','JapaneseBreakfast','BillieEilish','Turnstile']

for artist in artists:
    artist_df = (
        artist_song_counts_df[artist_song_counts_df["artist_name_hint"] == artist]
        .reset_index(drop=True)
    )

    # A hint that matches no rows (typo, or artist missing from the table)
    # would otherwise go straight to plotting with no data; skip it and say so.
    if artist_df.empty:
        logging.warning("No setlist rows found for artist %r; skipping plot.", artist)
        continue

    # Horizontal bar chart: one bar per song, ordered by play count and
    # coloured by whether the song was ever played at Coachella.
    plot = (
        ggplot(artist_df)
        + aes(x="reorder(track_song_name, frequency)", y="frequency", fill="played_at_coachella")
        + geom_bar(stat="identity")
        + coord_flip()  # Makes it horizontal
        + scale_fill_manual(
            values={
                True: gaffer_colors["stageGreen"],   # Color for songs played at Coachella
                False: gaffer_colors["gafferGrey"]  # Color for songs not played at Coachella
            }
        )
        + labs(
            x="Song",
            y="Times Played",
            fill="Played at Coachella?",
            title="Song Frequencies Prior to and at Coachella"
        )
        + gaffer_theme()
    )

    # gaffer_theme() blanks the plot title, so print the artist name to
    # label each chart in the notebook output.
    print(artist)
    display(plot)

    # To export a chart, call plot.save(...) here with a per-artist filename
    # so that loop iterations do not overwrite one another.