In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict, Optional
import numpy as np

# Import our data loader and analysis modules
from parsed_data_loader import load_and_prepare_data, load_all_csvs
from analysis_modules import (
    analyze_peaks_by_city
    ,analyze_song_adoption_overall
    ,calculate_stickiness_metrics
    ,calculate_song_to_artist_ratio
)

from plot_utils import (
    plot_city_trends,
    plot_streams_per_listener_trends,
    plot_city_comparison,
    calculate_streams_per_listener
)

# Set plot style for every figure produced later in the notebook.
# The matplotlib style sheet is applied first and the seaborn palette second.
# NOTE(review): both calls appear to write the same colour-cycle setting, so the
# 'husl' palette is expected to win only because it runs last — confirm before
# reordering these two lines.
plt.style.use('fivethirtyeight')
sns.set_palette('husl')

In [2]:
# Load the prepared dataset and report its dimensions and column names.
# The two f-string fragments below are adjacent literals that Python joins
# into one string, so an explicit "\n" is required; without it "Columns:"
# is printed glued onto the end of the shape tuple.
df = load_and_prepare_data()
print(f"Data shape: {df.shape}\n"
      f"Columns: {df.columns}")

✅ song_velocity_table.csv created with prepared data
Data shape: (22859, 11)Columns: Index(['city', 'previous_period', 'current_period', '%_change', 'week', 'song',
       'song_id', 'measure', 'level', 'period_type', 'grouping'],
      dtype='object')


In [3]:
# Preview the aggregate ('All Cities') rows that report listener counts.
df.loc[df['city'].eq('All Cities') & df['measure'].eq('listeners')].head()

Unnamed: 0,city,previous_period,current_period,%_change,week,song,song_id,measure,level,period_type,grouping
0,All Cities,23,11,-52%,20240927,Artist Level,artist,listeners,artist,weekly,city
38,All Cities,7,2,-71%,20240201,The Way That It Was,1711474242,listeners,song,monthly,city
51,All Cities,1,1,0%,20240308,The Way That It Was,1711474242,listeners,song,weekly,city
55,All Cities,2,2,0%,20240913,Kid,1711474235,listeners,song,weekly,city
58,All Cities,3,5,+67%,20240601,Kid,1711474235,listeners,song,monthly,city


In [4]:
# Run the per-city peak analysis. Per the original author's note this call
# shows the visualizations and the printed output; the recorded output below
# starts with the "City Performance Summary" report.
# The three returned objects are unpacked as city-, song- and category-level
# metrics (meaning inferred from the variable names; their exact types are
# defined in analysis_modules — TODO confirm).
city_metrics, song_metrics, category_metrics = analyze_peaks_by_city()



City Performance Summary (First 12 Weeks After Release):

Key Metrics:
- avg_weeks_to_peak: Average number of weeks until peak streaming activity
- peak_streams: Highest number of streams in a single week
- peak_weekly_listeners: Highest number of listeners in any single week
- songs_analyzed: Number of songs analyzed for this city
- songs_peaked_first_week: Number of songs that peaked in their first week
- pct_peaked_first_week: Percentage of songs that peaked in their first week
- songs_still_growing: Number of songs still growing after 12 weeks
- total_streams: Total streams across all songs
- consistency_score: Percentage of songs that were streamed in last 4 weeks
- avg_weekly_streams_per_listener: Average of (streams/listeners) for each week
- avg_weeks_to_adopt: Average number of weeks until first streaming activity

City Categories:
- Early Adopter: Cities that start streaming within the first 33rd percentile of weeks
- Mid Adopter: Cities that start streaming between 33rd and

In [5]:
# Run the song-level adoption analysis; the call prints the
# "Song Performance Summary" report recorded below. The returned object is
# previewed with .head() in the following cell.
song_adoption_metrics = analyze_song_adoption_overall()


Song Performance Summary (First 12 Weeks After Release):

Key Metrics:
- peak_streams: Highest number of streams in a single week
- total_streams: Total streams in first 12 weeks
- avg_weekly_streams: Average streams per week
- peak_weekly_listeners: Highest number of listeners in any single week
- avg_weekly_listeners: Average number of listeners per week
- active_cities: Number of cities that have streamed the song
- avg_streams_per_city: Average streams per active city
- avg_weekly_streams_per_listener: Average of (streams/listeners) for each week
- peak_to_total_ratio: Percentage of total streams that occurred at peak
- consistency_score: Percentage of cities that streamed in last 4 weeks
- weeks_to_peak: Number of weeks until peak streaming activity
- weeks_to_adopt: Number of weeks until first streaming activity

Adoption Categories:
- Early Adopter: Songs that start streaming within the first 33rd percentile of weeks
- Mid Adopter: Songs that start streaming between 33rd and 67

In [6]:
# Preview the first rows of the per-song adoption metrics.
song_adoption_metrics.head()

Unnamed: 0,song,release_date,peak_date,peak_streams,weeks_to_peak,weeks_to_adopt,is_still_growing,peaked_first_week,total_streams,avg_weekly_streams,...,avg_weekly_listeners,avg_weekly_streams_per_listener,total_cities,active_cities,avg_streams_per_city,peak_to_total_ratio,consistency_score,adoption_category,log_total_streams,weeks_since_release
0,Holding On,2023-11-10,2023-11-10,28,0.0,0.0,False,True,96,8.0,...,6.5,1.7,39,39,2.5,29.2,15.4,Early Adopter,1.986772,80.7
1,The Way That It Was,2023-11-10,2023-11-10,21,0.0,0.0,False,True,61,4.7,...,4.4,1.5,30,30,2.0,34.4,20.0,Early Adopter,1.792392,80.7
2,Althea,2024-06-21,2024-06-28,1034,1.0,0.0,False,False,3060,235.4,...,284.9,1.3,392,392,7.8,33.8,14.8,Early Adopter,3.485863,48.7
3,Slipping Away,2023-11-10,2023-11-10,22,0.0,0.0,False,True,56,4.3,...,4.2,1.5,26,26,2.2,39.3,15.4,Early Adopter,1.755875,80.7
4,Cycles,2023-11-10,2023-11-10,37,0.0,0.0,False,True,130,10.0,...,9.6,1.5,49,47,2.8,28.5,17.0,Early Adopter,2.117271,80.7


In [7]:
# Preview the first rows of the prepared data loaded at the top of the notebook.
df.head()

Unnamed: 0,city,previous_period,current_period,%_change,week,song,song_id,measure,level,period_type,grouping
0,All Cities,23,11,-52%,20240927,Artist Level,artist,listeners,artist,weekly,city
1,Adelaide,0,1,-,20240927,Artist Level,artist,listeners,artist,weekly,city
2,Allentown,0,1,-,20240927,Artist Level,artist,listeners,artist,weekly,city
3,Birmingham,0,1,-,20240927,Artist Level,artist,listeners,artist,weekly,city
4,Brasília,0,1,-,20240927,Artist Level,artist,listeners,artist,weekly,city


In [8]:
# Derive the streams-per-listener table from the prepared data.
# Per the preview in the following cell, the result carries the columns
# city, week, song, plays, listeners and streams_per_listener.
spl_df = calculate_streams_per_listener(df)

In [9]:
# Preview the streams-per-listener rows for the 'All Cities' aggregate.
spl_df.loc[spl_df['city'].eq('All Cities')].head()

Unnamed: 0,city,week,song,plays,listeners,streams_per_listener
58,All Cities,20231110,All In,18,12.0,1.5
59,All Cities,20231110,Crash,19,11.0,1.727273
60,All Cities,20231110,Cycles,37,19.0,1.947368
61,All Cities,20231110,Easy,18,13.0,1.384615
62,All Cities,20231110,Holding On,28,17.0,1.647059


In [10]:
# Compute the stickiness metrics. The call prints the top five cities by
# average monthly listeners, the dropdown city options, and the
# "Stickiness Metrics Summary" recorded below.
stickiness_df = calculate_stickiness_metrics()


Top 5 Cities by Average Monthly Listeners (Artist Level):
- Denver: 9 listeners
- Atlanta: 9 listeners
- New York City: 8 listeners
- Toronto: 7 listeners
- Chicago: 6 listeners

City options for dropdown: ['All Cities', 'Denver', 'Atlanta', 'New York City', 'Toronto', 'Chicago']

Stickiness Metrics Summary (First 12 Weeks After Release):

Key Metrics:
- WAU (Weekly Active Users): Number of unique listeners in a given week
- MAU (Monthly Active Users): Number of unique listeners in the month containing that week
- Stickiness Ratio: (WAU/MAU) * 100 - Higher ratio indicates better retention
  (e.g., 50% means half of monthly listeners were active in the current week)

Note: Analysis limited to first 12 weeks after release and excludes incomplete months


In [11]:
# Artist-level rows that report monthly listener counts (case-insensitive match).
artist_monthly_df = df.loc[
    df['level'].str.lower().eq('artist')
    & df['measure'].str.lower().eq('listeners')
    & df['period_type'].str.lower().eq('monthly')
].copy()

# Mean monthly listeners per city, highest first; the 'All Cities' aggregate
# row is dropped so that only individual cities are ranked.
city_avg_mau = (
    artist_monthly_df
    .loc[lambda frame: frame['city'].str.lower().ne('all cities')]
    .groupby('city')['current_period']
    .mean()
    .sort_values(ascending=False)
)

# Names of the five highest-ranked cities.
top_cities = city_avg_mau.index[:5].tolist()

top_cities

['Denver', 'Atlanta', 'New York City', 'Toronto', 'Chicago']

In [12]:
# Listener rows for the song 'Althea' in the 'All Cities' aggregate, ordered by
# week and then by period type.
df.loc[
    df['city'].eq('All Cities')
    & df['song'].eq('Althea')
    & df['measure'].eq('listeners')
].sort_values(['week', 'period_type']).head()

Unnamed: 0,city,previous_period,current_period,%_change,week,song,song_id,measure,level,period_type,grouping
18865,All Cities,0,1145,-,20240601,Althea,1748029276,listeners,song,monthly,city
13591,All Cities,0,581,-,20240621,Althea,1748029276,listeners,song,weekly,city
7311,All Cities,581,812,+40%,20240628,Althea,1748029276,listeners,song,weekly,city
13025,All Cities,1145,1092,-5%,20240701,Althea,1748029276,listeners,song,monthly,city
3977,All Cities,812,504,-38%,20240705,Althea,1748029276,listeners,song,weekly,city


In [None]:
# Compute the song-to-artist ratio with the helper imported from analysis_modules.
# The previous line referenced an undefined name (`calca`) and raised NameError.
# NOTE(review): called with no arguments, mirroring how the other analysis
# helpers are invoked in this notebook — confirm against the signature of
# calculate_song_to_artist_ratio before relying on this cell.
song_to_artist_ratio = calculate_song_to_artist_ratio()