# Seasonal Patterns in BPM and Energy

## Analysis Goals
Discover how BPM and energy levels vary across seasons, and how those variations relate to song success:

1. **Seasonal Patterns**
   - How do BPM and energy change across seasons?
   - Are summer hits more energetic?
   - Do winter songs have different BPM patterns?

2. **Success Factors**
   - Which BPM-Energy combinations work best in each season?
   - Seasonal hit song characteristics
   - Genre-specific seasonal trends

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Set visualization style.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the bare 'seaborn' alias, so choose whichever
# name this installation actually provides instead of hard-coding one.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Default size (width, height in inches) for figures that do not set their own.
plt.rcParams['figure.figsize'] = [12, 6]

## 1. Data Loading and Seasonal Classification

In [None]:
# Fetch the dataset: turn the Google Sheets "edit" link into its CSV export
# endpoint and read the result straight into a DataFrame.
sheet_url = "https://docs.google.com/spreadsheets/d/1ae96nZRL_kJWb_EEv2avxMOgGClvyc77SpY-VBqVGiY/edit#gid=1052928543"
csv_export_url = sheet_url.replace('/edit#gid=', '/export?format=csv&gid=')
df = pd.read_csv(csv_export_url)

# Add two derived columns in one step: the release date parsed to a
# timestamp, and the calendar month (1-12) taken from that timestamp.
# NOTE(review): if the sheet contains partial dates (year only or year-month),
# confirm how pd.to_datetime treats them in the installed pandas version.
df = df.assign(
    release_date=pd.to_datetime(df['track_album_release_date']),
    month=lambda frame: frame['release_date'].dt.month,
)

def get_season(month):
    """Map a calendar month number to a season name.

    Args:
        month: Month number in the range 1-12. Integer-valued floats such as
            ``7.0`` are accepted, since pandas yields floats for a month
            column that also contains missing values.

    Returns:
        'Winter' for months 12, 1, 2; 'Spring' for 3-5; 'Summer' for 6-8;
        'Fall' for 9-11. Returns ``None`` when ``month`` is missing
        (NaN/None) or is not a valid month number, so that such rows are not
        silently counted as 'Fall'.
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    if month in (9, 10, 11):
        return 'Fall'
    # NaN never compares equal to anything, so missing months end up here.
    return None

# Label every row with the season derived from its release month.
df['season'] = df['month'].map(get_season)

# Count songs per season once, then show the counts as text and as a bar chart.
season_counts = df['season'].value_counts()
print("Songs per Season:")
print(season_counts)

plt.figure(figsize=(10, 6))
season_counts.plot.bar()
plt.title('Distribution of Songs Across Seasons')
plt.xlabel('Season')
plt.ylabel('Number of Songs')
plt.show()

## 2. BPM and Energy by Season

In [None]:
# Calendar order of the seasons, shared by the plots and the ANOVA below
# (defined once instead of repeating the literal list for every plot).
seasons = ['Winter', 'Spring', 'Summer', 'Fall']

# Calculate seasonal averages: mean and standard deviation of tempo (BPM)
# and energy for each season, rounded to two decimals.
seasonal_stats = df.groupby('season').agg({
    'tempo': ['mean', 'std'],
    'energy': ['mean', 'std']
}).round(2)

print("Seasonal BPM and Energy Statistics:")
print(seasonal_stats)

# Plot seasonal patterns: one boxplot panel per feature, seasons in
# calendar order.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# BPM by Season
sns.boxplot(data=df, x='season', y='tempo', order=seasons, ax=ax1)
ax1.set_title('BPM Distribution by Season')

# Energy by Season
sns.boxplot(data=df, x='season', y='energy', order=seasons, ax=ax2)
ax2.set_title('Energy Distribution by Season')

plt.tight_layout()
plt.show()

# Perform one-way ANOVA tests across the four seasons.
# Missing values are dropped first: a NaN in any group would otherwise
# propagate and turn the whole F statistic and p-value into NaN.
bpm_by_season = [df.loc[df['season'] == season, 'tempo'].dropna()
                 for season in seasons]
energy_by_season = [df.loc[df['season'] == season, 'energy'].dropna()
                    for season in seasons]

f_bpm, p_bpm = stats.f_oneway(*bpm_by_season)
f_energy, p_energy = stats.f_oneway(*energy_by_season)

print("\nANOVA Test Results:")
# Format p with significant digits so very small p-values are not
# rendered as a misleading "0.000".
print(f"BPM: F={f_bpm:.3f}, p={p_bpm:.3g}")
print(f"Energy: F={f_energy:.3f}, p={p_energy:.3g}")

## 3. Seasonal Success Patterns

In [None]:
# Define hit threshold (top 10%): the 90th percentile of track popularity.
hit_threshold = df['track_popularity'].quantile(0.9)

# Select the hit songs once; everything below works on this subset.
is_hit = df['track_popularity'] >= hit_threshold
hit_songs = df[is_hit]

# Per-season summary of the hits: tempo/energy spread plus how many hits
# there are and how popular they are on average.
seasonal_hits = hit_songs.groupby('season').agg({
    'tempo': ['mean', 'std'],
    'energy': ['mean', 'std'],
    'track_popularity': ['mean', 'count']
}).round(2)

print("Hit Song Characteristics by Season:")
print(seasonal_hits)

# Scatter the hits in BPM/energy space, one labelled series per season.
plt.figure(figsize=(12, 6))
for season in seasons:
    season_hits = hit_songs[hit_songs['season'] == season]
    plt.scatter(
        season_hits['tempo'],
        season_hits['energy'],
        label=season,
        alpha=0.6,
    )

plt.title('Hit Songs: BPM vs Energy by Season')
plt.xlabel('BPM')
plt.ylabel('Energy')
plt.legend()
plt.show()

## 4. Genre-Specific Seasonal Patterns

In [None]:
# Analyze patterns by genre and season: mean tempo and energy, plus mean
# popularity and song count, for every (genre, season) pair.
genre_season_stats = df.groupby(['playlist_genre', 'season']).agg({
    'tempo': 'mean',
    'energy': 'mean',
    'track_popularity': ['mean', 'count']
}).round(2)

print("Genre-Season Statistics:")
print(genre_season_stats)

# Plot genre-specific seasonal patterns, one panel per genre.
# Rows with a missing genre label are skipped, and the subplot grid is sized
# from the number of genres actually present instead of assuming exactly six.
genres = df['playlist_genre'].dropna().unique()
n_cols = 3
n_rows = max(1, -(-len(genres) // n_cols))  # ceiling division

# 5 inches of height per row keeps the original 15x10 size for a 2-row grid.
plt.figure(figsize=(15, 5 * n_rows))
for i, genre in enumerate(genres, 1):
    plt.subplot(n_rows, n_cols, i)
    genre_data = df[df['playlist_genre'] == genre]

    # Pin the hue/style order so each season keeps the same colour and
    # marker in every panel; otherwise the order follows each subset's data.
    sns.scatterplot(data=genre_data, x='tempo', y='energy',
                    hue='season', style='season',
                    hue_order=seasons, style_order=seasons,
                    alpha=0.6)
    plt.title(genre)
    # Anchor the legend just outside the right edge of the panel.
    plt.legend(bbox_to_anchor=(1.05, 1))

plt.tight_layout()
plt.show()

## 5. Seasonal Sweet Spots

In [None]:
# Create BPM and Energy bins: quartiles computed once over the whole dataset,
# so every season is measured against the same BPM and energy ranges.
bpm_bins = pd.qcut(df['tempo'], q=4)
energy_bins = pd.qcut(df['energy'], q=4)

# Analyze success rates by season
seasonal_success = []
for season in seasons:
    season_data = df[df['season'] == season]
    # Bin labels for exactly this season's rows.
    season_bpm_bins = bpm_bins.loc[season_data.index]
    season_energy_bins = energy_bins.loc[season_data.index]

    # Calculate success rates for each BPM-Energy combination:
    # mean popularity and the percentage of songs at/above the hit threshold.
    # observed=False keeps every bin combination (empty ones become NaN),
    # independent of the pandas default for categorical groupers.
    success_matrix = season_data.groupby(
        [season_bpm_bins, season_energy_bins], observed=False
    )['track_popularity'].agg([
        'mean',
        lambda x: (x >= hit_threshold).mean() * 100
    ]).round(2)
    success_matrix.columns = ['Avg Popularity', 'Hit Rate %']

    # Find best combination (highest hit rate; NaN rows sort last)
    best_combo = success_matrix.sort_values('Hit Rate %', ascending=False).head(1)

    seasonal_success.append({
        'Season': season,
        'Best BPM Range': best_combo.index[0][0],
        'Best Energy Range': best_combo.index[0][1],
        'Hit Rate': best_combo['Hit Rate %'].values[0]
    })

success_df = pd.DataFrame(seasonal_success)
print("Best BPM-Energy Combinations by Season:")
print(success_df)

# Plot seasonal success patterns: one heatmap of mean popularity per season.
plt.figure(figsize=(15, 10))
for i, season in enumerate(seasons, 1):
    plt.subplot(2, 2, i)
    season_data = df[df['season'] == season]

    # Reuse the dataset-wide bins rather than re-binning within each season,
    # so the heatmaps match the table printed above and can be compared
    # with one another cell by cell.
    success_matrix = season_data.pivot_table(
        values='track_popularity',
        index=bpm_bins.loc[season_data.index],
        columns=energy_bins.loc[season_data.index],
        aggfunc='mean',
        observed=False,
    )

    sns.heatmap(success_matrix, annot=True, fmt='.1f', cmap='YlOrRd')
    plt.title(f'{season} Success Patterns')
    plt.xlabel('Energy Quartile')
    plt.ylabel('BPM Quartile')

plt.tight_layout()
plt.show()

## 6. Seasonal Hit Song Recipes

In [None]:
# Calculate hit song recipes by season: for each season, summarise the songs
# at or above the hit threshold (count, BPM, energy, popularity, top genre).
print("Seasonal Hit Song Recipes:")
for season in seasons:
    season_hits = df[(df['season'] == season) &
                     (df['track_popularity'] >= hit_threshold)]

    print(f"\n{season.upper()} HITS:")
    print(f"Number of Hits: {len(season_hits)}")
    print(f"BPM: {season_hits['tempo'].mean():.0f} ± {season_hits['tempo'].std():.0f}")
    print(f"Energy: {season_hits['energy'].mean():.2f} ± {season_hits['energy'].std():.2f}")
    print(f"Average Popularity: {season_hits['track_popularity'].mean():.1f}")

    # Most successful genre. value_counts() sorts by frequency (descending),
    # so the first entry is the most common genre. Computed once, and guarded
    # because a season without hits (or without genre labels) has no first
    # entry to index.
    genre_counts = season_hits['playlist_genre'].value_counts()
    if genre_counts.empty:
        print("Most Common Genre: n/a (0 hits)")
    else:
        top_genre = genre_counts.index[0]
        genre_count = genre_counts.iloc[0]
        print(f"Most Common Genre: {top_genre} ({genre_count} hits)")

# Plot seasonal hit patterns: overlay one 2-D density estimate per season.
# NOTE(review): confirm that the installed seaborn version turns `label=` on a
# bivariate kdeplot into a legend entry; otherwise plt.legend() will be empty.
plt.figure(figsize=(12, 6))
for season in seasons:
    season_hits = df[(df['season'] == season) &
                     (df['track_popularity'] >= hit_threshold)]

    sns.kdeplot(data=season_hits, x='tempo', y='energy',
                label=season, levels=5)

plt.title('Hit Song Patterns by Season')
plt.xlabel('BPM')
plt.ylabel('Energy')
plt.legend()
plt.show()