In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plot defaults for the whole notebook: seaborn's white-grid theme first,
# then a 10x6 inch default figure size.
sns.set_theme(style="whitegrid")
plt.rc('figure', figsize=(10, 6))

# Load the dataset. The path is relative, so it is resolved against the
# kernel's current working directory.
df = pd.read_csv('../data/dataset.csv')

# Row count on stdout; the first rows become the cell's rich output.
print(f"Total: {df.shape[0]}")
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '../data/dataset.csv'

In [None]:
# Rows per genre, most frequent first; keep the first 20.
top_genres = df['track_genre'].value_counts().iloc[:20]

fig, ax = plt.subplots(figsize=(12, 6))

# Horizontal bars: counts on x, genre labels on y. `hue` mirrors the y
# variable so that `palette` gives each bar its own colour, and the legend is
# switched off because it would only repeat the y-axis labels.
sns.barplot(
    x=top_genres.values,
    y=top_genres.index,
    hue=top_genres.index,
    legend=False,
    palette="viridis",
    ax=ax,
)

ax.set_title('Top 20 Most Common Genres in the Dataset')
ax.set_xlabel('Total')
ax.set_ylabel('Genre')
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Plot a reproducible random subset instead of every row. The size is capped
# at len(df) because DataFrame.sample raises ValueError when asked for more
# rows than exist (sampling without replacement); frames with at least 1000
# rows get exactly the same sample as before.
sample_df = df.sample(min(1000, len(df)), random_state=42)

# Points are coloured by genre, but the per-genre legend is suppressed here.
sns.scatterplot(
    data=sample_df,
    x='loudness',
    y='energy',
    alpha=0.5,
    hue='track_genre',
    legend=False,
    ax=ax,
)

ax.set_title('Relation Loudness (loudness) vs Energy')
ax.set_xlabel('Loudness (dB) - further to the right, is the harder')
ax.set_ylabel('Energy (0.0 - 1.0)')

# Dashed vertical reference line at -9 dB.
# NOTE(review): the -9 dB threshold is hard-coded and its justification is not
# shown in this notebook - confirm the value and the wording of the label.
ax.axvline(x=-9, color='r', linestyle='--', label='Boundary Between Classical and Modern Eras (-9dB)')

# Only the reference line carries a label, so it is the sole legend entry.
ax.legend()
plt.show()

In [None]:
from math import pi

def plot_radar_comparison(song1_name, song2_name, data=None, regex=False):
    """Compare two tracks' audio features on a single radar (polar) chart.

    Each name is matched case-insensitively as a substring of the
    ``track_name`` column, and the first matching row is used. The two rows'
    values for danceability, energy, valence, acousticness and
    instrumentalness are drawn as closed, lightly filled polygons.

    Args:
        song1_name: Text to look for in ``track_name`` for the first track.
        song2_name: Text to look for in ``track_name`` for the second track.
        data: DataFrame to search. Defaults to the notebook-level ``df``.
            It must provide ``track_name``, ``artists`` and the five feature
            columns listed above.
        regex: If True, interpret the names as regular expressions (the
            behaviour before this parameter existed). The default, False,
            treats them as literal text so that titles containing characters
            such as "(", "[" or "?" can be searched safely.

    Returns:
        None. The chart is shown with ``plt.show()``. If a track cannot be
        found, or the lookup itself fails, a message is printed and nothing
        is drawn.
    """
    features = ['danceability', 'energy', 'valence', 'acousticness', 'instrumentalness']

    # Fall back to the notebook's global frame so two-argument calls still work.
    tracks = df if data is None else data

    rows = []
    try:
        for query in (song1_name, song2_name):
            hits = tracks['track_name'].str.contains(query, case=False, na=False, regex=regex)
            if not hits.any():
                print(f"song '{query}' not found.")
                return
            # Several titles may contain the text; the first one wins.
            rows.append(tracks[hits].iloc[0])
    except Exception as e:
        print(f"an error occured: {e}")
        return
    s1, s2 = rows

    # One evenly spaced angle per feature; the first angle is repeated at the
    # end so each outline closes back on its starting point.
    angles = [n / float(len(features)) * 2 * pi for n in range(len(features))]
    angles += angles[:1]

    plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)

    # Label each spoke with its feature name (the duplicated closing angle is skipped).
    plt.xticks(angles[:-1], features, color='grey', size=10)

    for row in (s1, s2):
        values = row[features].values.flatten().tolist()
        values += values[:1]
        (outline,) = ax.plot(angles, values, linewidth=1, linestyle='solid', label=row['track_name'])
        # Fill with the outline's own colour so the shaded area always agrees
        # with the legend entry, whatever colour cycle is active.
        ax.fill(angles, values, color=outline.get_color(), alpha=0.1)

    plt.title(f"audio comparison DNA:\n{s1['artists']} vs {s2['artists']}")
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
    plt.show()

# Example: compare the first tracks whose titles contain these two strings.
plot_radar_comparison(song1_name="Algernon", song2_name="Psychosocial")