In [2]:
import pandas as pd

# --- STEP 1: Load dataset ---
df = pd.read_csv("muse_v3.csv")

# Coerce the three VAD score columns to a numeric dtype. With errors='coerce'
# any value that cannot be parsed becomes NaN instead of raising.
for vad_column in ('valence_tags', 'arousal_tags', 'dominance_tags'):
    df[vad_column] = pd.to_numeric(df[vad_column], errors='coerce')

# --- STEP 2: Define VAD → Plutchik emotion mapping ---
def map_vad_to_emotion(valence, arousal, dominance):
    """Map one valence/arousal/dominance triple to an emotion label.

    The rules are checked top to bottom and the first one that matches
    decides the label, so the order of the checks matters wherever two
    threshold regions overlap. A triple that matches no rule, or that has
    any missing component, is labelled "neutral".

    Args:
        valence: Valence score, or a missing value (None/NaN).
        arousal: Arousal score, or a missing value (None/NaN).
        dominance: Dominance score, or a missing value (None/NaN).

    Returns:
        One of "joy", "trust", "fear", "anger", "sadness", "disgust",
        "surprise", "anticipation" or "neutral".
    """
    # NOTE(review): the thresholds look like they assume a 1-9 rating
    # scale -- confirm against the dataset documentation.

    # Missing data: any absent component makes the triple unclassifiable.
    if any(pd.isna(score) for score in (valence, arousal, dominance)):
        return "neutral"

    # Joy: high on all three dimensions.
    if valence > 6.5 and arousal > 6 and dominance > 6:
        return "joy"

    # Trust: pleasant but calm, with at least mid-level dominance.
    if valence > 6 and arousal < 5 and dominance >= 5:
        return "trust"

    # Fear: unpleasant and activated, low dominance.
    if valence < 4 and arousal > 6 and dominance < 4:
        return "fear"

    # Anger: unpleasant and activated, mid-to-high dominance.
    # Dominance in [4, 5) matches neither fear nor anger and falls through.
    if valence < 4 and arousal > 6 and dominance >= 5:
        return "anger"

    # Sadness: low on all three dimensions.
    if valence < 4 and arousal < 4 and dominance < 4:
        return "sadness"

    # Disgust: unpleasant with mid-range arousal; dominance is not checked.
    if valence < 4 and 4 <= arousal <= 5:
        return "disgust"

    # Surprise: mid-range valence with high arousal; dominance is not checked.
    if 4.5 <= valence <= 6 and arousal > 6:
        return "surprise"

    # Anticipation: above-mid valence, high arousal, mid-range dominance.
    # Only reached by triples the joy and surprise rules did not claim.
    if valence > 5 and arousal > 6 and 5 <= dominance <= 7:
        return "anticipation"

    return "neutral"

# --- STEP 3: Apply mapping ---
# Label every row from its three scores; the column order here must match
# the (valence, arousal, dominance) parameter order of map_vad_to_emotion.
vad_scores = df[['valence_tags', 'arousal_tags', 'dominance_tags']]
df['emotion_vad'] = vad_scores.apply(
    lambda scores: map_vad_to_emotion(*scores),
    axis=1,
)

# --- STEP 4: Save track + artist + emotion ---
# Only the identifying columns and the derived label are written out.
output_df = df.loc[:, ['track', 'artist', 'emotion_vad']]
output_df.to_csv("songs_vad_emotion_labels.csv", index=False)

print("✅ Done! Saved VAD-based emotion labels to songs_vad_emotion_labels.csv")

# Optional: quick distribution check (number of rows per label)
label_counts = output_df['emotion_vad'].value_counts()
print("\n📊 Emotion distribution:")
print(label_counts)


✅ Done! Saved VAD-based emotion labels to songs_vad_emotion_labels.csv

📊 Emotion distribution:
emotion_vad
neutral         51016
trust           21488
sadness          8993
disgust          3375
joy              2719
anticipation     1366
surprise          921
anger              66
fear               57
Name: count, dtype: int64
