In [None]:
import pandas as pd

# Load the full dataset
df = pd.read_csv("muse_v3.csv")

# Select relevant columns
columns_to_keep = ['track', 'artist', 'seeds', 'valence_tags', 'arousal_tags', 'dominance_tags']
df_simplified = df[columns_to_keep]

# Ensure VAD columns are numeric
df_simplified['valence_tags'] = pd.to_numeric(df_simplified['valence_tags'], errors='coerce')
df_simplified['arousal_tags'] = pd.to_numeric(df_simplified['arousal_tags'], errors='coerce')
df_simplified['dominance_tags'] = pd.to_numeric(df_simplified['dominance_tags'], errors='coerce')

# Save to a new CSV
df_simplified.to_csv("muse_simplified.csv", index=False)

print("✅ Step 1 complete: saved muse_simplified.csv with track, artist, seeds, and VAD values.")


In [2]:
import pandas as pd
import ast  # to safely parse stringified lists

# Load the simplified dataset from Step 1
df = pd.read_csv("muse_simplified.csv")

# Function to get the first emotion from seeds
def get_primary_emotion(seed_str):
    try:
        # Convert string representation of list to actual list
        seed_list = ast.literal_eval(seed_str)
        if isinstance(seed_list, list) and len(seed_list) > 0:
            return seed_list[0].strip()
        else:
            return None
    except:
        return None

# Apply function
df['primary_emotion'] = df['seeds'].apply(get_primary_emotion)

# Save to a new CSV
df[['track', 'artist', 'primary_emotion', 'valence_tags', 'arousal_tags', 'dominance_tags']] \
    .to_csv("muse_primary_emotion.csv", index=False)

print("✅ Step 2 complete: saved muse_primary_emotion.csv with primary emotion extracted.")


✅ Step 2 complete: saved muse_primary_emotion.csv with primary emotion extracted.


In [3]:
import pandas as pd

# Load primary emotion dataset
df = pd.read_csv("muse_primary_emotion.csv")

# Updated mapping dictionary for 273 primary emotions
# (mapping common/representative emotions; rare ones remain neutral)
emotion_mapping = {
    # Anger/Aggression
    'aggressive': 'anger', 'angry': 'anger', 'fierce': 'anger', 'hostile': 'anger',
    'defiant': 'anger', 'confrontational': 'anger', 'harsh': 'anger', 'menacing': 'anger',
    # Joy / Positive Energy
    'happy': 'joy', 'cheerful': 'joy', 'euphoric': 'joy', 'upbeat': 'joy',
    'fun': 'joy', 'joyous': 'joy', 'bright': 'joy', 'playful': 'joy', 'whimsical': 'joy', 'humorous': 'joy',
    # Sadness / Melancholy
    'lonely': 'sadness', 'gloomy': 'sadness', 'melancholy': 'sadness', 'sad': 'sadness',
    'wistful': 'sadness', 'brooding': 'sadness', 'tragic': 'sadness', 'nostalgic': 'sadness',
    'sentimental': 'sadness', 'poignant': 'sadness',
    # Calm / Soothing
    'relaxed': 'calm', 'peaceful': 'calm', 'gentle': 'calm', 'soft': 'calm', 'soothing': 'calm',
    'mellow': 'calm', 'quiet': 'calm', 'delicate': 'calm',
    # Romantic / Erotic / Intimate
    'romantic': 'romantic', 'passionate': 'romantic', 'sensual': 'romantic', 'warm': 'romantic',
    'intimate': 'romantic', 'erotic': 'romantic', 'sexy': 'romantic', 'tender': 'romantic',
    # Fear / Eerie / Spooky
    'eerie': 'fear', 'spooky': 'fear', 'anxious': 'fear', 'ominous': 'fear', 'scary': 'fear', 
    # Nostalgia / Reflective
    'introspective': 'reflective', 'thoughtful': 'reflective', 'reflective': 'reflective',
    'meditative': 'reflective', 'lyrical': 'reflective',
    # Excitement / Power
    'intense': 'excitement', 'powerful': 'excitement', 'driving': 'excitement',
    'explosive': 'excitement', 'epic': 'excitement', 'energetic': 'excitement'
    # All other emotions remain neutral for now
}

# Map primary_emotion to final_emotion
df['final_emotion'] = df['primary_emotion'].map(emotion_mapping)

# Assign 'neutral' to unmapped emotions
df['final_emotion'].fillna('neutral', inplace=True)

# Add source column
df['emotion_source'] = df.apply(lambda row: 'tag' if row['final_emotion'] != 'neutral' else 'neutral', axis=1)

# Save merged CSV
df[['track', 'artist', 'primary_emotion', 'final_emotion', 'emotion_source',
    'valence_tags', 'arousal_tags', 'dominance_tags']].to_csv("muse_merged_emotions.csv", index=False)

print("✅ Step 3 complete: merged primary emotions into final classes, saved muse_merged_emotions.csv")

# Optional: check distribution
print("\n📊 Emotion distribution after merging:")
print(df['final_emotion'].value_counts())


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['final_emotion'].fillna('neutral', inplace=True)


✅ Step 3 complete: merged primary emotions into final classes, saved muse_merged_emotions.csv

📊 Emotion distribution after merging:
final_emotion
neutral       50482
sadness        6488
joy            6255
romantic       6103
calm           5437
anger          4802
excitement     3742
reflective     3523
fear           3169
Name: count, dtype: int64


In [None]:
import pandas as pd

# Load merged dataset from Step 3
df = pd.read_csv("muse_merged_emotions.csv")

# Select only neutral songs
neutral_df = df[df['final_emotion'] == 'neutral'].copy()

# Define a function to classify based on VAD
def vad_to_emotion(v, a, d):
    if v >= 6.0 and a >= 5.0 and d >= 5.0:
        return 'excitement'
    elif v >= 6.0 and a >= 4.0 and d >= 4.0:
        return 'joy'
    elif v >= 6.0 and a <= 4.0:
        return 'calm'
    elif v <= 4.0 and a >= 5.0 and d >= 5.0:
        return 'anger'
    elif v <= 4.0 and a >= 5.0 and d <= 4.0:
        return 'fear'
    elif v <= 4.0 and a <= 4.5:
        return 'sadness'
    elif 4.5 <= v <= 6.0 and a <= 4.5:
        return 'reflective'
    elif v >= 6.0 and 4.0 <= a <= 5.0:
        return 'romantic'
    else:
        return 'neutral'  # fallback if no thresholds match

# Apply VAD-based classification
neutral_df['final_emotion_vad'] = neutral_df.apply(
    lambda row: vad_to_emotion(row['valence_tags'], row['arousal_tags'], row['dominance_tags']), axis=1
)

# Update final_emotion only for neutrals
df.loc[df['final_emotion'] == 'neutral', 'final_emotion'] = neutral_df['final_emotion_vad']

# Update emotion_source
df['emotion_source'] = df.apply(
    lambda row: row['emotion_source'] if row['emotion_source'] != 'neutral' else 'vad', axis=1
)

# Save refined dataset
df[['track', 'artist', 'primary_emotion', 'final_emotion', 'emotion_source',
    'valence_tags', 'arousal_tags', 'dominance_tags']].to_csv("muse_final_emotions.csv", index=False)

print("✅ Step 4 complete: neutral songs refined using VAD, saved muse_final_emotions.csv")

# Optional: check new distribution
print("\n📊 Emotion distribution after VAD refinement:")
print(df['final_emotion'].value_counts())


✅ Step 4 complete: neutral songs refined using VAD, saved muse_final_emotions.csv

📊 Emotion distribution 
 after VAD refinement:
final_emotion
reflective    15213
sadness       14875
joy           13755
excitement    11470
neutral       11252
calm           8858
romantic       6103
anger          4898
fear           3577
Name: count, dtype: int64


In [None]:
import pandas as pd

# Load your VAD-refined merged dataset
df = pd.read_csv("muse_final_emotions.csv")

# Select only the final columns you need
final_df = df[["track", "artist", "final_emotion"]]

# Optional: remove songs without final emotion (if any)
final_df = final_df.dropna(subset=["final_emotion"])

# Save the final CSV
final_df.to_csv("muse_final_emotion_dataset.csv", index=False)

print("Final dataset created with shape:", final_df.shape)
print(final_df['final_emotion'].value_counts())
