In [None]:
import nltk
from nltk.tokenize import word_tokenize
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import random

import pandas as pd
import matplotlib.pyplot as plt

# Fetch the 'punkt_tab' NLTK data package (presumably the tokenizer resource
# behind the imported word_tokenize — confirm against the installed NLTK version).
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
# !pip install nltk textblob
# !pip install nltk
# !pip install transformers torch nltk
# !pip install transformers torch

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Data: https://www.kaggle.com/datasets/heminp16/spotify-top-2018-2019-songs

In [None]:
# Read the song table from a hard-coded absolute path; the CSV must already
# exist at this location (presumably a Colab upload directory — confirm).
df = pd.read_csv("/content/Top2018_2019.csv")
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,sel,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
0,1,Someone You Loved,Lewis Capaldi,pop,2019,110,41,50,-6,11,45,182,75,3,89
1,2,Jocelyn Flores,XXXTENTACION,emo rap,2017,134,39,87,-9,30,44,119,47,24,88
2,3,Perfect,Ed Sheeran,pop,2017,95,45,60,-6,11,17,263,16,2,88
3,4,Circles,Post Malone,dfw rap,2019,120,76,70,-3,9,55,215,19,4,88
4,5,Lucid Dreams,Juice WRLD,chicago rap,2018,84,57,51,-7,34,22,240,35,20,87


In [None]:
# List the available columns, then count missing values per column.
print(df.columns)
print(df.isnull().sum())

Index(['sel', 'title', 'artist', 'top genre', 'year', 'bpm', 'nrgy', 'dnce',
       'dB', 'live', 'val', 'dur', 'acous', 'spch', 'pop'],
      dtype='object')
sel          0
title        0
artist       0
top genre    0
year         0
bpm          0
nrgy         0
dnce         0
dB           0
live         0
val          0
dur          0
acous        0
spch         0
pop          0
dtype: int64


In [None]:
# Narrow the frame to the song identifiers plus the audio-feature columns used
# later on, and drop incomplete rows in the same chained expression.
df = (
    df[['title', 'artist', 'top genre', 'bpm', 'nrgy', 'dnce', 'dB', 'val', 'acous', 'spch']]
    .dropna()
)

In [None]:
# for column in df.columns:
#     if df[column].dtype in ['int64', 'float64']:  # Check if the column is numeric
#         plt.figure(figsize=(10, 6))  # Adjust figure size as needed
#         plt.hist(df[column], bins=20)  # Create a histogram with 20 bins
#         plt.title(f'Bar Chart of {column}')
#         plt.xlabel(column)
#         plt.ylabel('Frequency')
#         plt.show()
#     else:
#         print(f"Skipping non-numeric column: {column}")

In [None]:
# Spot-check a single row of the trimmed frame.
df.head(1)

Unnamed: 0,title,artist,top genre,bpm,nrgy,dnce,dB,val,acous,spch
0,Someone You Loved,Lewis Capaldi,pop,110,41,50,-6,45,75,3


In [None]:
def classify_mood(val, nrgy, dnce, bpm, acous):
    """Assign a coarse mood label to a song from its numeric audio features.

    The rules are evaluated top to bottom and the first one that holds wins,
    so an earlier mood shadows a later one whenever their conditions overlap.

    Args:
        val: value of the dataset's ``val`` column (presumably valence).
        nrgy: value of the ``nrgy`` column (presumably energy).
        dnce: value of the ``dnce`` column (presumably danceability).
        bpm: value of the ``bpm`` column (tempo in beats per minute).
        acous: value of the ``acous`` column (presumably acousticness).

    Returns:
        One of "happy", "sad", "energetic", "calm", "romantic", "angry",
        or "neutral" when no rule matches.
    """
    # Each predicate is wrapped in a lambda so it is only evaluated when
    # reached, keeping the same short-circuit order as an if/elif ladder.
    ordered_rules = (
        ("happy", lambda: val > 60 and dnce > 50 and 100 <= bpm <= 160),
        ("sad", lambda: val < 40 and nrgy < 50 and acous > 60),
        ("energetic", lambda: nrgy > 70 and bpm > 130 and acous < 30),
        ("calm", lambda: acous > 70 and nrgy < 40 and 60 <= bpm <= 100),
        ("romantic", lambda: val > 50 and dnce > 50 and 80 <= bpm <= 120),
        ("angry", lambda: nrgy > 70 and val < 40 and bpm > 140),
    )

    for mood_label, rule_holds in ordered_rules:
        if rule_holds():
            return mood_label

    return "neutral"

# Label every song by passing its audio-feature values, row by row, through
# classify_mood; the result is stored in a new "mood" column on df.
df["mood"] = df.apply(lambda row: classify_mood(row["val"], row["nrgy"], row["dnce"], row["bpm"], row["acous"]), axis=1)

# Show how many songs landed in each mood bucket.
print(df["mood"].value_counts())

mood
neutral      59
romantic     20
happy        14
energetic     5
calm          1
angry         1
Name: count, dtype: int64


In [None]:
# Load the pre-trained binary (POSITIVE / NEGATIVE) sentiment classifier.
# The checkpoint and revision are pinned explicitly — they are the same ones the
# pipeline otherwise falls back to by default — so a re-run keeps loading the
# same weights and the "no model was supplied" warning goes away.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


In [None]:
def detect_emotion(text):
    """Map the binary sentiment of ``text`` onto a coarse mood label.

    The notebook-level ``classifier`` pipeline is run on the raw text and the
    label of its first result is translated to a mood.

    Args:
        text: the user's free-form input string.

    Returns:
        "happy" for a POSITIVE label, "sad" for a NEGATIVE label, and
        "neutral" for any other label.
    """
    # The raw string goes straight to the pipeline; the previous word_tokenize
    # call produced a token list that was never used, so it has been dropped.
    label = classifier(text)[0]["label"]

    # Map sentiment labels to moods
    mood_mapping = {
        "POSITIVE": "happy",
        "NEGATIVE": "sad"
    }
    return mood_mapping.get(label, "neutral")

# Example usage: classify a one-word exclamation with the binary sentiment mapper.
# user_input and user_mood are bound at notebook level; user_input is assigned
# again by a later example cell.
user_input = "Hurray"
user_mood = detect_emotion(user_input)
print(f"Detected mood: {user_mood}")

Detected mood: happy


In [None]:
# Load fine-tuned emotion detection model
model_name = "bhadresh-savani/distilbert-base-uncased-emotion"
# Tokenizer and model are both loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Define emotions-to-mood mapping
# Keys are emotion labels as read from model.config.id2label in detect_mood;
# values are the mood strings that detect_mood returns to its caller.
# NOTE(review): "aggressive" and "relaxed" are not labels that classify_mood
# assigns to songs, so they can only be resolved through recommend_song's
# fallback table — confirm that is intended.
# NOTE(review): "fear" -> "relaxed" presumably means "recommend something
# soothing" rather than describing the user's state — confirm.
# NOTE(review): verify that every key here is an actual label of this checkpoint.
emotion_to_mood = {
    "joy": "happy",
    "anger": "aggressive",
    "sadness": "sad",
    "fear": "relaxed",
    "love": "happy",
    "surprise": "happy",
    "disgust": "sad",
}

# Function to detect mood based on emotions
def detect_mood(text):
    """Classify ``text`` with the emotion model and translate the result to a mood.

    Uses the notebook-level ``tokenizer``, ``model`` and ``emotion_to_mood``.

    Args:
        text: the user's free-form input string.

    Returns:
        The mood mapped from the highest-scoring emotion label, or "neutral"
        when that label has no entry in ``emotion_to_mood``.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: disable autograd so no gradient graph is built.
    with torch.no_grad():
        logits = model(**inputs).logits
        # Softmax is monotonic, so the arg-max of the raw logits is the same
        # class the softmax scores would select. Convert to a plain int so the
        # id2label lookup does not rely on a tensor/numpy scalar as a dict key.
        emotion_id = int(logits.argmax(dim=-1)[0])

    # Map emotion to mood
    emotion_label = model.config.id2label[emotion_id]
    return emotion_to_mood.get(emotion_label, "neutral")

# Example usage
# user_input = "I'm feeling extremely nervous and anxious today."
# user_mood = detect_mood(user_input)
# print(f"Detected mood: {user_mood}")

In [None]:
def recommend_song(user_mood, df):
    """Pick a random song whose ``mood`` label fits the user's mood.

    Args:
        user_mood: mood string for the user (e.g. the value returned by
            ``detect_mood``).
        df: pandas DataFrame with at least ``title``, ``artist`` and ``mood``
            columns.

    Returns:
        A formatted recommendation string, or a fallback message when neither
        the requested mood nor any of its alternatives matches a song.
    """
    # Fallback moods, tried in order when no song carries the requested label.
    # Every alternative has to be a label that songs are actually tagged with
    # (happy, sad, energetic, calm, romantic, angry, neutral); the earlier
    # "energitic" typo and the non-song label "aggressive" could never match.
    alternative_moods = {
        "happy": ["romantic", "energetic"],
        "sad": ["calm"],
        "relaxed": ["happy", "calm"],
        "aggressive": ["angry", "energetic"],
    }

    # Filter songs based on detected mood
    matching_songs = df[df["mood"] == user_mood]

    # If no exact match, choose a song from a similar mood category
    if matching_songs.empty:
        for alt_mood in alternative_moods.get(user_mood, []):
            matching_songs = df[df["mood"] == alt_mood]
            if not matching_songs.empty:
                break

    # Still nothing to offer: tell the caller instead of guessing.
    if matching_songs.empty:
        return "No exact match found. Try a different mood input."

    # Draw one candidate uniformly at random. Sorting beforehand would not
    # influence a random draw, so the frame is sampled directly.
    recommended_song = matching_songs.sample(1)

    title = recommended_song["title"].values[0]
    artist = recommended_song["artist"].values[0]

    return f"🎵 Recommended Song: *{title}* by {artist}"

# Example Usage
# user_mood = "happy"  # This will come from the NLP model
# print(recommend_song(user_mood, df))

In [None]:
def mood_based_song_recommender(user_text, df):
    """Run the full text -> mood -> song pipeline for one user message.

    The mood is obtained from ``detect_mood``, echoed to stdout, and then
    passed together with ``df`` to ``recommend_song``.

    Args:
        user_text: the user's free-form input string.
        df: song table forwarded unchanged to ``recommend_song``.

    Returns:
        Whatever ``recommend_song`` returns for the detected mood.
    """
    detected = detect_mood(user_text)
    print(f"🤖 Detected Mood: {detected}")
    return recommend_song(detected, df)

# Example user input: run the whole text -> mood -> song pipeline once.
# NOTE(review): the recorded output labels this sentence as "happy", which looks
# questionable for its content — worth checking the emotion-to-mood mapping.
user_input = "I want to talk to my special person but she is in no mood"
print(mood_based_song_recommender(user_input, df))

🤖 Detected Mood: happy
🎵 Recommended Song: *Post Malone (feat. RANI)* by Sam Feldt
