# In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.util import mark_negation
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from gensim.models import CoherenceModel, LdaModel
from gensim.corpora import Dictionary
import spacy


# Fetch the NLTK resources used further down: the Punkt tokenizer data
# (for nltk.word_tokenize) and the stop-word lists.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')


# Small English spaCy pipeline. Nothing in the visible code reads `nlp`;
# it is kept in case other cells rely on it.
nlp = spacy.load("en_core_web_sm")


# Integer class id (values of the 'label' column) -> emotion name.
label_mapping = {
    0: 'sadness',
    1: 'joy',
    2: 'love',
    3: 'anger',
    4: 'fear',
    5: 'surprise'
}


df = pd.read_csv('emotions.csv')


# Work on a reproducible random subset of at most 50,000 rows. The size is
# capped at len(df) because DataFrame.sample raises ValueError when asked for
# more rows than exist (sampling is without replacement by default).
sample_df = df.sample(n=min(50000, len(df)), random_state=42)


# Suffix that nltk's mark_negation appends to tokens inside a negated span.
_NEG_SUFFIX = "_NEG"

# Lazily filled cache so the stop-word list is loaded once, not once per row.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it on first use only."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


def preprocess_text_with_negation(text):
    """Normalise ``text`` into a space-joined string of content tokens.

    Steps: lowercase, tokenize with ``nltk.word_tokenize``, tag negated
    spans with ``mark_negation`` (which appends ``_NEG`` to the affected
    tokens), then drop stop words and non-alphabetic tokens.

    Args:
        text: The raw input string.

    Returns:
        The surviving tokens joined by single spaces. Tokens that were inside
        a negated span keep their ``_NEG`` suffix (e.g. ``"happy_NEG"``).
    """
    stop_words = _english_stop_words()
    tokens = mark_negation(nltk.word_tokenize(text.lower()))

    kept = []
    for token in tokens:
        # Filter on the underlying word, not the marked token: the underscore
        # in "_NEG" makes str.isalpha() False, which previously discarded
        # every negated word and with it all negation information.
        # The text was lowercased above, so an upper-case "_NEG" ending can
        # only come from mark_negation.
        if token.endswith(_NEG_SUFFIX):
            base = token[:-len(_NEG_SUFFIX)]
        else:
            base = token
        if base.isalpha() and base not in stop_words:
            kept.append(token)
    return ' '.join(kept)


# Overwrite the raw text column with the output of
# preprocess_text_with_negation; everything below works on that cleaned text.
sample_df['text'] = sample_df['text'].apply(preprocess_text_with_negation)


X = sample_df['text']
y = sample_df['label']
# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Supervised emotion classifier: TF-IDF features (vocabulary capped at 500
# terms) fed into a logistic regression.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=500)),
    ('clf', LogisticRegression(max_iter=500)),
])


pipeline.fit(X_train, y_train)


y_pred = pipeline.predict(X_test)
print("Classification Report:\n")
# Pass `labels` explicitly so the six target names always line up with the
# six class ids; without it classification_report raises when a class is
# missing from both y_test and y_pred.
report_labels = sorted(label_mapping)
print(classification_report(
    y_test,
    y_pred,
    labels=report_labels,
    target_names=[label_mapping[i] for i in report_labels],
))


# Token lists for the topic model. Despite the name, the tokens are only the
# whitespace-split preprocessed text; no lemmatization is applied here.
data_lemmatized = [text.split() for text in sample_df['text']]
id2word = Dictionary(data_lemmatized)
corpus = [id2word.doc2bow(text) for text in data_lemmatized]


# LDA with six topics, the same count as the entries in label_mapping
# (main() looks topic ids up in that mapping).
lda_model = LdaModel(
    corpus=corpus,
    id2word=id2word,
    num_topics=6,
    random_state=42,
    passes=10,
    alpha='auto'
)


# Emotion name -> list of instrumental music suggestions for that mood.
music_recommendations = dict(
    sadness=['Melancholic Piano', 'Sad Violin Music'],
    joy=['Happy Acoustic Guitar', 'Uplifting Piano'],
    love=['Romantic Piano', 'Love Songs Instrumental'],
    anger=['Intense Rock Instrumental', 'Heavy Metal Instrumental'],
    fear=['Dark Cinematic Music', 'Tense Ambient Soundscapes'],
    surprise=['Energetic Orchestral Music', 'Exciting Electronic Beats'],
)


def recommend_music(emotion):
    """Look up the music suggestions for ``emotion``.

    Args:
        emotion: Emotion name used as a key into ``music_recommendations``.

    Returns:
        The list stored for a known emotion; for any other value, a
        one-element list holding an "unavailable" message.
    """
    unavailable = ["Instrumental Music Suggestions Unavailable for this emotion."]
    return music_recommendations.get(emotion, unavailable)


def _mentions_negation(text):
    """Return True if ``text`` contains the word "not" or a negated span.

    Works on the raw text at token level: the text is lowercased, tokenized
    and passed through ``mark_negation``; a standalone "not" token or any
    token carrying the ``_NEG`` suffix counts as negation.
    """
    marked = mark_negation(nltk.word_tokenize(text.lower()))
    return any(token == "not" or token.endswith("_NEG") for token in marked)


def main(user_input):
    """Detect the emotion of ``user_input`` and print music suggestions.

    Combines three signals: the trained classifier pipeline, the most
    probable LDA topic, and a negation check on the raw input. Negation or a
    "sadness" topic forces the final emotion to "sadness"; otherwise the
    classifier's prediction is used.

    Args:
        user_input: Free-form text describing how the user feels.

    Returns:
        None. The results are printed.
    """
    preprocessed_input = preprocess_text_with_negation(user_input)

    predicted_label = pipeline.predict([preprocessed_input])[0]
    classifier_emotion = label_mapping[predicted_label]

    bow_input = id2word.doc2bow(preprocessed_input.split())
    topic_distribution = lda_model.get_document_topics(bow_input)
    most_probable_topic = max(topic_distribution, key=lambda x: x[1])[0]
    # NOTE(review): LDA topic ids come from unsupervised training, so nothing
    # visible here ties topic k to class id k. This lookup assumes they
    # coincide -- confirm, or derive a topic -> emotion mapping from the data.
    topic_emotion = label_mapping[most_probable_topic]

    # Detect negation on the raw input with token-level checks. The previous
    # substring test on the preprocessed text could not see "not" (it is a
    # stop word and had been removed) yet fired on unrelated words that
    # merely contain those letters, such as "another" or "nothing".
    if _mentions_negation(user_input) or topic_emotion == "sadness":
        final_emotion = "sadness"
    else:
        final_emotion = classifier_emotion

    recommended_tracks = recommend_music(final_emotion)

    print(f"\nDetected Emotion (Classifier): {classifier_emotion}")
    print(f"Detected Emotion (Topic Model): {topic_emotion}")
    print(f"Final Emotion: {final_emotion}")
    print(f"Recommended Music for {final_emotion}: {recommended_tracks}")