# In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import random
from collections import defaultdict, Counter
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split


# Integer class ids (compared against the dataset's `label` column further
# down) mapped to emotion names, in id order 0..5.
label_mapping = dict(enumerate((
    'sadness',
    'joy',
    'love',
    'anger',
    'fear',
    'surprise',
)))


# Two instrumental music suggestions for each emotion name in label_mapping.
music_recommendations = dict(
    sadness=['Melancholic Piano', 'Sad Violin Music'],
    joy=['Happy Acoustic Guitar', 'Uplifting Piano'],
    love=['Romantic Piano', 'Love Songs Instrumental'],
    anger=['Intense Rock Instrumental', 'Heavy Metal Instrumental'],
    fear=['Dark Cinematic Music', 'Tense Ambient Soundscapes'],
    surprise=['Energetic Orchestral Music', 'Exciting Electronic Beats'],
)


# Load the labelled dataset from the working directory; later code reads its
# 'text' and 'label' columns.
data = pd.read_csv('emotions.csv')


# NLTK's English stop-word list as a set; preprocess_text drops any token
# found in it.
stop_words = set(stopwords.words('english'))

# Apostrophe-less contractions mapped to their expanded forms (built once,
# each key listed once).  The ambiguous key "were" is deliberately absent: it
# is also the ordinary past tense of "are", and expanding it additionally
# rewrote the output of "werent" ("were not") into "we are not".
_CONTRACTIONS = {
    "dont": "do not",
    "cant": "cannot",
    "wont": "will not",
    "im": "i am",
    "ive": "i have",
    "id": "i would",
    "youre": "you are",
    "isnt": "is not",
    "wasnt": "was not",
    "shouldnt": "should not",
    "couldnt": "could not",
    "doesnt": "does not",
    "havent": "have not",
    "hasnt": "has not",
    "hadnt": "had not",
    "arent": "are not",
    "werent": "were not",
    "wouldnt": "would not",
    "mustnt": "must not",
    "mightnt": "might not",
    "didnt": "did not",
    "neednt": "need not",
    "oughtnt": "ought not",
    "hes": "he is",
    "shes": "she is",
    "its": "it is",
    "thats": "that is",
    "theres": "there is",
    "whats": "what is",
    "wheres": "where is",
    "whos": "who is",
    "theyre": "they are",
    "weve": "we have",
}

# A single whole-word alternation expands every contraction in one pass.  No
# expansion contains another key, so one pass gives the same result as
# applying the rules one after another.
_CONTRACTION_RE = re.compile(
    r'\b(?:' + '|'.join(map(re.escape, _CONTRACTIONS)) + r')\b'
)
_NON_LETTER_RE = re.compile(r'[^a-zA-Z]')
_NEGATION_PREFIX = 'not_'


def _strip_non_letters(token):
    """Remove non-letter characters from *token*, keeping a leading ``not_``."""
    if token.startswith(_NEGATION_PREFIX):
        core = _NON_LETTER_RE.sub('', token[len(_NEGATION_PREFIX):])
        # A marker with nothing left behind it carries no information.
        return _NEGATION_PREFIX + core if core else ''
    return _NON_LETTER_RE.sub('', token)


def preprocess_text(text):
    """Turn raw text into a list of cleaned, negation-marked tokens.

    Steps, in order: lowercase; expand apostrophe-less contractions
    ("dont" -> "do not"); split on whitespace; pass the tokens through
    ``handle_negations``; strip every non-letter character from each token;
    drop empty tokens and tokens found in ``stop_words``.

    The ``not_`` prefix added by ``handle_negations`` is preserved by the
    cleaning step.  Stripping it (the underscore is a non-letter) would fuse
    the marker into the word ("nothappy"), so that ``predict_emotion`` could
    never recognise a negated token via ``startswith('not_')``.

    Args:
        text: The raw input string.

    Returns:
        A list of token strings.  Non-string input (``None``, NaN, ...)
        yields an empty list instead of raising.
    """
    if not isinstance(text, str):
        return []

    expanded = _CONTRACTION_RE.sub(
        lambda match: _CONTRACTIONS[match.group(0)], text.lower()
    )
    tokens = handle_negations(expanded.split())

    cleaned = (_strip_non_letters(token) for token in tokens)
    return [token for token in cleaned if token and token not in stop_words]

def handle_negations(tokens):
    """Fold each negation word into the token that follows it.

    A token found in the negation set is removed from the output, and the next
    token that is not itself a negation word is emitted with a ``not_``
    prefix.  A negation word with nothing after it is simply dropped.

    NOTE: a function with this same name is defined again further down this
    file.  When the file runs top to bottom, this definition is the one bound
    while the training data is preprocessed and the later one is bound when
    predictions are made, so both must apply the same rule.  This version
    therefore drops the negation word, exactly like the later definition,
    instead of also keeping it in the output.

    Args:
        tokens: An iterable of token strings.

    Returns:
        A new list of tokens with negations folded in.
    """
    # Multi-word entries only match if a caller passes such a string as a
    # single token; whitespace-split input never contains them.
    negation_words = {
        'no', 'not', 'never', 'none', 'cannot', 'do not', 'dont',
        'did not', 'does not', 'didnt', 'doesnt',
    }
    transformed_tokens = []
    negate = False
    for token in tokens:
        if token in negation_words:
            # Remember the negation; it is carried by the next token's prefix.
            negate = True
        elif negate:
            transformed_tokens.append('not_' + token)
            negate = False
        else:
            transformed_tokens.append(token)
    return transformed_tokens


# Tokenise every document once and keep the token lists next to the raw text.
data['processed_text'] = data['text'].apply(preprocess_text)

X, y = data['processed_text'], data['label']

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# The vocabulary is built from the training split only.
all_words = [word for tokens in X_train for word in tokens]
vocab = set(all_words)
vocab_size = len(vocab)
vocab_to_id = dict(zip(vocab, range(vocab_size)))
id_to_vocab = dict(enumerate(vocab))


# Multinomial Naive Bayes parameters, stored as natural logs:
#   class_priors[label]           = log P(label)
#   word_likelihoods[label][word] = log P(word | label), add-alpha smoothed
class_priors = {}
word_likelihoods = {}
alpha = 1

for label in label_mapping:
    label_docs = X_train[y_train == label]
    class_priors[label] = np.log(len(label_docs) / len(X_train))

    word_counts = Counter(word for tokens in label_docs for word in tokens)
    # Smoothed denominator: tokens seen in this class plus alpha per vocab word.
    smoothed_total = sum(word_counts.values()) + alpha * vocab_size

    word_likelihoods[label] = {
        word: np.log((word_counts[word] + alpha) / smoothed_total)
        for word in vocab
    }

def handle_negations(tokens):
    """Fold negation words into the token that follows them.

    Each token found in the negation set is omitted from the result; the next
    token that is not a negation word is emitted as ``'not_' + token``.  All
    other tokens pass through unchanged.  A trailing negation word produces
    no output.

    Args:
        tokens: An iterable of token strings.

    Returns:
        A new list of tokens.
    """
    negators = {'no', 'not', 'never', 'none', 'cannot', 'do not', 'dont', 'did not', 'does not', 'didnt', 'doesnt'}
    result = []
    pending = False
    for tok in tokens:
        if tok in negators:
            pending = True
            continue
        result.append('not_' + tok if pending else tok)
        pending = False
    return result

def predict_emotion(text):
    """Return the emotion name with the highest Naive Bayes score for *text*.

    The text is tokenised with ``preprocess_text``.  Each label's score starts
    at its log prior and accumulates one log-likelihood per token; a token
    missing from the label's table contributes ``log(alpha / (alpha *
    vocab_size))``.  For a token that starts with ``'not_'`` whose remainder
    is in ``vocab``, the remainder's log-likelihood is first subtracted from
    the score (this only applies when ``preprocess_text`` emits such tokens).

    Args:
        text: Raw input text.

    Returns:
        The ``label_mapping`` value of the best-scoring label.
    """
    tokens = preprocess_text(text)

    scores = {}
    for label in label_mapping:
        likelihoods = word_likelihoods[label]
        running = class_priors[label]
        for token in tokens:
            if token.startswith('not_'):
                negated = token[4:]
                if negated in vocab:
                    running -= likelihoods.get(negated, 0)
            # Both kinds of token add their own (possibly default) likelihood.
            running += likelihoods.get(token, np.log(alpha / (alpha * vocab_size)))
        scores[label] = running

    best_label = max(scores, key=scores.get)
    return label_mapping[best_label]



def compute_accuracy():
    """Print the classifier's accuracy on the held-out test split.

    ``X_test`` holds token lists that ``preprocess_text`` already produced,
    while ``predict_emotion`` runs ``preprocess_text`` on whatever it is
    given.  Joining the tokens back into a string would therefore preprocess
    every test document twice, producing different tokens from the ones the
    model was trained on.  The raw ``text`` of the same rows is scored
    instead, so each document is preprocessed exactly once.

    Prints ``nan`` as the accuracy when the test split is empty.
    """
    # Row labels of X_test select the matching raw documents, in X_test order,
    # so they stay aligned with y_test.
    raw_texts = data.loc[X_test.index, 'text']
    total = len(raw_texts)
    correct = sum(
        predict_emotion(text) == label_mapping[true_label]
        for text, true_label in zip(raw_texts, y_test)
    )
    accuracy = correct / total if total else float('nan')
    print(f"Accuracy on test set: {accuracy:.4f}")

compute_accuracy()

