<a href="https://colab.research.google.com/github/tesla07/Adele-lyrics_classifier_n_generator/blob/main/adele's_song_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import numpy as np
import pandas as pd

In [40]:
from google.colab import files

uploaded = files.upload()

Saving Adele.csv to Adele (1).csv


In [41]:
file_path = 'Adele.csv'
spotify_data = pd.read_csv(file_path)

In [42]:
# Data Preprocessing
# Clean the lyrics data
spotify_data['clean_lyrics'] = spotify_data['text'].str.replace('[^a-zA-Z\s]', '', regex=True).str.lower()


# Tokenize the lyrics using numpy
spotify_data['tokenized_lyrics'] = spotify_data['clean_lyrics'].apply(lambda x: np.array(x.split()))

# Remove stop words (assuming 'stop_words' a list of stop words)
stop_words = ['a', 'an', 'the', 'is', 'of', 'and', 'it']  # Define the stop words
spotify_data['filtered_lyrics'] = spotify_data['tokenized_lyrics'].apply(lambda x: np.array([word for word in x if word not in stop_words]))

In [43]:
# Sentiment Analysis using a custom approach
# Define your own sentiment scoring system using numpy
# This could involve counting positive and negative words, or using other custom rules

# For example, defining a simple sentiment score based on the presence of certain words
# Positive Words# Positive Words
positive_words = ['love', 'joy', 'happy', 'sun', 'dream', 'free', 'happiness', 'dream', 'hope', 'laugh']

# Negative Words
negative_words = ['heartbreak', 'pain', 'tears', 'sad', 'lonely', 'dark', 'angry', 'hate', 'death', 'die','last', 'never', 'cruel', 'forgiveness', 'worse']

# Function to calculate sentiment score
def calculate_sentiment_score(words):
    positive_score = np.sum([word in positive_words for word in words])
    negative_score = np.sum([word in negative_words for word in words])
    total_score = positive_score - negative_score
    return total_score

# Apply sentiment scoring
spotify_data['sentiment_score'] = spotify_data['filtered_lyrics'].apply(lambda x: calculate_sentiment_score(x))

# Map sentiment scores to mood categories
def classify_mood(score):
    if score > 0:
        return 'Happy'
    elif score < 0:
        return 'Sad'
    else:
        return 'Neutral'

spotify_data['mood_category'] = spotify_data['sentiment_score'].apply(classify_mood)
print(spotify_data[['song', 'mood_category']])

# Now you can move on to deploying your model and using it to predict the mood of songs based on their lyrics.

                                song mood_category
0                          All I Ask           Sad
1                       Can't Let Go           Sad
2                      Crazy For You           Sad
3                         Daydreamer       Neutral
4                              Hello           Sad
5                         I Miss You           Sad
6             Melt My Heart To Stone         Happy
7                      Need You Know       Neutral
8                       Now And Then         Happy
9                   Someone Like You       Neutral
10   Think That I Get Told The Court         Happy
11         You'll Never See Me Again           Sad
12                     Best For Last           Sad
13                    Black And Gold           Sad
14                 Chasing Pavements         Happy
15                     Cold Shoulder           Sad
16                Don't You Remember         Happy
17                            Fiasco         Happy
18                        First

In [44]:
# Check the first few rows of the dataset
print(spotify_data.head())

  artist           song                                  link  \
0  Adele      All I Ask      /a/adele/all+i+ask_21105101.html   
1  Adele   Can't Let Go    /a/adele/cant+let+go_21105103.html   
2  Adele  Crazy For You  /a/adele/crazy+for+you_20830095.html   
3  Adele     Daydreamer     /a/adele/daydreamer_20730061.html   
4  Adele          Hello          /a/adele/hello_21103519.html   

                                                text  \
0  [Verse 1]  \nI will leave my heart at the door...   
1  [Verse 1]  \nWhen did it go wrong, I will neve...   
2  Found myself today singing out your name,  \nY...   
3  Daydreamer  \nSitting on the sea  \nSoaking up...   
4  [Verse 1]  \nHello, it's me  \nI was wondering...   

                                        clean_lyrics  \
0  verse   \ni will leave my heart at the door  \...   
1  verse   \nwhen did it go wrong i will never kn...   
2  found myself today singing out your name  \nyo...   
3  daydreamer  \nsitting on the sea  \nsoaking u

In [45]:
# Define features (X) and target variable (y)
X = spotify_data['filtered_lyrics']
y = spotify_data['mood_category']

In [46]:
# Split the data into training and testing sets without sklearn
mask = np.random.rand(len(spotify_data)) < 0.8
train_data = spotify_data[mask]
test_data = spotify_data[~mask]

In [47]:
# Define features (X) and target variable (y)
X_train = train_data['filtered_lyrics']
y_train = train_data['mood_category']
X_test = test_data['filtered_lyrics']
y_test = test_data['mood_category']

In [48]:
unique_words = np.unique(np.hstack(X_train.apply(lambda x: np.unique(x)).values))

# Create a dictionary mapping each unique word to its index
word_to_index = {word: idx for idx, word in enumerate(unique_words)}


In [49]:
def vectorize_text(text, word_to_index):
    vector = np.zeros(len(word_to_index))
    for word in text:
        if word in word_to_index:
            vector[word_to_index[word]] += 1
    return vector

X_train_vectorized = np.array([vectorize_text(text, word_to_index) for text in X_train])
X_test_vectorized = np.array([vectorize_text(text, word_to_index) for text in X_test])

In [50]:
class MultinomialNB:
    def __init__(self):
        self.class_probs = {}
        self.word_probs = {}

    def fit(self, X, y):
        unique_classes, class_counts = np.unique(y, return_counts=True)
        total_samples = len(y)

        for cls, count in zip(unique_classes, class_counts):
            self.class_probs[cls] = count / total_samples

        for cls in unique_classes:
            cls_indices = np.where(y == cls)
            cls_texts = X[cls_indices]
            total_words_in_cls = np.sum(cls_texts)
            self.word_probs[cls] = np.sum(cls_texts, axis=0) / total_words_in_cls

    def predict(self, X):
        predictions = []

        for text in X:
            scores = {}

            for cls in self.class_probs:
                score = np.log(self.class_probs[cls])
                score += np.sum(np.log(self.word_probs[cls] + 1) * text)
                scores[cls] = score

            predicted_class = max(scores, key=scores.get)
            predictions.append(predicted_class)

        return np.array(predictions)

In [51]:
# Instantiate and fit the classifier
classifier = MultinomialNB()
classifier.fit(X_train_vectorized, y_train)

In [52]:
y_pred = classifier.predict(X_test_vectorized)


In [53]:
def accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    return correct / total

def precision_recall_f1(y_true, y_pred):
    unique_classes = np.unique(np.concatenate([y_true, y_pred]))
    metrics = {}

    for cls in unique_classes:
        true_positive = np.sum((y_true == cls) & (y_pred == cls))
        false_positive = np.sum((y_true != cls) & (y_pred == cls))
        false_negative = np.sum((y_true == cls) & (y_pred != cls))

        precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
        recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        metrics[cls] = {'Precision': precision, 'Recall': recall, 'F1 Score': f1}

    return metrics

print("Accuracy:", accuracy(y_test.values, y_pred))
print("Classification Report:\n", precision_recall_f1(y_test.values, y_pred))

Accuracy: 0.16666666666666666
Classification Report:
 {'Happy': {'Precision': 0.18181818181818182, 'Recall': 1.0, 'F1 Score': 0.3076923076923077}, 'Neutral': {'Precision': 0, 'Recall': 0.0, 'F1 Score': 0}, 'Sad': {'Precision': 0.0, 'Recall': 0.0, 'F1 Score': 0}}


In [54]:
import random

# Define a function to generate lyrics based on mood
def generate_lyrics_randomly(vocabulary, num_words=50):
    # Randomly sample words from the vocabulary to generate lyrics
    generated_lyrics = ' '.join(random.sample(vocabulary, min(num_words, len(vocabulary))))

    return generated_lyrics

# Example usage:
# Assuming you have the vocabulary from the dataset
vocabulary = set(word for lyrics in spotify_data['filtered_lyrics'] for word in lyrics)

mood_to_generate = 'neutral'
generated_lyrics = generate_lyrics_randomly(vocabulary)
print(f"Generated Lyrics for {mood_to_generate} mood:\n", generated_lyrics)


Generated Lyrics for neutral mood:
 tingle learning choke mouth stranger couldve scars desert iiiii were youll obviously haunted raging lover hollywood yesterday giving city straight favorite gonna caused take in hustle trouble anything become other chorus used posses stay already born threw excusing write guess uninvited lipstick quiet chasing me lump changed clearly married difference


since Python 3.9 and will be removed in a subsequent version.
  generated_lyrics = ' '.join(random.sample(vocabulary, min(num_words, len(vocabulary))))
