<a href="https://colab.research.google.com/github/srikar-kotra/LyricsClassifier-Generator/blob/main/AuctionLyrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd

In [6]:
from google.colab import files

uploaded = files.upload()

Saving Book1.csv to Book1.csv


In [8]:
file_path = 'Book1.csv'
spotify_data = pd.read_csv(file_path)

In [9]:
# Data Preprocessing
# Clean the lyrics data
spotify_data['clean_lyrics'] = spotify_data['text'].str.replace('[^a-zA-Z\s]', '', regex=True).str.lower()


# Tokenize the lyrics using numpy
spotify_data['tokenized_lyrics'] = spotify_data['clean_lyrics'].apply(lambda x: np.array(x.split()))

# Remove stop words (assuming 'stop_words' a list of stop words)
stop_words = ['a', 'an', 'the', 'is', 'of', 'and', 'it']  # Define the stop words
spotify_data['filtered_lyrics'] = spotify_data['tokenized_lyrics'].apply(lambda x: np.array([word for word in x if word not in stop_words]))

In [56]:
# Sentiment Analysis using a custom approach
# Define your own sentiment scoring system using numpy
# This could involve counting positive and negative words, or using other custom rules

# For example, defining a simple sentiment score based on the presence of certain words
# Positive Words# Positive Words
positive_words = ['love', 'joy', 'happy', 'sun', 'dream', 'free', 'happiness', 'dream', 'hope', 'laugh']

# Negative Words
negative_words = ['heartbreak', 'pain', 'tears', 'sad', 'lonely', 'dark', 'angry', 'hate', 'death', 'die']

# Function to calculate sentiment score
def calculate_sentiment_score(words):
    positive_score = np.sum([word in positive_words for word in words])
    negative_score = np.sum([word in negative_words for word in words])
    total_score = positive_score - negative_score
    return total_score

# Apply sentiment scoring
spotify_data['sentiment_score'] = spotify_data['filtered_lyrics'].apply(lambda x: calculate_sentiment_score(x))

# Map sentiment scores to mood categories
def classify_mood(score):
    if score > 0:
        return 'Happy'
    elif score < 0:
        return 'Sad'
    else:
        return 'Neutral'

spotify_data['mood_category'] = spotify_data['sentiment_score'].apply(classify_mood)
print(spotify_data[['song', 'mood_category']])

# Now you can move on to deploying your model and using it to predict the mood of songs based on their lyrics.

                             song mood_category
0             Belong To The World         Happy
1                     False Alarm         Happy
2                        Live For           Sad
3                        The Town         Happy
4                      Acquainted         Happy
5                           Angel         Happy
6                      As You Are         Happy
7              Can't Feel My Face         Happy
8                   Devil May Cry       Neutral
9                       Earned It         Happy
10                          Enemy         Happy
11            Heaven Or Las Vegas       Neutral
12                  High For This         Happy
13             Can't Feel My Face         Happy
14                   In The Night           Sad
15                         Losers         Happy
16                          Often         Happy
17               Pass Dat (Remix)       Neutral
18                      Real Life       Neutral
19                      Shameless       

In [57]:
# Check the first few rows of the dataset
print(spotify_data.head())

       artist                 song  \
0  The Weeknd  Belong To The World   
1  The Weeknd          False Alarm   
2  The Weeknd             Live For   
3  The Weeknd             The Town   
4  The Weeknd           Acquainted   

                                              link  \
0  /t/the+weeknd/belong+to+the+world_21065400.html   
1          /t/the+weeknd/false+alarm_21111264.html   
2             /t/the+weeknd/live+for_21067397.html   
3             /t/the+weeknd/the+town_21067482.html   
4           /t/the+weeknd/acquainted_21102482.html   

                                                text  \
0  I know you want your money, girl  \n'Cause you...   
1  [Verse 1]  \nBathroom stalls for the powder no...   
2  Getting sober for a day, got me feeling too lo...   
3  You did many things  \nThat I liked, that I li...   
4  [Verse 1]  \nBaby you smell good  \nCause they...   

                                        clean_lyrics  \
0  i know you want your money girl  \ncause you d... 

In [58]:
# Define features (X) and target variable (y)
X = spotify_data['filtered_lyrics']
y = spotify_data['mood_category']

In [59]:
# Split the data into training and testing sets without sklearn
mask = np.random.rand(len(spotify_data)) < 0.8
train_data = spotify_data[mask]
test_data = spotify_data[~mask]

In [60]:
# Define features (X) and target variable (y)
X_train = train_data['filtered_lyrics']
y_train = train_data['mood_category']
X_test = test_data['filtered_lyrics']
y_test = test_data['mood_category']

In [61]:
unique_words = np.unique(np.hstack(X_train.apply(lambda x: np.unique(x)).values))

# Create a dictionary mapping each unique word to its index
word_to_index = {word: idx for idx, word in enumerate(unique_words)}


In [62]:
def vectorize_text(text, word_to_index):
    vector = np.zeros(len(word_to_index))
    for word in text:
        if word in word_to_index:
            vector[word_to_index[word]] += 1
    return vector

X_train_vectorized = np.array([vectorize_text(text, word_to_index) for text in X_train])
X_test_vectorized = np.array([vectorize_text(text, word_to_index) for text in X_test])

In [63]:
class MultinomialNB:
    def __init__(self):
        self.class_probs = {}
        self.word_probs = {}

    def fit(self, X, y):
        unique_classes, class_counts = np.unique(y, return_counts=True)
        total_samples = len(y)

        for cls, count in zip(unique_classes, class_counts):
            self.class_probs[cls] = count / total_samples

        for cls in unique_classes:
            cls_indices = np.where(y == cls)
            cls_texts = X[cls_indices]
            total_words_in_cls = np.sum(cls_texts)
            self.word_probs[cls] = np.sum(cls_texts, axis=0) / total_words_in_cls

    def predict(self, X):
        predictions = []

        for text in X:
            scores = {}

            for cls in self.class_probs:
                score = np.log(self.class_probs[cls])
                score += np.sum(np.log(self.word_probs[cls] + 1) * text)
                scores[cls] = score

            predicted_class = max(scores, key=scores.get)
            predictions.append(predicted_class)

        return np.array(predictions)

In [64]:
# Instantiate and fit the classifier
classifier = MultinomialNB()
classifier.fit(X_train_vectorized, y_train)

In [65]:
y_pred = classifier.predict(X_test_vectorized)


In [66]:
def accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    return correct / total

def precision_recall_f1(y_true, y_pred):
    unique_classes = np.unique(np.concatenate([y_true, y_pred]))
    metrics = {}

    for cls in unique_classes:
        true_positive = np.sum((y_true == cls) & (y_pred == cls))
        false_positive = np.sum((y_true != cls) & (y_pred == cls))
        false_negative = np.sum((y_true == cls) & (y_pred != cls))

        precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
        recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        metrics[cls] = {'Precision': precision, 'Recall': recall, 'F1 Score': f1}

    return metrics

print("Accuracy:", accuracy(y_test.values, y_pred))
print("Classification Report:\n", precision_recall_f1(y_test.values, y_pred))

Accuracy: 0.8333333333333334
Classification Report:
 {'Happy': {'Precision': 0.8333333333333334, 'Recall': 1.0, 'F1 Score': 0.9090909090909091}, 'Neutral': {'Precision': 0, 'Recall': 0.0, 'F1 Score': 0}}


In [68]:
import random

# Define a function to generate lyrics based on mood
def generate_lyrics_randomly(vocabulary, num_words=50):
    # Randomly sample words from the vocabulary to generate lyrics
    generated_lyrics = ' '.join(random.sample(vocabulary, min(num_words, len(vocabulary))))

    return generated_lyrics

# Example usage:
# Assuming you have the vocabulary from the dataset
vocabulary = set(word for lyrics in spotify_data['filtered_lyrics'] for word in lyrics)

mood_to_generate = 'Happy'
generated_lyrics = generate_lyrics_randomly(vocabulary)
print(f"Generated Lyrics for {mood_to_generate} mood:\n", generated_lyrics)


Generated Lyrics for Happy mood:
 day another myself lovin rode nothin come dancing half obsessed plenty warn their threesome while down responsibility want scars liked knee doubt gon coat have oh because guess put lens feel havent them double gonna conversation close stupids scream motherfucking almost said racist eighth point eyes old king control think


since Python 3.9 and will be removed in a subsequent version.
  generated_lyrics = ' '.join(random.sample(vocabulary, min(num_words, len(vocabulary))))
