Sentiment Analysis on Song Lyrics
An NLP project using machine learning to analyze the emotional content of song lyrics.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Toy corpus: each entry pairs one lyric line with its hand-assigned sentiment
# label, so a lyric and its label cannot drift out of alignment.
labelled_lines = [
    ("I'm feeling good and everything's alright", 'positive'),
    ("Why does it rain all the time in my heart", 'negative'),
    ("I love the way you lie", 'negative'),
    ("Happiness is a warm gun", 'negative'),
    ("This pain is all I have now", 'negative'),
    ("You're beautiful, it's true", 'positive'),
    ("I wish I never met you", 'negative'),
    ("Tonight, we are young and wild and free", 'positive'),
    ("All around me are familiar faces, worn out places", 'negative'),
    ("You make me smile when skies are grey", 'positive'),
]

# Column-oriented view of the same records: one list per DataFrame column.
data = {
    'lyrics': [text for text, label in labelled_lines],
    'sentiment': [label for text, label in labelled_lines],
}
df = pd.DataFrame(data)
df

In [None]:
# Split the raw lyrics BEFORE vectorizing so the vocabulary is learned from the
# training rows only; fitting CountVectorizer on the full corpus would leak
# test-set words into the feature space.
y = df['sentiment']
lyrics_train, lyrics_test, y_train, y_test = train_test_split(
    df['lyrics'],
    y,
    test_size=0.2,
    random_state=42,
    # With only 10 rows, an unstratified 20% split can easily draw a test set
    # containing a single sentiment; stratifying keeps both classes represented.
    stratify=y,
)

# Bag-of-words counts with English stop words removed.
vectorizer = CountVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(lyrics_train)  # learn vocabulary on train only
X_test = vectorizer.transform(lyrics_test)        # unseen test words are ignored

# Full-corpus matrix expressed in the training vocabulary; kept so any later
# cell that still refers to `X` continues to work.
X = vectorizer.transform(df['lyrics'])

In [None]:
# Train both classifiers on the same bag-of-words training matrix.
# `.fit()` returns the estimator itself, so construction and fitting can be chained.
nb_model = MultinomialNB().fit(X_train, y_train)
lr_model = LogisticRegression().fit(X_train, y_train)

# Predict the held-out rows with each fitted model (Naive Bayes first, then
# logistic regression).
nb_preds, lr_preds = (clf.predict(X_test) for clf in (nb_model, lr_model))

In [None]:
# Fraction of held-out lyrics each model labelled correctly.
nb_acc, lr_acc = (
    accuracy_score(y_test, preds) for preds in (nb_preds, lr_preds)
)

# One line per model, rounded to two decimals.
print(
    f"Naive Bayes Accuracy: {nb_acc:.2f}",
    f"Logistic Regression Accuracy: {lr_acc:.2f}",
    sep="\n",
)

In [None]:
# Pin the class order to the classes the model was trained on. Without `labels`,
# confusion_matrix only includes classes that appear in y_test or lr_preds, so a
# tiny test set can collapse the matrix to 1x1 and hide a whole class.
class_labels = lr_model.classes_
cm = confusion_matrix(y_test, lr_preds, labels=class_labels)

# Explicit figure/axes interface; tick labels show the sentiment names instead
# of the bare 0/1 positions seaborn would otherwise print.
fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - Logistic Regression')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# Per-class precision / recall / F1 for the logistic regression model.
# `labels` forces a row for every class the model knows about, even when the
# small test set happens not to contain it; `zero_division=0` reports such
# undefined metrics as 0 without emitting UndefinedMetricWarning.
report = classification_report(
    y_test,
    lr_preds,
    labels=lr_model.classes_,
    zero_division=0,
    output_dict=True,
)

# Transpose so each class / average is a row and each metric a column.
report_df = pd.DataFrame(report).transpose()
report_df