In [10]:
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [11]:
url_training = 'https://raw.githubusercontent.com/prathamgupta36/BERT-Data/main/Data/cleanedTrainingLabeled.csv'
url_validating = 'https://raw.githubusercontent.com/prathamgupta36/BERT-Data/main/Data/cleanedValidatingLabeled.csv'

In [12]:
training_data = pd.read_csv(url_training)
validating_data =pd.read_csv(url_validating)

In [13]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)

In [14]:
X_train_tfidf = tfidf_vectorizer.fit_transform(training_data['Lyrics'])
X_valid_tfidf = tfidf_vectorizer.transform(validating_data['Lyrics'])


In [15]:
y_train = training_data['Label']
y_valid = validating_data['Label']

In [16]:
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

In [17]:
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_tfidf, y_train)

In [18]:
nb_predictions = nb_model.predict(X_valid_tfidf)
lr_predictions = lr_model.predict(X_valid_tfidf)

In [19]:
nb_accuracy = accuracy_score(y_valid, nb_predictions)
lr_accuracy = accuracy_score(y_valid, lr_predictions)
nb_report = classification_report(y_valid, nb_predictions)
lr_report = classification_report(y_valid, lr_predictions)

(nb_accuracy, lr_accuracy, nb_report, lr_report)

(0.6818181818181818,
 0.7272727272727273,
 '              precision    recall  f1-score   support\n\n    Negative       1.00      0.07      0.12        30\n    Positive       0.67      1.00      0.81        58\n\n    accuracy                           0.68        88\n   macro avg       0.84      0.53      0.47        88\nweighted avg       0.79      0.68      0.57        88\n',
 '              precision    recall  f1-score   support\n\n    Negative       0.75      0.30      0.43        30\n    Positive       0.72      0.95      0.82        58\n\n    accuracy                           0.73        88\n   macro avg       0.74      0.62      0.62        88\nweighted avg       0.73      0.73      0.69        88\n')

In [20]:
def predict_sentiment(lyrics):
    lyrics_vector = tfidf_vectorizer.transform([lyrics])
    prediction = lr_model.predict(lyrics_vector)
    return prediction[0]

In [21]:
lyrics= "Well met couple hours ago last night town hey would not know would get hooked girl blue diamond eyes Mexico oh walking asking dance Smilin' smile reaching hand Well move two would like show still got chance got soul know use Put hand hip know lose got heart racing like nothing Fallin' love beat music Oh-oh would not want stay Yeah Oh-oh let plane fly away away away got soul know use Put hand hip know lose got heart racing like nothing Fallin' love beat music beat music think could get used steel drum playing Wakin' beach know saying One night would alright hold baby got soul know use Put hand hip know lose got heart racing like nothing Falling love beat music falling love beat music Ooh-ooh Beat music Yay-ay-ay Yay-ay-ay ay Yay-ay-ay"
predict_sentiment(lyrics)

'Positive'