In [None]:
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Fetch the 'punkt' resource through NLTK's downloader; the call reports
# whether the package is available locally after the attempt.
# NOTE(review): no visible cell uses nltk beyond this call — confirm it is still needed.
nltk.download(info_or_id='punkt')

[nltk_data] Downloading package punkt to /Users/jorocca/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
# Labelled toy corpus of movie-review snippets.
#
# Every review is stored next to its sentiment so that a text can never drift
# out of step with its label when entries are added, removed or reordered.
# The `texts` and `labels` lists used by the later cells are derived from
# these pairs, in the same order. Keep the order stable: the later train/test
# split uses a fixed seed, so reordering changes which samples are held out.
labeled_reviews = [
    ("I love this movie, it's amazing!", "positive"),
    ("This film is terrible, I hate it.", "negative"),
    ("What a great performance by the actors", "positive"),
    ("The plot was boring and predictable", "negative"),
    ("An excellent film, highly recommend", "positive"),
    ("Not worth the time, very disappointing", "negative"),
    ("The cinematography was beautiful", "positive"),
    ("Taylor Swift is mother", "positive"),
    ("The storyline was dull and uninteresting.", "negative"),
    ("I enjoyed every moment of this film!", "positive"),
    ("The acting was poor and unconvincing", "negative"),
    ("Absolutely fantastic! A must-watch", "positive"),
    ("One of the worst movies I've ever seen", "negative"),

    # Gen Z Slang Additions
    ("This movie is straight fire! 10/10, no cap!", "positive"),
    ("Bro, this film is straight up mid. Hard pass.", "negative"),
    ("Skibidi toilet level of quality, and that’s not a compliment.", "negative"),
    ("Peak cinema. Literally bussin'.", "positive"),
    ("This movie flopped harder than Morbius, sheesh.", "negative"),
    ("Certified hood classic, this film slaps!", "positive"),
    ("Main character energy. Absolutely goated with the sauce.", "positive"),
    ("It gave off major NPC energy, super cringe.", "negative"),
    ("If rizz was a movie, it would be this. 10/10.", "positive"),
    ("The plot had zero rizz, completely fumbled.", "negative"),
    ("The vibes were immaculate. W movie.", "positive"),
    ("L script, L acting, just a big fat L overall.", "negative"),
    ("The editing was elite, but the dialogue was straight sus.", "negative"),
    ("This movie aged like fine wine, still a W after all these years.", "positive"),
]

# Parallel views of the corpus, in corpus order, for the scikit-learn calls below.
texts = [review for review, _ in labeled_reviews]
labels = [sentiment for _, sentiment in labeled_reviews]

# Guard against typos in the sentiment column (e.g. "postive").
assert set(labels) <= {"positive", "negative"}, "unexpected sentiment label"


In [15]:
# Hold out 20% of the corpus for evaluation. `stratify=labels` requests that
# both partitions keep the class balance of the full corpus, and the fixed
# `random_state` makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)
assert len(X_train) + len(X_test) == len(texts), "split lost or duplicated samples"

# TF-IDF features feeding a multinomial Naive Bayes classifier; the whole
# pipeline is fit on the training partition only.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)

# Sanity check: per-class sample counts in each partition.
# (`np` comes from the notebook's top-level import cell.)
print("Train labels:", np.unique(y_train, return_counts=True))
print("Test labels:", np.unique(y_test, return_counts=True))

Train labels: (array(['negative', 'positive'], dtype='<U8'), array([10, 11]))
Test labels: (array(['negative', 'positive'], dtype='<U8'), array([3, 3]))


In [16]:
# Score the fitted pipeline on the held-out partition.
predicted_labels = model.predict(X_test)

# Overall fraction of held-out samples classified correctly.
test_accuracy = accuracy_score(y_test, predicted_labels)
print("Accuracy:", test_accuracy)

# Per-class precision / recall / F1 table.
print("Classification Report")
per_class_report = classification_report(y_test, predicted_labels)
print(per_class_report)

Accuracy: 0.8333333333333334
Classification Report
              precision    recall  f1-score   support

    negative       1.00      0.67      0.80         3
    positive       0.75      1.00      0.86         3

    accuracy                           0.83         6
   macro avg       0.88      0.83      0.83         6
weighted avg       0.88      0.83      0.83         6



In [17]:
# Unseen slang-heavy reviews used to eyeball the classifier. The trailing
# comment on each entry is the author's intended sentiment; it is not used by
# the code below.
new_texts = [
    "This movie is straight up GOATED, no debate.",  # positive
    "Bro, this film was an absolute L, don’t waste your time.",  # negative
    "The cinematography was bussin', but the story? Straight mid.",  # mixed
    "Skibidi toilet vibes all over this, unwatchable.",  # negative
    "The pacing was immaculate, not a single dull moment.",  # positive
    "Nah, this movie had NPC dialogue. Hard pass.",  # negative
    "This film had so much rizz, peak storytelling.",  # positive
    "Zero rizz, negative vibes, just pure disappointment.",  # negative
    "The main character was lowkey fire, but the ending fumbled.",  # mixed
    "Certified hood classic, truly a masterpiece.",  # positive
    "They really thought this script was it? Straight sus.",  # negative
    "This one deserves an Oscar. No cap!",  # positive
    "Morbius 2.0... and that's not a compliment.",  # negative
    "W movie, W acting, W soundtrack. Just a big W.",  # positive
    "Bro, who wrote this? Dialogue had me crying (not in a good way).",  # negative
]


# Classify every new review in one call, then print each review alongside
# the label predicted for it.
predicted_new_labels = model.predict(new_texts)
for position, text in enumerate(new_texts):
    label = predicted_new_labels[position]
    print(f'Text: {text}\nPredicted Label: {label}\n')

Text: This movie is straight up GOATED, no debate.
Predicted Label: positive

Text: Bro, this film was an absolute L, don’t waste your time.
Predicted Label: negative

Text: The cinematography was bussin', but the story? Straight mid.
Predicted Label: positive

Text: Skibidi toilet vibes all over this, unwatchable.
Predicted Label: positive

Text: The pacing was immaculate, not a single dull moment.
Predicted Label: positive

Text: Nah, this movie had NPC dialogue. Hard pass.
Predicted Label: positive

Text: This film had so much rizz, peak storytelling.
Predicted Label: positive

Text: Zero rizz, negative vibes, just pure disappointment.
Predicted Label: positive

Text: The main character was lowkey fire, but the ending fumbled.
Predicted Label: positive

Text: Certified hood classic, truly a masterpiece.
Predicted Label: positive

Text: They really thought this script was it? Straight sus.
Predicted Label: negative

Text: This one deserves an Oscar. No cap!
Predicted Label: negative
