# Evaluate a real classifier

This code is an example of the use of two sentiment classifiers from NLTK: a Naive Bayes classifier that is trained on a dataset of movie reviews, and VADER, which scores text using a sentiment lexicon.

Look at how the example uses the scikit-learn library to evaluate the classifier.

At the end there is an example of how to use the classifier on custom examples.


In [2]:

import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.metrics import classification_report, confusion_matrix
import random

# Download required NLTK datasets.
# 'punkt_tab' is fetched alongside 'punkt' because recent NLTK releases load
# the sentence tokenizer used by word_tokenize from 'punkt_tab'; older
# releases still read 'punkt', so both are requested.
for package in ('movie_reviews', 'punkt', 'punkt_tab', 'stopwords', 'vader_lexicon'):
    nltk.download(package)

# Preprocess the data
stop_words = set(stopwords.words('english'))


def extract_features(words):
    """Build a bag-of-words feature dict from an iterable of tokens.

    Each token is lowercased, dropped if it is an English stop word, and
    otherwise stored as a key mapped to ``True``.

    Keys are lowercased (not just the stop-word test) so that tokens which
    differ only in case -- for example a sentence-initial "Today" versus
    "today" -- map onto the same feature.

    Args:
        words: Iterable of string tokens.

    Returns:
        dict mapping each kept lowercase token to ``True``.
    """
    lowered = (word.lower() for word in words)
    return {token: True for token in lowered if token not in stop_words}

# Build the labelled corpus: one (token list, category) pair per review file
documents = [
    (list(movie_reviews.words(file_id)), label)
    for label in movie_reviews.categories()
    for file_id in movie_reviews.fileids(label)
]
# The list above is grouped by category; shuffle in place before splitting
random.shuffle(documents)

# Turn every review into a bag-of-words feature dict, keeping its label
feature_sets = [(extract_features(tokens), label) for tokens, label in documents]

# First 80% of the shuffled data is used for training, the rest for testing
train_size = int(len(feature_sets) * 0.8)
train_set = feature_sets[:train_size]
test_set = feature_sets[train_size:]

# Fit the Naive Bayes model on the training portion
classifier = NaiveBayesClassifier.train(train_set)

# Report accuracy on the held-out set and list the ten most informative features
print("\nNaive Bayes Classifier Evaluation:")
print(f"Accuracy: {accuracy(classifier, test_set) * 100:.2f}%")
classifier.show_most_informative_features(10)

# Gold labels and model predictions, both in test-set order, for sklearn
y_true = [gold for _features, gold in test_set]
y_pred = [classifier.classify(features) for features, _gold in test_set]

# Per-class precision / recall / F1, followed by the confusion matrix
print("\nClassification Report:", classification_report(y_true, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_true, y_pred), sep="\n")

# VADER Sentiment Analysis on custom examples
sia = SentimentIntensityAnalyzer()
example_sentences = [
    "I absolutely loved this movie! The acting was fantastic.",
    "This was the worst film I have ever seen.",
    "The plot was predictable, but the cinematography was beautiful.",
    "I wouldn't recommend it. It was boring and too long."
]

print("\nVADER Sentiment Analysis:")
for sentence in example_sentences:
    score = sia.polarity_scores(sentence)
    compound = score['compound']
    # Use the conventional VADER cut-offs of +/-0.05 so that a sentence with
    # no clear polarity (compound at or near 0) is reported as neutral
    # instead of being lumped in with the negatives.
    if compound >= 0.05:
        sentiment = "positive"
    elif compound <= -0.05:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    print(f"Sentence: {sentence}\nSentiment: {sentiment} (Score: {compound})\n")


[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\manue\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\manue\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\manue\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\manue\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!



Naive Bayes Classifier Evaluation:
Accuracy: 73.00%
Most Informative Features
             outstanding = True              pos : neg    =     15.2 : 1.0
                  finest = True              pos : neg    =     13.8 : 1.0
                chilling = True              pos : neg    =     12.7 : 1.0
               ludicrous = True              neg : pos    =     12.2 : 1.0
               affecting = True              pos : neg    =     12.0 : 1.0
                  seagal = True              neg : pos    =     11.3 : 1.0
                 idiotic = True              neg : pos    =     11.1 : 1.0
                 insipid = True              neg : pos    =     10.7 : 1.0
                  avoids = True              pos : neg    =     10.6 : 1.0
                palpable = True              pos : neg    =     10.0 : 1.0

Classification Report:
              precision    recall  f1-score   support

         neg       0.93      0.46      0.62       188
         pos       0.67      0.97     

# Exercise:

Create your own gold standard and measure Precision, Recall, and F1 both manually and with scikit-learn, then check that the results are the same.

In [9]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Hand-labelled sentences used as the gold standard: (text, expected label)
gold_standard = [
    ("Today is a great day.", "positive"),
    ("I don't like this movie.", "negative"),
    ("This was the best day ever.", "positive"),
    ("He is behaving worse than ever.", "negative"),
    ("This was an amazing story, where did you learn it?", "positive"),
    ("He is the worst person I know.", "negative"),
    ("Have a good day.", "positive"),
    ("What a bad day, it's raining.", "negative"),
    ("I love the rain.", "positive"),
    ("Not worth your time.", "negative"),
]

# Tokenize each sentence and convert it to the classifier's feature format
feature_sets = [
    (extract_features(word_tokenize(text)), gold_label)
    for text, gold_label in gold_standard
]
test_set = feature_sets

y_true = [gold_label for _features, gold_label in test_set]
# The classifier's "pos" answer is mapped to "positive" and every other
# answer to "negative", matching the label vocabulary of the gold standard
y_pred = [
    "positive" if classifier.classify(features) == "pos" else "negative"
    for features, _gold_label in test_set
]

print(y_true, y_pred, sep="\n")

# Manual confusion counts, treating "positive" as the positive class
label_pairs = list(zip(y_true, y_pred))  # one (gold, predicted) pair per sentence
true_positives = label_pairs.count(("positive", "positive"))
false_positives = label_pairs.count(("negative", "positive"))
false_negatives = label_pairs.count(("positive", "negative"))
true_negatives = label_pairs.count(("negative", "negative"))
print(true_positives, false_negatives, sep="\n")


#precission = true_positives / (true_positives + false_positives)

#recall = true_positives / (true_positives + false_negatives)
#
#f1 = 2 * (precission * recall) / (precission + recall)
#
#print(f"Precission manually: {precission:.2f}")
#print(f"Recall manually: {recall:.2f}")
#print(f"F1 manually: {f1:.2f}")
#
##Scikit-learn
#precission = precision_score(y_true, y_pred, pos_label="positive")
#recall = recall_score(y_true, y_pred, pos_label="positive")
#f1 = f1_score(y_true, y_pred, pos_label="positive")
#
#print(f"Precission sccikit: {precission:.2f}")
#print(f"Recall scikit: {recall:.2f}")
#print(f"F1 scikit: {f1:.2f}")

['positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative']
['positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'negative', 'positive']
4
1
