In [1]:
!pip install nltk scikit-learn matplotlib seaborn joblib




In [2]:
import nltk
nltk.download('movie_reviews')
from nltk.corpus import movie_reviews
import random

# Get words and labels (positive or negative)
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

random.shuffle(documents)  # Mix it up


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


In [3]:
from nltk import FreqDist

all_words = FreqDist(w.lower() for w in movie_reviews.words())
word_features = list(all_words)[:2000]  # Top 2000 words

def document_features(doc):
    doc_words = set(doc)
    features = {}
    for word in word_features:
        features[f'contains({word})'] = (word in doc_words)
    return features

# Apply the function to all documents
feature_sets = [(document_features(d), c) for (d, c) in documents]


In [4]:
from sklearn.model_selection import train_test_split
import nltk

# Split data: 80% train, 20% test
train_set, test_set = train_test_split(feature_sets, test_size=0.2, random_state=42)

# Train a Naive Bayes model (very good for text)
classifier = nltk.NaiveBayesClassifier.train(train_set)

# Test accuracy
print("Model Accuracy:", nltk.classify.accuracy(classifier, test_set) * 100, "%")


Model Accuracy: 76.5 %


In [5]:
from sklearn.metrics import classification_report, confusion_matrix

y_true = [label for (_, label) in test_set]
y_pred = [classifier.classify(feats) for (feats, _) in test_set]

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

print("\nPerformance Report:")
print(classification_report(y_true, y_pred))


Confusion Matrix:
[[178  33]
 [ 61 128]]

Performance Report:
              precision    recall  f1-score   support

         neg       0.74      0.84      0.79       211
         pos       0.80      0.68      0.73       189

    accuracy                           0.77       400
   macro avg       0.77      0.76      0.76       400
weighted avg       0.77      0.77      0.76       400



In [6]:
import joblib

joblib.dump(classifier, "saved_model.pkl")
joblib.dump(word_features, "word_features.pkl")


['word_features.pkl']

In [7]:
def chatbot_response(text):
    words = text.lower().split()
    feats = document_features(words)
    sentiment = classifier.classify(feats)

    if sentiment == 'pos':
        return "😊 I'm glad to hear that! How else can I help you?"
    elif sentiment == 'neg':
        return "😟 I'm sorry to hear that. Can you tell me more?"
    else:
        return "🤔 Hmm. How can I help you today?"

# Try chatting!
user_input = input("You: ")
print("Bot:", chatbot_response(user_input))


You: I HAD A BAD DAY
Bot: 😟 I'm sorry to hear that. Can you tell me more?
