[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
import joblib
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Fetch the NLTK resources this cell relies on: tokenizer data and stopword lists.
# nltk.download is called on every run of the cell.
nltk.download("punkt_tab")
nltk.download("stopwords")

# Load the persisted model and vectorizer (paths are relative to the working directory).
# NOTE(review): joblib.load unpickles arbitrary objects; only load files you trust.
model = joblib.load("sentiment_model.sav")
vectorizer = joblib.load("vectorizer.sav")

# Porter stemmer shared by the preprocessing functions below.
stemmer = PorterStemmer()

# Tokens treated as negation markers, with and without apostrophes.
# "n't" is included as a standalone token as well.
negation_words = {
    "not", "no", "n't",
    "dont", "don't",
    "doesnt", "doesn't",
    "isnt", "isn't",
    "wasnt", "wasn't",
    "didnt", "didn't",
    "wont", "won't",
    "cant", "can't",
}

# English stopwords minus the negation markers, so negations are never
# filtered out as stopwords.
default_stopwords = set(stopwords.words('english'))
custom_stopwords = default_stopwords.difference(negation_words)

# Function to handle negations
def handle_negation(text):
    """Fold each negation marker into the token that follows it.

    The text is lowercased and split with ``word_tokenize``. Every token
    found in ``negation_words`` is removed, and the next non-negation token
    (whatever it is, including punctuation) is emitted with a ``not_``
    prefix. A negation marker with nothing after it is simply dropped.

    Returns the resulting tokens joined by single spaces.
    """
    output_tokens = []
    pending_negation = False

    for token in word_tokenize(text.lower()):
        if token in negation_words:
            # Remember the negation; the marker itself is not emitted.
            pending_negation = True
        elif pending_negation:
            output_tokens.append("not_" + token)
            pending_negation = False
        else:
            output_tokens.append(token)

    return " ".join(output_tokens)

# Final preprocessing function (same as training)
def stemming(content):
    """Turn raw text into the space-joined token string used for vectorizing.

    Steps, in order:
      1. Falsy input (``None``, ``""``) yields ``""`` immediately.
      2. ``handle_negation`` merges negation markers into ``not_<token>``.
      3. Every character that is not an ASCII letter, underscore or
         whitespace is replaced by a space.
      4. Tokens starting with ``not_`` are kept verbatim (not stemmed, not
         stopword-filtered); all other tokens are dropped if they are in
         ``custom_stopwords`` and Porter-stemmed otherwise.
    """
    if not content:
        return ""

    negated = handle_negation(content)
    letters_only = re.sub(r'[^a-zA-Z_\s]', ' ', negated)
    tokens = letters_only.lower().split()

    return ' '.join(
        token if token.startswith("not_") else stemmer.stem(token)
        for token in tokens
        if token.startswith("not_") or token not in custom_stopwords
    )

# === Test Input ===
# Add more strings to this list to classify several texts in one run;
# every entry is preprocessed, vectorized and predicted.
sample_text = ["I don't like this movie."]  # Negative sentiment example

# Preprocess all samples, then vectorize and predict them as a single batch.
preprocessed_text = [stemming(text) for text in sample_text]
vector = vectorizer.transform(preprocessed_text)
predictions = model.predict(vector)

# Map prediction to label
label_map = {0: "negative", 1: "positive"}

# Report each sample next to its preprocessed form and predicted label.
# With a single sample, `prediction` ends up bound to that sample's result,
# as it was before.
for text, cleaned, prediction in zip(sample_text, preprocessed_text, predictions):
    print("Text:", text)
    print("Preprocessed:", cleaned)
    print("Predicted Sentiment:", label_map[prediction])



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Text: I don't like this movie.
Preprocessed: not_like movi
Predicted Sentiment: negative
