In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the NLTK resources the preprocessing step relies on:
# tokenizer models, the stop-word lists and the WordNet database.
nltk.download('punkt')
# Recent NLTK releases (3.9+) make word_tokenize load the 'punkt_tab' tables
# instead of the pickled 'punkt' models, so fetch both to keep the notebook
# runnable on a fresh kernel regardless of the installed NLTK version.
nltk.download('punkt_tab')
nltk.download('stopwords')
# Kept as the last expression so the cell still displays the wordnet download status.
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Lazily-built resources shared by every preprocess() call.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the (stop-word set, lemmatizer) pair, building it on first use only."""
    if not _PREPROCESS_RESOURCES:
        # Reading the stop-word corpus and constructing the lemmatizer are
        # independent of the input text, so do them once instead of per call.
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess(text):
    """Normalise a raw document into a space-separated string of lemmas.

    The text is lower-cased and tokenized with ``word_tokenize``, English
    stop words are dropped, and every remaining token is passed through
    ``WordNetLemmatizer.lemmatize``.

    Parameters
    ----------
    text : str
        Raw document text.

    Returns
    -------
    str
        The surviving lemmatized tokens joined by single spaces.
    """
    stop_words, lemmatizer = _get_preprocess_resources()

    # Tokenization, stop-word removal and lemmatization in a single pass.
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words
    ]

    return ' '.join(lemmas)

In [4]:
# Small hand-picked lexicons of polarity-bearing words, one list per sentiment.
positive_words = [
    'good',
    'great',
    'excellent',
    'positive',
    'awesome',
]
negative_words = [
    'bad',
    'terrible',
    'negative',
    'awful',
    'poor',
]

In [5]:
# Labelled toy corpus: each entry is a (raw text, sentiment label) pair.
# The order of the entries is significant for the seeded train/test split.
documents = [
    (
        "The movie was excellent, I loved it!",
        'positive',
    ),
    (
        "The food was terrible, I wouldn't recommend it.",
        'negative',
    ),
    (
        "I had a great experience with their customer service.",
        'positive',
    ),
    (
        "The weather today is awful, I hate it.",
        'negative',
    ),
    (
        "The product quality is poor, very disappointing.",
        'negative',
    ),
    (
        "I had a good time at the party, it was fun!",
        'positive',
    ),
]

In [6]:
# Run every document's text through preprocess(), keeping its label paired with it.
preprocessed_documents = []
for raw_text, sentiment in documents:
    preprocessed_documents.append((preprocess(raw_text), sentiment))

In [7]:
# Hold out 20% of the documents for testing. The split is stratified on the
# sentiment label so that, even with this few examples, the class proportions
# are preserved on both sides of the split; random_state fixes the shuffle.
document_labels = [label for _, label in preprocessed_documents]
train_data, test_data = train_test_split(
    preprocessed_documents,
    test_size=0.2,
    random_state=42,
    stratify=document_labels,
)

In [8]:
# Create the TF-IDF vectorizer (no arguments, so the library defaults apply).
# It is fitted on the training texts in a later cell.
vectorizer = TfidfVectorizer()

In [9]:
# Build the TF-IDF matrices. The vocabulary and IDF weights are learned from
# the training texts only; the test texts are merely projected onto them.
train_texts = [text for text, _label in train_data]
test_texts = [text for text, _label in test_data]

train_features = vectorizer.fit_transform(train_texts)
test_features = vectorizer.transform(test_texts)

In [10]:
# Fit a multinomial Naive Bayes model on the training TF-IDF features.
classifier = MultinomialNB()
train_labels = [sentiment for _text, sentiment in train_data]
classifier.fit(train_features, train_labels)

MultinomialNB()

In [16]:
# User input: prompt interactively for the document to classify.
# NOTE(review): this cell blocks until text is typed, so the cells below
# depend on whatever is entered at run time.
user_input = input("Enter a document: ")

Enter a document: The product quality is poor, very disappointing.


In [17]:
# Apply the same preprocess() normalisation that was applied to the corpus documents.
preprocessed_input = preprocess(user_input)

In [18]:
# Vectorize the single input document with the already-fitted vectorizer
# (transform only, no refitting).
input_features = vectorizer.transform([preprocessed_input])

In [19]:
# Classify the input; predict() returns one label per row, and there is a single row here.
predicted_labels = classifier.predict(input_features)
prediction = predicted_labels[0]
print("Predicted polarity:", prediction)


Predicted polarity: negative
