In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score


In [None]:
# Fetch the NLTK data used by the preprocessing cell: tokenizer models for
# word_tokenize, the English stop-word list, and WordNet for the lemmatizer.
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of the
# pickled 'punkt' models, so both ids are requested; 'punkt' is kept for
# older installations.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)


In [None]:
# Toy sentiment corpus: every review is paired with its label
# (1 = positive, 0 = negative).
labelled_reviews = [
    ("I love this movie", 1),
    ("This film is amazing", 1),
    ("I hate this movie", 0),
    ("This film is terrible", 0),
    ("Great acting and story", 1),
    ("Worst movie ever", 0),
]

# Split the pairs back into the two parallel lists used by the later cells.
texts = [review for review, _ in labelled_reviews]
labels = [sentiment for _, sentiment in labelled_reviews]


In [None]:
# Built once at cell level so preprocess() does not rebuild them per call.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalise a raw sentence into a space-separated string of lemmas.

    The text is lower-cased and tokenised with ``word_tokenize``. Tokens that
    are not purely alphabetic, or that appear in ``stop_words``, are dropped;
    each remaining token is passed through ``lemmatizer.lemmatize``.

    Parameters
    ----------
    text : str
        The sentence to clean.

    Returns
    -------
    str
        The surviving lemmas joined by single spaces (empty if none survive).
    """
    kept = (
        word
        for word in word_tokenize(text.lower())
        if word.isalpha() and word not in stop_words
    )
    return " ".join(map(lemmatizer.lemmatize, kept))


In [None]:
# Run every raw review through the cleaning function, keeping the original order.
clean_texts = list(map(preprocess, texts))


In [None]:
# Hold out 30% of the examples for evaluation. The split is stratified on the
# labels so the class proportions are preserved in both folds; with only six
# examples an unstratified shuffle can leave the test fold with a single class,
# which makes the accuracy figure uninformative.
X_train, X_test, y_train, y_test = train_test_split(
    clean_texts, labels, test_size=0.3, random_state=42, stratify=labels
)


In [None]:
# Learn the TF-IDF vocabulary and weights from the training split only, then
# reuse the fitted vectorizer to encode the test split.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# fit() returns the estimator itself, so construction and training can be chained.
model = MultinomialNB().fit(X_train_vec, y_train)
model


In [None]:
# Score the classifier on the held-out split.
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
# Classify an unseen sentence: apply the same cleaning, encode it with the
# already-fitted vectorizer, then ask the model for a label.
new_text = "the movie was fantastic"
clean_new = preprocess(new_text)
vec_new = vectorizer.transform([clean_new])
prediction = model.predict(vec_new)
print("Prediction:", prediction)
