In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fetch the NLTK stop-word corpus and keep the English list as a set so that
# membership checks during preprocessing are cheap.
nltk.download('stopwords')
english_stop_words = stopwords.words('english')
stop_words = set(english_stop_words)

# Toy labelled corpus: each review is paired, position by position, with its
# sentiment label.
reviews = [
    "I love this product!",
    "This is the worst experience ever.",
    "Absolutely fantastic service!",
    "I hate this item.",
    "Not bad, but could be better.",
    "I am very satisfied with the quality.",
]
labels = [1, 0, 1, 0, 1, 1]  # 1 = Positive, 0 = Negative

data = {'text': reviews, 'sentiment': labels}
df = pd.DataFrame(data)

def preprocess(text, stop_word_set=None):
    """Lowercase *text* and drop its stop words.

    The text is split on whitespace. Each token is compared against the
    stop-word set with leading/trailing punctuation stripped, so tokens such
    as "it." or "this," are recognised as stop words. Tokens that are kept
    retain their original punctuation.

    Args:
        text: The raw string to clean.
        stop_word_set: Optional collection of lowercase stop words. When
            omitted, the module-level ``stop_words`` set is used.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives).
    """
    import string  # local import: the file does not import it at the top

    if stop_word_set is None:
        stop_word_set = stop_words

    kept = [
        token
        for token in text.lower().split()
        # Strip only edge punctuation so contractions like "don't" still match.
        if token.strip(string.punctuation) not in stop_word_set
    ]
    return ' '.join(kept)

# Clean every review before splitting.
df['text'] = df['text'].apply(preprocess)

# Stratify on the label so both sentiment classes are represented in the
# training and test splits; with this few rows an unstratified shuffle can
# leave the training split with a single class, which the classifier rejects.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
    stratify=df['sentiment'],
)

# Convert text to numerical features using TF-IDF. The vocabulary is fitted
# on the training split only and then reused for the test split.
vectorizer = TfidfVectorizer()
x_train = vectorizer.fit_transform(x_train)
x_test = vectorizer.transform(x_test)

# Train model
model = LogisticRegression()
model.fit(x_train, y_train)

# Evaluate
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Predict sentiment. New text goes through the same preprocessing as the
# training data before it is vectorized.
new_review = ["The product is amazing and works perfectly!"]
new_review_vectorized = vectorizer.transform(
    [preprocess(review) for review in new_review]
)
prediction = model.predict(new_review_vectorized)
print(f"Predicted Sentiment: {'Positive' if prediction[0] == 1 else 'Negative'}")

[nltk_data] Downloading package stopwords to /root/nltk_data...


Accuracy: 0.5
Predicted Sentiment: Positive


[nltk_data]   Unzipping corpora/stopwords.zip.
