<a href="https://colab.research.google.com/github/selenophile2005/data-science/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Sentiment Analysis using Logistic Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
import re

# Download stopwords (first time only)
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Sample dataset
data = {
    'text': [
        'I love this product!',
        'This is the worst thing I bought.',
        'Absolutely fantastic experience.',
        'I hate how this works.',
        'Not bad, could be better.',
        'Really happy with the purchase.',
        'Worst service ever.',
        'Amazing quality and service.',
        'Disappointed and sad.',
        'Excellent performance!'
    ],
    'sentiment': [1, 0, 1, 0, 0, 1, 0, 1, 0, 1]  # 1 = Positive, 0 = Negative
}

df = pd.DataFrame(data)

# Text preprocessing
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return " ".join(words)

df['clean_text'] = df['text'].apply(clean_text)

# Split data
X = df['clean_text']
y = df['sentiment']

# Vectorization
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.3, random_state=42)

# Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Output
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Example prediction
sample = ["I am not happy with this service"]
sample_clean = [clean_text(s) for s in sample]
sample_vec = vectorizer.transform(sample_clean)
print("Predicted Sentiment:", "Positive" if model.predict(sample_vec)[0] == 1 else "Negative")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Accuracy: 0.6666666666666666
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

Predicted Sentiment: Positive
