<a href="https://colab.research.google.com/github/roggersanguzu/Anguzu-Sentiment-Analysis-AI/blob/main/Sentiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources used by the preprocessing step below:
#   punkt / punkt_tab -> tokenizer models used by word_tokenize
#                        (newer NLTK releases look up 'punkt_tab')
#   stopwords         -> English stop-word list
#   wordnet           -> dictionary backing WordNetLemmatizer
for resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(resource)

# Amazon review dataset; later cells read its 'reviewText' and 'Positive' columns.
df = pd.read_csv('https://raw.githubusercontent.com/pycaret/pycaret/master/datasets/amazon.csv')

# Cache for the stop-word set and lemmatizer so they are built once, not per review.
_TEXT_RESOURCES = {}


def _text_resources():
    """Return the cached (stop-word set, lemmatizer) pair, creating it on first use."""
    if not _TEXT_RESOURCES:
        # A frozenset gives O(1) membership tests; the original re-read the
        # stop-word list from the corpus for every single token.
        _TEXT_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _TEXT_RESOURCES['stop_words'], _TEXT_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalise a review into a space-joined string of lemmatized content words.

    The text is lower-cased and tokenized; tokens that are not purely
    alphabetic or that are English stop words are dropped; the remaining
    tokens are lemmatized and joined with single spaces.

    Args:
        text: The raw review text.

    Returns:
        The cleaned text, or an empty string when ``text`` is not a string
        (for example a NaN cell from a missing review).
    """
    if not isinstance(text, str):
        return ''

    stop_words, lemmatizer = _text_resources()
    tokens = word_tokenize(text.lower())
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokens
        if token.isalpha() and token not in stop_words
    )

# Run every raw review through preprocess_text and store the result in a new column.
review_texts = df['reviewText']
df['cleaned_review'] = review_texts.apply(preprocess_text)

In [None]:
!pip install transformers datasets torch
from transformers import pipeline

In [None]:
import torch
# Report whether PyTorch can see a CUDA device; as the last expression in the
# cell, the boolean is displayed as the cell's output.
torch.cuda.is_available()

In [None]:
# classification_report was used without being imported anywhere in the notebook.
from sklearn.metrics import classification_report

# Use the GPU when one is available (same selection rule as the Gradio cell).
device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)

# Score all reviews in one batched call instead of one pipeline call per row.
# truncation=True clips reviews that exceed the model's maximum sequence
# length; without it such reviews raise an error inside the model.
bert_outputs = classifier(df['reviewText'].tolist(), truncation=True, batch_size=32)
bert_labels = [output['label'] for output in bert_outputs]

# Encode the string labels as 1 (positive) / 0 (negative) so they can be
# compared against the 'Positive' ground-truth column.
df['bert_sentiment'] = pd.Series(bert_labels, index=df.index).map({'POSITIVE': 1, 'NEGATIVE': 0})

print(classification_report(df['Positive'], df['bert_sentiment']))


In [None]:
from sklearn.metrics import accuracy_score

# No visible cell creates df['vader_sentiment'], so a fresh Restart & Run All
# would fail with a KeyError here. Compute it only when it is missing, so a
# column produced elsewhere is left untouched.
if 'vader_sentiment' not in df.columns:
    from nltk.sentiment import SentimentIntensityAnalyzer

    nltk.download('vader_lexicon')
    vader = SentimentIntensityAnalyzer()
    # NOTE(review): 0.05 is VADER's conventional cut-off for a positive compound
    # score; confirm it matches the rule used when this column was first built.
    df['vader_sentiment'] = df['reviewText'].apply(
        lambda review: int(vader.polarity_scores(review)['compound'] >= 0.05)
    )

# Compare both models against the ground-truth 'Positive' labels.
vader_acc = accuracy_score(df['Positive'], df['vader_sentiment'])
bert_acc = accuracy_score(df['Positive'], df['bert_sentiment'])

print(f"VADER Accuracy: {vader_acc:.2f}")
print(f"BERT Accuracy: {bert_acc:.2f}")


In [None]:
import gradio as gr
from transformers import pipeline
import torch

# Pick the first CUDA device when one is available; -1 keeps the pipeline on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Sentiment pipeline used by the Gradio app defined below.
classifier = pipeline(
    "sentiment-analysis",
    device=device,
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

def predict_sentiment(text):
    """Classify ``text`` and return a one-line, human-readable verdict.

    Args:
        text: The text to classify. ``None`` and blank/whitespace-only input
            are treated as "nothing entered".

    Returns:
        A prompt asking for input when nothing was entered; otherwise a string
        of the form ``"<emoji> <LABEL> (<pct>% confidence)"`` built from the
        first result returned by the module-level ``classifier``.
    """
    # Guard against None as well as empty text so .strip() cannot raise.
    if text is None or not text.strip():
        return " Please enter some text."

    # truncation=True clips input that exceeds the model's maximum sequence
    # length instead of letting the model raise on long pasted text.
    result = classifier(text, truncation=True)[0]
    label = result['label']
    score = round(result['score'], 4)

    emoji = "😃" if label == "POSITIVE" else "😠"
    verdict = f"{emoji} {label} ({score * 100:.1f}% confidence)"

    return verdict

# Blurb rendered under the app title.
app_description = """
Built by Anguzu,I believer emotions matter, and data doesn't lie.
This AI uses transformer-based deep learning to classify text as either positive or negative.
Try me on feedback, tweets, product reviews, or even your ex’s last message .

Examples:
- “I love this app  it's pure genius!”
- “Total garbage. Waste of my time.”
- “Eh... it works okay I guess.”
"""

# Multi-line box where the user pastes the text to classify.
review_box = gr.Textbox(
    label="Enter Text",
    placeholder="Paste your Amazon review, tweet, or customer rant here...",
    lines=5,
)

# Read-only field that shows the verdict string returned by predict_sentiment.
verdict_box = gr.Text(label=" Anguzu's AI Sentiment Analysis")

demo = gr.Interface(
    fn=predict_sentiment,
    inputs=review_box,
    outputs=verdict_box,
    title="Anguzu Sentiment AI",
    description=app_description,
    allow_flagging="never",
    theme="soft",
)

# Start the web UI.
demo.launch()


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint to export and the local folder that receives it.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
export_dir = "./anguzu-sentiment-model"

# Load the tokenizer and the classification model for the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Write both artifacts into the same folder, tokenizer first.
for artifact in (tokenizer, model):
    artifact.save_pretrained(export_dir)
