# In [None]:
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
import nltk
from nltk.corpus import stopwords
import pandas as pd
import flask
from flask import Flask, request, jsonify
from sklearn.model_selection import train_test_split

# Load dataset
# Read from the current working directory; the code below uses its 'text'
# and 'sentiment' columns.
df = pd.read_csv("sentiment_analysis.csv")

# Stop words for the text cleaning function
# Fetch the NLTK 'stopwords' resource, then keep the English list as a set so
# clean_text() can test word membership cheaply.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalize raw text before it is tokenized.

    The text is lower-cased, URLs (``http`` up to the next whitespace) are
    removed, every character that is not an ASCII letter or whitespace is
    dropped, and words found in the module-level ``stop_words`` set are
    filtered out.

    Args:
        text: Raw input string. Non-string values (for example ``None`` or
            the ``NaN`` that pandas uses for missing cells) are treated as
            empty text instead of raising ``AttributeError``.

    Returns:
        The remaining words joined by single spaces; ``""`` when nothing is
        left.
    """
    # Missing cells and non-text JSON values carry nothing to clean.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    # split() with no argument also collapses runs of whitespace.
    return ' '.join(word for word in text.split() if word not in stop_words)

# Keep only rows that have a text and a label we can map to a class id.
# Filtering happens before cleaning so no work is spent on rows that are
# dropped, and .copy() yields an independent frame so the column assignments
# below do not write into a slice of the original (SettingWithCopyWarning).
sentiment_mapping = {'positive': 1, 'negative': 0}
df = df.dropna(subset=['text'])
df = df[df['sentiment'].isin(list(sentiment_mapping))].copy()
df['clean_text'] = df['text'].apply(clean_text)
df['sentiment'] = df['sentiment'].map(sentiment_mapping)

# Train-test split
# 80/20 split with a fixed seed so the partition is reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['clean_text'], df['sentiment'], test_size=0.2, random_state=42
)

# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize data
# Each split is encoded in one call: sequences are truncated to at most 128
# tokens, padded to a common length, and returned as PyTorch tensors.
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class SentimentDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with sentiment labels.

    Args:
        encodings: Mapping from field name to a per-example indexable
            container, i.e. ``encodings[key][i]`` holds the values of that
            field for example ``i``.
        labels: One label per example, either a pandas ``Series`` (read by
            position, so its index does not have to start at 0) or any plain
            sequence such as a list.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the example at position ``idx``.

        ``features`` maps each encoding key to a tensor and ``label`` is a
        scalar tensor. Values that are already tensors are returned without
        copying.
        """
        # torch.tensor() on an existing tensor copies it and emits a
        # "copy construct" UserWarning for every field of every sample;
        # as_tensor reuses tensors and still converts lists/arrays.
        features = {key: torch.as_tensor(val[idx]) for key, val in self.encodings.items()}
        # A Series must be read positionally: after a train/test split its
        # index is shuffled and non-contiguous. Plain sequences are indexed
        # directly.
        label = self.labels.iloc[idx] if hasattr(self.labels, "iloc") else self.labels[idx]
        return features, torch.as_tensor(label)

# Create DataLoaders
# Only the training loader shuffles; the test loader keeps dataset order.
# NOTE(review): test_loader is not consumed anywhere in the visible code —
# confirm whether an evaluation step lives elsewhere.
train_dataset = SentimentDataset(train_encodings, train_labels)
test_dataset = SentimentDataset(test_encodings, test_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# Load BERT model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# num_labels=2 matches the two classes in sentiment_mapping.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# Applied to the model's logits and the integer labels in train_model().
loss_fn = nn.CrossEntropyLoss()

# Train the model
def train_model(num_epochs=3):
    """Fine-tune the module-level ``model`` on ``train_loader``.

    Uses the module-level ``optimizer``, ``loss_fn`` and ``device``. After
    every epoch the mean per-batch loss is printed.

    Args:
        num_epochs: Number of passes over ``train_loader``. Defaults to 3,
            the value that was previously hard-coded.
    """
    model.train()
    # Guard the average against an empty loader (would divide by zero).
    num_batches = max(len(train_loader), 1)
    for epoch in range(num_epochs):
        total_loss = 0.0
        for inputs, labels in train_loader:
            # Move the whole batch to the device the model lives on.
            inputs = {key: val.to(device) for key, val in inputs.items()}
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(**inputs)
            loss = loss_fn(outputs.logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss / num_batches:.4f}")

# Runs the full fine-tuning loop as soon as this statement executes.
train_model()

# Save model
# Only the weights (state_dict) are written, not the whole model object.
torch.save(model.state_dict(), "bert_sentiment_model.pth")

# Load the trained model for inference
# Reloads the weights just saved into the same in-memory model, then switches
# it to evaluation mode before predictions are made.
model.load_state_dict(torch.load("bert_sentiment_model.pth"))
model.eval()

# Function to predict sentiment
def predict_sentiment(text):
    """Classify ``text`` as ``"Positive"`` or ``"Negative"``.

    The text is passed through ``clean_text`` and the module-level
    ``tokenizer`` (truncated to at most 128 tokens), moved to ``device`` and
    scored by the module-level ``model`` with gradient tracking disabled.

    Args:
        text: A single raw message.

    Returns:
        ``"Positive"`` when the highest logit is class 1, otherwise
        ``"Negative"``.
    """
    model_inputs = tokenizer(
        clean_text(text),
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    model_inputs = {name: tensor.to(device) for name, tensor in model_inputs.items()}

    with torch.no_grad():
        logits = model(**model_inputs).logits

    # One input sequence, so the flat argmax index is the class id.
    predicted_class = logits.argmax().item()
    if predicted_class == 1:
        return "Positive"
    return "Negative"

# Flask API for chatbot deployment
app = Flask(__name__)

@app.route("/chatbot", methods=["POST"])
def chatbot():
    """Handle ``POST /chatbot``: reply according to the message's sentiment.

    Expects a JSON object body with a string ``"message"`` field.

    Returns:
        JSON ``{"response": ..., "sentiment": ...}`` for a usable message, or
        ``{"response": "Please enter a message."}`` when the message is
        missing, blank, or not a string.
    """
    data = request.json
    # Valid JSON is not necessarily an object (e.g. null, a list, a bare
    # string); only read "message" from a dict instead of failing on .get().
    user_input = data.get("message", "") if isinstance(data, dict) else ""

    # Non-string or whitespace-only messages carry no text to classify.
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"response": "Please enter a message."})

    sentiment = predict_sentiment(user_input)
    response_text = "I see you're feeling positive! 😊" if sentiment == "Positive" else "Oh no! That seems negative. 😔"

    return jsonify({"response": response_text, "sentiment": sentiment})

# Run the chatbot
if __name__ == "__main__":
    # With debug=True the reloader re-executes this script in a child
    # process, which would repeat the module-level training run. Keep the
    # debugger but turn the reloader off.
    app.run(port=5000, debug=True, use_reloader=False)
