## **Training Model For Sentiment Analysis**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
import joblib
import re
import nltk
from nltk.corpus import stopwords

# Download the NLTK stop-word corpus and keep the English list as a set so
# membership tests during text cleaning are fast.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# Load training data (Replace with your actual dataset)
df = pd.read_csv("/content/drive/My Drive/Colab Notebooks/final_data.csv")  # Ensure this dataset is available
# The dataset is expected to have exactly two columns: the text and its label.
df.columns = ["comment", "category"]
# Empty CSV cells are read as NaN; such rows would break text cleaning
# (NaN has no string methods) and cannot be used as training labels.
df.dropna(subset=["comment", "category"], inplace=True)
# Guarantee every remaining comment is a str, even if the column was parsed
# as a numeric/mixed dtype.
df["comment"] = df["comment"].astype(str)
df.drop_duplicates(inplace=True)

# Preprocess data
def clean_text(text):
    """Normalize a raw comment for bag-of-words vectorization.

    The text is lower-cased, then URLs, digit runs and punctuation are
    removed, and finally every token present in the module-level
    ``stop_words`` set is dropped.

    Args:
        text: The raw comment. Any non-string value (for example ``None`` or
            the float NaN that pandas produces for an empty CSV cell) is
            treated as an empty comment instead of raising.

    Returns:
        The surviving tokens joined by single spaces, or ``""`` when nothing
        survives.
    """
    # Guard clause: missing values have no string methods to call.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    # Strip links before punctuation removal, while they are still recognizable.
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"\d+", "", text)
    # Keeps word characters (letters and underscore) and whitespace only.
    text = re.sub(r"[^\w\s]", "", text)
    # NOTE(review): punctuation is removed before this lookup, so any
    # stop-word entries that contain an apostrophe can no longer match their
    # stripped form -- confirm whether that is intended.
    # split()/join already normalizes whitespace, so no extra strip is needed.
    return " ".join(word for word in text.split() if word not in stop_words)

# Run the cleaning routine over every comment, replacing the raw text.
df["comment"] = df["comment"].map(clean_text)

# Turn the cleaned comments into token-count features; min_df=5 is passed
# to the vectorizer as its document-frequency threshold.
vectorizer = CountVectorizer(min_df=5)
y = df["category"]
X = vectorizer.fit_transform(df["comment"])

# Fixed random_state keeps the trained forest reproducible between runs.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Persist both artifacts: the fitted vectorizer is needed at inference time
# to map new text onto the same feature columns the model was trained on.
joblib.dump(vectorizer, "/content/drive/My Drive/Colab Notebooks/vectorizer.pkl")
joblib.dump(model, "/content/drive/My Drive/Colab Notebooks/sentiment_model.pkl")

print("✅ Model and vectorizer saved successfully!")
