In [None]:
# 🎓 FC25 Sentiment Training Notebook (Corrected Only)

# ✅ STEP 1: Upload Corrected File
# Opens the Colab upload widget and keeps its result in `uploaded`.
# Step 2 checks `uploaded` for the expected filename and then reads that
# file by name from the working directory.
# NOTE(review): this relies on files.upload() also saving the upload to
# disk under its original name — confirm against the Colab docs.
from google.colab import files
uploaded = files.upload()

# You should upload:
# - corrected_sentiment_batch.csv (from Streamlit)

# ✅ STEP 2: Load and Clean Corrected Data
# Reads the uploaded CSV into `df` and reduces it to unique, non-missing
# comments labelled "Positive", "Negative" or "Neutral".
# Raises:
#   Exception: the expected file was not among the uploads.
#   KeyError:  the CSV lacks a "Comment" or "Sentiment" column.
import pandas as pd

if "corrected_sentiment_batch.csv" not in uploaded:
    raise Exception("Please upload only 'corrected_sentiment_batch.csv' for training.")

df = pd.read_csv("corrected_sentiment_batch.csv")

# Rename if needed.
# If the export also has a "Sentiment" column, drop it before renaming;
# otherwise the frame ends up with two "Sentiment" columns and
# df["Sentiment"] returns a DataFrame, which breaks the label filter below.
# NOTE(review): assumes the corrected labels should win — confirm against
# the Streamlit export.
if "Corrected Sentiment" in df.columns:
    df = df.drop(columns=["Sentiment"], errors="ignore")
    df = df.rename(columns={"Corrected Sentiment": "Sentiment"})

# Fail early with a readable message instead of a bare KeyError further down.
missing_cols = [col for col in ("Comment", "Sentiment") if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"corrected_sentiment_batch.csv is missing required column(s): {missing_cols}"
    )

# Clean up
# Rows without comment text cannot be vectorized (TfidfVectorizer rejects
# NaN documents), so remove them before filtering and de-duplicating.
df = df.dropna(subset=["Comment"])
df = df[df["Sentiment"].isin(["Positive", "Negative", "Neutral"])]
df = df.drop_duplicates(subset=["Comment"])

# ✅ STEP 3: Train the Model
# Holds out 20% of the cleaned rows for evaluation, converts the comment
# text to TF-IDF features, and fits a logistic-regression classifier.
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle

# The fixed random_state makes the train/test split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df["Comment"],
    df["Sentiment"],
    random_state=42,
    test_size=0.2,
)

# Unigrams and bigrams, limited to 3000 features. The vectorizer is fitted
# on the training split only; the test split is transformed with it.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=3000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# fit() returns the estimator itself, so `model` is the fitted classifier.
model = LogisticRegression(max_iter=200).fit(X_train_vec, y_train)

# ✅ STEP 4: Evaluate the Model
# Predicts labels for the held-out split and prints the classification
# report comparing them with the true labels.
y_pred = model.predict(X_test_vec)
print("Model Evaluation Report:")
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

# ✅ STEP 5: Export the Model and Vectorizer
# Pickles both fitted objects to the working directory first, then passes
# each file to Colab's download helper, in the same order.
for pkl_name, pkl_obj in (("model.pkl", model), ("vectorizer.pkl", vectorizer)):
    with open(pkl_name, "wb") as f:
        pickle.dump(pkl_obj, f)

for pkl_name in ("model.pkl", "vectorizer.pkl"):
    files.download(pkl_name)
