In [5]:
# 🎓 FC25 Sentiment Training Notebook (Corrected-Only, Saves to Google Drive)

# ✅ STEP 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ✅ STEP 2: Upload Corrected File
from google.colab import files
uploaded = files.upload()

# You should upload:
# - corrected_sentiment_batch.csv (from Streamlit)

# ✅ STEP 3: Load and Clean Corrected Data
import pandas as pd

if "corrected_sentiment_batch.csv" not in uploaded:
    raise Exception("Please upload only 'corrected_sentiment_batch.csv' for training.")

df = pd.read_csv("corrected_sentiment_batch.csv")

if "Corrected Sentiment" in df.columns:
    df = df.rename(columns={"Corrected Sentiment": "Sentiment"})

df = df[df["Sentiment"].isin(["Positive", "Negative", "Neutral"])]
df = df.drop_duplicates(subset=["Comment"])

# ✅ STEP 4: Train the Model
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle

X_train, X_test, y_train, y_test = train_test_split(df["Comment"], df["Sentiment"], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(max_features=3000, ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = LogisticRegression(max_iter=200)
model.fit(X_train_vec, y_train)

# ✅ STEP 5: Evaluate
y_pred = model.predict(X_test_vec)
print("Model Evaluation Report:")
print(classification_report(y_test, y_pred))

# ✅ STEP 6: Save to Google Drive
model_path = "/content/drive/MyDrive/fc25/model.pkl"
vectorizer_path = "/content/drive/MyDrive/fc25/vectorizer.pkl"

with open(model_path, "wb") as f:
    pickle.dump(model, f)

with open(vectorizer_path, "wb") as f:
    pickle.dump(vectorizer, f)

# 🎓 FC25 Sentiment Training Notebook (Corrected-Only, Saves to Google Drive)

# ✅ STEP 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ✅ STEP 2: Upload Corrected File
from google.colab import files
uploaded = files.upload()

# You should upload:
# - corrected_sentiment_batch.csv (from Streamlit)

# ✅ STEP 3: Load and Clean Corrected Data
import pandas as pd

if "corrected_sentiment_batch.csv" not in uploaded:
    raise Exception("Please upload only 'corrected_sentiment_batch.csv' for training.")

df = pd.read_csv("corrected_sentiment_batch.csv")

if "Corrected Sentiment" in df.columns:
    df = df.rename(columns={"Corrected Sentiment": "Sentiment"})

df = df[df["Sentiment"].isin(["Positive", "Negative", "Neutral"])]
df = df.drop_duplicates(subset=["Comment"])

# ✅ STEP 4: Train the Model
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pickle

X_train, X_test, y_train, y_test = train_test_split(df["Comment"], df["Sentiment"], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(max_features=3000, ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = LogisticRegression(max_iter=200)
model.fit(X_train_vec, y_train)

# ✅ STEP 5: Evaluate
y_pred = model.predict(X_test_vec)
print("Model Evaluation Report:")
print(classification_report(y_test, y_pred))

# ✅ STEP 6: Save to Google Drive
model_path = "/content/drive/MyDrive/fc25/model.pkl"
vectorizer_path = "/content/drive/MyDrive/fc25/vectorizer.pkl"

with open(model_path, "wb") as f:
    pickle.dump(model, f)

with open(vectorizer_path, "wb") as f:
    pickle.dump(vectorizer, f)

print(f"✅ Model saved to: {model_path}")
print(f"✅ Vectorizer saved to: {vectorizer_path}")

Mounted at /content/drive


Saving labeled_comments (2).csv to labeled_comments (2) (1).csv


Exception: Please upload only 'corrected_sentiment_batch.csv' for training.