# In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import time
import pandas as pd
import numpy as np

# --- 1. Sentiment Classification Models (Logistic Regression, RF, SVM) ---
def train_sentiment_models(X_train, X_test, y_train, y_test, *, skip_models=None):
    """Train and evaluate the sentiment classification models.

    Three candidate models are defined: Logistic Regression, a Random
    Forest and a LinearSVC. By default the Random Forest and the LinearSVC
    are skipped (they were disabled for the demonstration because of their
    computational cost), so only Logistic Regression is trained. Pass
    ``skip_models=()`` to train every model.

    Args:
        X_train: Training features, passed to each model's ``fit``.
        X_test: Test features, passed to each model's ``predict``.
        y_train: Training labels.
        y_test: True labels for ``X_test``.
        skip_models: Optional iterable of model names (or a single name)
            to skip. ``None`` keeps the default skip list described above.
            Names that match no model are ignored.

    Returns:
        A dict mapping each trained model's name to a dict with the keys
        ``'Accuracy'`` (test accuracy), ``'Report'`` (text classification
        report) and ``'Model'`` (the fitted estimator). Skipped models have
        no entry.
    """
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
        'Random Forest Classifier': RandomForestClassifier(n_estimators=25, random_state=42, n_jobs=-1),
        'Support Vector Machine (LinearSVC)': LinearSVC(random_state=42, dual=False, max_iter=1000)
    }

    if skip_models is None:
        # Default preserves the original demonstration behaviour.
        skipped = {'Random Forest Classifier', 'Support Vector Machine (LinearSVC)'}
    elif isinstance(skip_models, str):
        # A bare string would otherwise be split into single characters.
        skipped = {skip_models}
    else:
        skipped = set(skip_models)

    results = {}
    print("Starting Classification Model Training...")

    for name, model in models.items():
        print(f"--- Training {name} ---")

        if name in skipped:
            print(f"Skipping {name} due to computational cost in demonstration.")
            continue

        # Start the clock only for models that are actually trained.
        start_time = time.perf_counter()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        elapsed = time.perf_counter() - start_time

        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, digits=4)

        results[name] = {'Accuracy': accuracy, 'Report': report, 'Model': model}
        print(f"Test Accuracy for {name}: {accuracy:.4f}")
        print(f"Fit + predict time for {name}: {elapsed:.2f}s")

    return results

# --- 2. Clustering Model (K-Means) ---
def run_clustering(X, final_df, K=5):
    """Fit K-Means on ``X`` and store each row's cluster label in ``final_df``.

    Args:
        X: Feature matrix handed to ``KMeans.fit_predict``.
        final_df: Object supporting item assignment (the name suggests a
            DataFrame); it receives a ``'cluster'`` entry and is modified
            in place.
        K: Number of clusters to fit.

    Returns:
        A ``(fitted_kmeans, final_df)`` tuple; ``final_df`` is the same
        object that was passed in.
    """
    print(f"\nStarting K-Means Clustering with K={K}...")

    # Fixed seed so repeated runs produce the same clustering.
    kmeans_options = {
        'n_clusters': K,
        'random_state': 42,
        'n_init': 'auto',
        'max_iter': 500,
    }
    clusterer = KMeans(**kmeans_options)

    cluster_labels = clusterer.fit_predict(X)
    final_df['cluster'] = cluster_labels
    print("Clustering complete.")

    return clusterer, final_df

# --- 3. Predictive Modeling (MLP Neural Network) ---
def train_predictive_model(X_train_pred, X_test_pred, y_train_cat, y_test_cat):
    """Fit an MLP classifier on encoded category labels and score it.

    Args:
        X_train_pred: Training features passed to ``MLPClassifier.fit``.
        X_test_pred: Test features passed to ``MLPClassifier.predict``.
        y_train_cat: Training category labels; the label encoder is fitted
            on these.
        y_test_cat: Test category labels, transformed with the encoder
            fitted on ``y_train_cat``.

    Returns:
        A ``(mlp, accuracy)`` tuple: the fitted classifier (trained on the
        encoded integer labels) and its accuracy on the test split.
    """
    # Convert the category labels to integer codes; the encoder learns its
    # classes from the training labels only.
    encoder = LabelEncoder()
    train_codes = encoder.fit_transform(y_train_cat)
    test_codes = encoder.transform(y_test_cat)

    print("\nStarting MLP Neural Network Training...")

    # Two hidden layers of 50 units, seeded for reproducibility.
    mlp_options = {
        'hidden_layer_sizes': (50, 50),
        'max_iter': 50,
        'alpha': 1e-4,
        'solver': 'adam',
        'random_state': 42,
        'verbose': False,
    }
    classifier = MLPClassifier(**mlp_options)
    classifier.fit(X_train_pred, train_codes)

    predicted_codes = classifier.predict(X_test_pred)
    test_accuracy = accuracy_score(test_codes, predicted_codes)

    print("MLP Model Training Complete.")
    print(f"Accuracy in Predicting Product Category: {test_accuracy:.4f}")

    return classifier, test_accuracy