In [None]:
import os

# Configure PyTorch's CUDA allocator through its environment variable.
# This cell runs before the notebook's `import torch` cell.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})


In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl.metadata (9.9 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.1.1-cp313-cp313-win_amd64.whl.metadata (3.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
Downloading alembic-1.15.2-py3-none-any.whl (231 kB)
Downloading sqlalchemy-2.0.40-cp313-cp313-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---------------------------------------- 2.1/2.1 MB 19.9 MB/s eta 0:00:00
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)

In [None]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib
from scipy.sparse import hstack, csr_matrix
from transformers import pipeline
from datasets import Dataset
import optuna
from sklearn.model_selection import cross_val_score
import time
import numpy as np

In [None]:
# Load dataset
# The CSV path is relative to the notebook's working directory. Later cells read
# the 'cleaned_text' and 'Sentiment' columns from this frame.
file_path = "cleaned_advanced_dataset_with_5k_sarcasm.csv"
data = pd.read_csv(file_path)

In [None]:
# Build the Hugging Face text-classification pipeline used to flag sarcasm.
# device=-1 keeps inference on the CPU (the recorded output confirms
# "Device set to use cpu").
sarcasm_detector = pipeline(
    "text-classification",
    model="dnzblgn/Sarcasm-Detection-Customer-Reviews",
    device=-1,
)


Device set to use cpu


In [None]:
# Model inputs: the cleaned review text is the feature column and the
# sentiment label is the prediction target.
X, y = data['cleaned_text'], data['Sentiment']

In [None]:
# Wrap the text column in a Hugging Face Dataset so it can be fed to the
# sarcasm pipeline in batches.
text_frame = pd.DataFrame({'text': X})
dataset = Dataset.from_pandas(text_frame)

In [None]:
# `torch` is already imported in the notebook's imports cell; this line repeats it.
import torch
# Ask PyTorch to release its cached CUDA memory before the sarcasm model is run.
# NOTE(review): the pipeline is created with device=-1 (CPU), so this call
# probably has nothing to free - confirm whether it is still needed.
torch.cuda.empty_cache()


In [None]:
# Run sarcasm detection over every text, two texts per forward pass.
# The pipeline was created with device=-1, so this runs on the CPU; the small
# batch size only bounds memory use.
#
# truncation=True clips any text that is longer than the model's maximum
# sequence length instead of letting the forward pass fail on it.
# NOTE(review): assumes the model has a fixed maximum input length - confirm.
#
# list(...) guarantees the pipeline receives a plain Python list of strings,
# whatever container type `dataset['text']` returns.
results = sarcasm_detector(
    list(dataset['text']),
    batch_size=2,  # Adjust batch size based on memory
    truncation=True,
)


In [None]:
# Translate the pipeline's raw label strings into integer flags and store
# them as a new column next to the text they were predicted for.
label_mapping = {'LABEL_0': 0, 'LABEL_1': 1}  # 0 = nonsarcasm, 1 = sarcasm
predicted_labels = (prediction['label'] for prediction in results)
data['Sarcasm_Flag'] = list(map(label_mapping.__getitem__, predicted_labels))


In [None]:
# Hold out 10% of the rows for testing. Text, labels and sarcasm flags are
# split in one call so the three stay row-aligned; the fixed seed keeps the
# partition repeatable.
(
    X_train, X_test,
    y_train, y_test,
    sarcasm_train, sarcasm_test,
) = train_test_split(
    X,
    y,
    data['Sarcasm_Flag'],
    test_size=0.1,
    random_state=42,
)


In [None]:
# TF-IDF over word uni-, bi- and tri-grams.
tfidf_vectorizer = TfidfVectorizer(
    ngram_range=(1, 3),
    min_df=3,
    max_df=0.90,
    max_features=28000,
    sublinear_tf=True,
    smooth_idf=True,
)
# The vocabulary and IDF weights are fitted on the training texts only; the
# test texts are merely transformed with them.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [None]:
# Turn each sarcasm-flag Series into a single-column sparse matrix so it can
# be stacked next to the sparse TF-IDF features.
sarcasm_train_sparse, sarcasm_test_sparse = (
    csr_matrix(flags.values.reshape(-1, 1))
    for flags in (sarcasm_train, sarcasm_test)
)


In [None]:
# Combine TF-IDF features with sarcasm flag (appended as the last column).
# format="csr" pins the result to a row-indexable format instead of leaving the
# choice to SciPy's default, so cross-validation and the estimators can slice
# rows without first converting the matrix.
X_train_combined = hstack([X_train_tfidf, sarcasm_train_sparse], format="csr")
X_test_combined = hstack([X_test_tfidf, sarcasm_test_sparse], format="csr")

In [None]:
from optuna.pruners import MedianPruner


# How many hyperparameter trials the study below is asked to run; the
# objective's progress messages also print this value.
n_trials = 10

# Fail fast when either training input is None.
if not (X_train_combined is not None and y_train is not None):
    raise ValueError("❌ Error: Training data is missing!")

def objective(trial):
    """Score one hyperparameter sample for the soft-voting ensemble.

    Samples ``C_svm``, ``C_lr`` and ``alpha_nb`` on a log scale, builds a
    soft-voting ensemble of an RBF SVM, a liblinear logistic regression and a
    multinomial naive Bayes model, and evaluates it with 2-fold
    cross-validation on the module-level ``X_train_combined`` / ``y_train``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to draw the hyperparameters.

    Returns
    -------
    float
        Mean cross-validated accuracy.
    """
    # NOTE: `n_trials` here is the module-level value. When `study.optimize`
    # is called with a different `n_trials`, the "x/N" total in these messages
    # does not match that call.
    print(f"\n🔵 Starting Trial {trial.number + 1}/{n_trials}")

    # **Optimized Search Space**
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # (which emitted a FutureWarning on every trial); the distribution is the same.
    C_svm = trial.suggest_float("C_svm", 1e-2, 1e1, log=True)   # More compact range
    C_lr = trial.suggest_float("C_lr", 1e-2, 1, log=True)       # Keeps it reasonable
    alpha_nb = trial.suggest_float("alpha_nb", 1e-2, 1, log=True)

    print(f"🔹 Hyperparams - C_svm: {C_svm:.3f}, C_lr: {C_lr:.3f}, alpha_nb: {alpha_nb:.3f}")

    # Define models
    # random_state fixes the data shuffling done by SVC's probability
    # calibration and by liblinear, so the same hyperparameters always get the
    # same score.
    clf_svm = SVC(C=C_svm, kernel="rbf", probability=True, random_state=42)
    clf_lr = LogisticRegression(C=C_lr, solver="liblinear", random_state=42)
    clf_nb = MultinomialNB(alpha=alpha_nb)

    # Voting Classifier (soft voting averages the predicted class probabilities)
    voting_clf = VotingClassifier(
        estimators=[("svm", clf_svm), ("lr", clf_lr), ("nb", clf_nb)],
        voting="soft"
    )

    # **Faster Cross-Validation (cv=2)**
    scores = cross_val_score(voting_clf, X_train_combined, y_train, cv=2, scoring="accuracy", n_jobs=-1)

    mean_acc = scores.mean()
    print(f"✅ Trial {trial.number + 1}/{n_trials} - Accuracy: {mean_acc:.4f}")

    return mean_acc

# Create the study and run the search.
# No pruner is configured: `objective` never calls trial.report() or
# trial.should_prune(), so a pruner has no intermediate values to act on and
# cannot stop a trial early.
# The sampler is seeded so the sequence of suggested hyperparameters is
# repeatable from run to run.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=n_trials)

# Show best results
print("\n🎯 Best Trial Found:")
print(f"⭐ Accuracy: {study.best_trial.value:.4f}")
print(f"🏆 Best Hyperparameters: {study.best_trial.params}")


[I 2025-04-01 14:32:54,462] A new study created in memory with name: no-name-f9090098-1aeb-4663-ab97-d0adb62f2b53
  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable



🔵 Starting Trial 1/10
🔹 Hyperparams - C_svm: 0.043, C_lr: 0.017, alpha_nb: 0.054


[I 2025-04-01 14:50:20,428] Trial 0 finished with value: 0.720286014300715 and parameters: {'C_svm': 0.04267477133241278, 'C_lr': 0.01718157564076153, 'alpha_nb': 0.053701245506332276}. Best is trial 0 with value: 0.720286014300715.


✅ Trial 1/10 - Accuracy: 0.7203

🔵 Starting Trial 2/10
🔹 Hyperparams - C_svm: 0.032, C_lr: 0.059, alpha_nb: 0.823


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 15:07:43,772] Trial 1 finished with value: 0.7214110705535277 and parameters: {'C_svm': 0.03232670980640546, 'C_lr': 0.058892042537674716, 'alpha_nb': 0.8226629452060383}. Best is trial 1 with value: 0.7214110705535277.


✅ Trial 2/10 - Accuracy: 0.7214

🔵 Starting Trial 3/10
🔹 Hyperparams - C_svm: 0.114, C_lr: 0.020, alpha_nb: 0.043


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 15:24:31,268] Trial 2 finished with value: 0.7232111605580278 and parameters: {'C_svm': 0.11442420431308933, 'C_lr': 0.02006783038715579, 'alpha_nb': 0.04341595897006368}. Best is trial 2 with value: 0.7232111605580278.


✅ Trial 3/10 - Accuracy: 0.7232

🔵 Starting Trial 4/10
🔹 Hyperparams - C_svm: 1.844, C_lr: 0.053, alpha_nb: 0.153


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 15:58:46,404] Trial 3 finished with value: 0.7531376568828441 and parameters: {'C_svm': 1.8444223425586244, 'C_lr': 0.052603806017407155, 'alpha_nb': 0.15326344811148027}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 4/10 - Accuracy: 0.7531

🔵 Starting Trial 5/10
🔹 Hyperparams - C_svm: 0.018, C_lr: 0.415, alpha_nb: 0.047


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 16:16:18,799] Trial 4 finished with value: 0.7327866393319666 and parameters: {'C_svm': 0.018155884714300415, 'C_lr': 0.41515649235484126, 'alpha_nb': 0.04654226582863611}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 5/10 - Accuracy: 0.7328

🔵 Starting Trial 6/10
🔹 Hyperparams - C_svm: 0.088, C_lr: 0.042, alpha_nb: 0.857


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 16:32:51,729] Trial 5 finished with value: 0.7137356867843392 and parameters: {'C_svm': 0.08816740194563699, 'C_lr': 0.042392279121070374, 'alpha_nb': 0.8565387869843096}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 6/10 - Accuracy: 0.7137

🔵 Starting Trial 7/10
🔹 Hyperparams - C_svm: 4.176, C_lr: 0.015, alpha_nb: 0.011


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 17:08:43,414] Trial 6 finished with value: 0.7457622881144057 and parameters: {'C_svm': 4.176069304187696, 'C_lr': 0.014823670314162226, 'alpha_nb': 0.010712416801041031}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 7/10 - Accuracy: 0.7458

🔵 Starting Trial 8/10
🔹 Hyperparams - C_svm: 0.106, C_lr: 0.071, alpha_nb: 0.675


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 17:25:17,640] Trial 7 finished with value: 0.7160358017900894 and parameters: {'C_svm': 0.10566773525155793, 'C_lr': 0.0714286608723542, 'alpha_nb': 0.6745899708808794}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 8/10 - Accuracy: 0.7160

🔵 Starting Trial 9/10
🔹 Hyperparams - C_svm: 0.116, C_lr: 0.013, alpha_nb: 0.026


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 17:41:40,995] Trial 8 finished with value: 0.7221611080554028 and parameters: {'C_svm': 0.11593797647289757, 'C_lr': 0.01263765185388339, 'alpha_nb': 0.025876411200658332}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 9/10 - Accuracy: 0.7222

🔵 Starting Trial 10/10
🔹 Hyperparams - C_svm: 0.077, C_lr: 0.129, alpha_nb: 0.025


  C_svm = trial.suggest_loguniform("C_svm", 1e-2, 1e1)   # More compact range
  C_lr = trial.suggest_loguniform("C_lr", 1e-2, 1)       # Keeps it reasonable
[I 2025-04-01 17:59:06,782] Trial 9 finished with value: 0.7219360968048403 and parameters: {'C_svm': 0.07662861487686683, 'C_lr': 0.12902086073755784, 'alpha_nb': 0.025173072240344086}. Best is trial 3 with value: 0.7531376568828441.


✅ Trial 10/10 - Accuracy: 0.7219

🎯 Best Trial Found:
⭐ Accuracy: 0.7531
🏆 Best Hyperparameters: {'C_svm': 1.8444223425586244, 'C_lr': 0.052603806017407155, 'alpha_nb': 0.15326344811148027}


In [None]:
# Create and run the Optuna study
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)  # You can increase n_trials for better tuning


[I 2025-03-31 13:26:45,151] A new study created in memory with name: no-name-a9d44c78-8e02-4a20-a1b9-c8d6313dd33e
  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)



🔵 Starting Trial: 1
🔹 Hyperparameters - C_svm: 7.62560, C_lr: 94.49656, alpha_nb: 0.25609


[I 2025-03-31 14:45:03,384] Trial 0 finished with value: 0.7554878040867913 and parameters: {'C_svm': 7.625604008699433, 'C_lr': 94.4965640323167, 'alpha_nb': 0.2560863611859261}. Best is trial 0 with value: 0.7554878040867913.


✅ Trial 1/15 - Accuracy: 0.7555

🔵 Starting Trial: 2
🔹 Hyperparameters - C_svm: 0.07971, C_lr: 0.90715, alpha_nb: 0.00602


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 15:16:58,439] Trial 1 finished with value: 0.7369369108965197 and parameters: {'C_svm': 0.07971186473897689, 'C_lr': 0.907147398782921, 'alpha_nb': 0.00601994784526749}. Best is trial 0 with value: 0.7554878040867913.


✅ Trial 2/15 - Accuracy: 0.7369

🔵 Starting Trial: 3
🔹 Hyperparameters - C_svm: 0.14097, C_lr: 0.02169, alpha_nb: 0.01378


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 15:47:32,804] Trial 2 finished with value: 0.7323366683777056 and parameters: {'C_svm': 0.14096660919307424, 'C_lr': 0.02169294457942429, 'alpha_nb': 0.013776809163710145}. Best is trial 0 with value: 0.7554878040867913.


✅ Trial 3/15 - Accuracy: 0.7323

🔵 Starting Trial: 4
🔹 Hyperparameters - C_svm: 42.78167, C_lr: 39.72915, alpha_nb: 0.12837


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 17:06:43,384] Trial 3 finished with value: 0.7581879353464483 and parameters: {'C_svm': 42.78167015175765, 'C_lr': 39.72914843296738, 'alpha_nb': 0.12836953327795256}. Best is trial 3 with value: 0.7581879353464483.


✅ Trial 4/15 - Accuracy: 0.7582

🔵 Starting Trial: 5
🔹 Hyperparameters - C_svm: 0.09832, C_lr: 7.37286, alpha_nb: 0.14729


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 17:37:52,611] Trial 4 finished with value: 0.748537472812758 and parameters: {'C_svm': 0.09832304542825943, 'C_lr': 7.372860154162669, 'alpha_nb': 0.1472926140699237}. Best is trial 3 with value: 0.7581879353464483.


✅ Trial 5/15 - Accuracy: 0.7485

🔵 Starting Trial: 6
🔹 Hyperparameters - C_svm: 6.63976, C_lr: 0.16187, alpha_nb: 0.07355


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 18:56:39,912] Trial 5 finished with value: 0.7657132884967789 and parameters: {'C_svm': 6.639757428296832, 'C_lr': 0.1618717596752821, 'alpha_nb': 0.07355355351468403}. Best is trial 5 with value: 0.7657132884967789.


✅ Trial 6/15 - Accuracy: 0.7657

🔵 Starting Trial: 7
🔹 Hyperparameters - C_svm: 14.01328, C_lr: 0.03959, alpha_nb: 0.99606


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 20:15:07,494] Trial 6 finished with value: 0.7671883159965288 and parameters: {'C_svm': 14.01328339236906, 'C_lr': 0.03958847546119499, 'alpha_nb': 0.9960562615133344}. Best is trial 6 with value: 0.7671883159965288.


✅ Trial 7/15 - Accuracy: 0.7672

🔵 Starting Trial: 8
🔹 Hyperparameters - C_svm: 143.95390, C_lr: 47.19160, alpha_nb: 0.00258


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 21:33:33,313] Trial 7 finished with value: 0.7558878365898541 and parameters: {'C_svm': 143.9539015162809, 'C_lr': 47.19160344360181, 'alpha_nb': 0.0025771643525862977}. Best is trial 6 with value: 0.7671883159965288.


✅ Trial 8/15 - Accuracy: 0.7559

🔵 Starting Trial: 9
🔹 Hyperparameters - C_svm: 0.47654, C_lr: 0.01865, alpha_nb: 0.00151


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 22:02:39,824] Trial 8 finished with value: 0.7459123171748661 and parameters: {'C_svm': 0.47653867907665054, 'C_lr': 0.018651088216788816, 'alpha_nb': 0.0015097454234713477}. Best is trial 6 with value: 0.7671883159965288.


✅ Trial 9/15 - Accuracy: 0.7459

🔵 Starting Trial: 10
🔹 Hyperparameters - C_svm: 0.00398, C_lr: 69.43612, alpha_nb: 0.35776


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-03-31 22:37:45,433] Trial 9 finished with value: 0.7464873821815542 and parameters: {'C_svm': 0.0039821820748989125, 'C_lr': 69.43612053891377, 'alpha_nb': 0.35776074087420506}. Best is trial 6 with value: 0.7671883159965288.


✅ Trial 10/15 - Accuracy: 0.7465


In [None]:
# Run up to 20 more trials on the existing `study`; trial numbering continues
# from the trials already recorded in it.
study.optimize(objective, n_trials=20)


🔵 Starting Trial: 13
🔹 Hyperparameters - C_svm: 413.02871, C_lr: 0.00287, alpha_nb: 0.79532


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 00:14:26,775] Trial 12 finished with value: 0.7677383278716382 and parameters: {'C_svm': 413.02871134494916, 'C_lr': 0.002865484081900127, 'alpha_nb': 0.7953224994311736}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 13/15 - Accuracy: 0.7677

🔵 Starting Trial: 14
🔹 Hyperparameters - C_svm: 300.92790, C_lr: 0.00127, alpha_nb: 0.71554


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 01:30:24,915] Trial 13 finished with value: 0.7673383084952161 and parameters: {'C_svm': 300.92789632951883, 'C_lr': 0.0012734415675683015, 'alpha_nb': 0.7155439102319726}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 14/15 - Accuracy: 0.7673

🔵 Starting Trial: 15
🔹 Hyperparameters - C_svm: 928.02350, C_lr: 0.00278, alpha_nb: 0.64878


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 02:45:16,808] Trial 14 finished with value: 0.7675383247467474 and parameters: {'C_svm': 928.0235005359342, 'C_lr': 0.002776099073564033, 'alpha_nb': 0.6487752492788301}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 15/15 - Accuracy: 0.7675

🔵 Starting Trial: 16
🔹 Hyperparameters - C_svm: 999.08475, C_lr: 0.00141, alpha_nb: 0.05106


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 03:58:38,460] Trial 15 finished with value: 0.7636631734875281 and parameters: {'C_svm': 999.0847504111387, 'C_lr': 0.0014146530302395644, 'alpha_nb': 0.05106336384332545}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 16/15 - Accuracy: 0.7637

🔵 Starting Trial: 17
🔹 Hyperparameters - C_svm: 849.51191, C_lr: 873.30734, alpha_nb: 0.02257


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 05:12:05,902] Trial 16 finished with value: 0.7494625090649768 and parameters: {'C_svm': 849.5119129785137, 'C_lr': 873.3073396497401, 'alpha_nb': 0.022572709003822907}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 17/15 - Accuracy: 0.7495

🔵 Starting Trial: 18
🔹 Hyperparameters - C_svm: 73.56685, C_lr: 0.00756, alpha_nb: 0.51060


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 06:25:27,508] Trial 17 finished with value: 0.7669633234980289 and parameters: {'C_svm': 73.56685252092461, 'C_lr': 0.007560324055342973, 'alpha_nb': 0.510597081086596}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 18/15 - Accuracy: 0.7670

🔵 Starting Trial: 19
🔹 Hyperparameters - C_svm: 1.47108, C_lr: 0.22909, alpha_nb: 0.16373


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 07:29:13,482] Trial 18 finished with value: 0.7648882078637625 and parameters: {'C_svm': 1.471078854121757, 'C_lr': 0.22908517048323906, 'alpha_nb': 0.16373291212322183}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 19/15 - Accuracy: 0.7649

🔵 Starting Trial: 20
🔹 Hyperparameters - C_svm: 0.00272, C_lr: 0.00192, alpha_nb: 0.97613


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 08:01:07,222] Trial 19 finished with value: 0.7297115671216113 and parameters: {'C_svm': 0.002718239160532125, 'C_lr': 0.0019176059190796403, 'alpha_nb': 0.9761342191775851}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 20/15 - Accuracy: 0.7297

🔵 Starting Trial: 21
🔹 Hyperparameters - C_svm: 33.25919, C_lr: 0.00690, alpha_nb: 0.34361


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 09:15:21,813] Trial 20 finished with value: 0.7673383422494352 and parameters: {'C_svm': 33.25918939329072, 'C_lr': 0.006896547582996673, 'alpha_nb': 0.3436095664770883}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 21/15 - Accuracy: 0.7673

🔵 Starting Trial: 22
🔹 Hyperparameters - C_svm: 151.48326, C_lr: 0.09517, alpha_nb: 0.06789


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 10:30:48,188] Trial 21 finished with value: 0.7651882678705131 and parameters: {'C_svm': 151.4832599873041, 'C_lr': 0.09516809400309163, 'alpha_nb': 0.06788749797548145}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 22/15 - Accuracy: 0.7652

🔵 Starting Trial: 23
🔹 Hyperparameters - C_svm: 2.10189, C_lr: 0.91048, alpha_nb: 0.03158


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 11:46:04,508] Trial 22 finished with value: 0.7655382747454974 and parameters: {'C_svm': 2.101887864375262, 'C_lr': 0.910479860443332, 'alpha_nb': 0.03158167684754257}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 23/15 - Accuracy: 0.7655

🔵 Starting Trial: 24
🔹 Hyperparameters - C_svm: 32.11939, C_lr: 0.00404, alpha_nb: 0.30302


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 13:01:49,055] Trial 23 finished with value: 0.7669133128718256 and parameters: {'C_svm': 32.11939053010379, 'C_lr': 0.004043503055645757, 'alpha_nb': 0.3030216739530543}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 24/15 - Accuracy: 0.7669

🔵 Starting Trial: 25
🔹 Hyperparameters - C_svm: 387.44279, C_lr: 0.00823, alpha_nb: 0.47118


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[I 2025-04-01 14:17:36,379] Trial 24 finished with value: 0.7670383272483102 and parameters: {'C_svm': 387.44279477758454, 'C_lr': 0.008230349480916655, 'alpha_nb': 0.4711822941571972}. Best is trial 12 with value: 0.7677383278716382.


✅ Trial 25/15 - Accuracy: 0.7670

🔵 Starting Trial: 26
🔹 Hyperparameters - C_svm: 292.09296, C_lr: 0.00625, alpha_nb: 0.54449


  C_svm = trial.suggest_loguniform("C_svm", 1e-3, 1e3)
  C_lr = trial.suggest_loguniform("C_lr", 1e-3, 1e3)
  alpha_nb = trial.suggest_loguniform("alpha_nb", 1e-3, 1)
[W 2025-04-01 14:30:58,107] Trial 25 failed with parameters: {'C_svm': 292.09296153793, 'C_lr': 0.006247110709861469, 'alpha_nb': 0.5444932559221406} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\rohan\AppData\Local\Programs\Python\Python313\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\rohan\AppData\Local\Temp\ipykernel_19940\1142412665.py", line 35, in objective
    scores = cross_val_score(voting_clf, X_train_combined, y_train, cv=3, scoring="accuracy", n_jobs=-1)
  File "C:\Users\rohan\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\utils\_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\rohan\AppData\Local\Programs\Python\Py

KeyboardInterrupt: 

In [None]:
# Report the best hyperparameters held by whichever study `study` is currently bound to.
print("\n🎯 Best Parameters Found by Optuna:", study.best_params)


🎯 Best Parameters Found by Optuna: {'C_svm': 1.8444223425586244, 'C_lr': 0.052603806017407155, 'alpha_nb': 0.15326344811148027}


In [None]:
# Manually set best hyperparameters.
# These values are hard-coded rather than read from `study.best_params`.
C_svm = 413.02871
C_lr = 0.00287
alpha_nb = 0.79532


# Build the final soft-voting classifier from the values above, so each number
# is written in one place only.
# - LogisticRegression takes no n_jobs here: it has no effect with the
#   liblinear solver and only triggers a warning at fit time.
# - random_state fixes the shuffling done by SVC's probability calibration and
#   by liblinear, so refitting reproduces the same model.
final_voting_clf = VotingClassifier(
    estimators=[
        ("svm", SVC(C=C_svm, kernel="rbf", probability=True, random_state=42)),
        ("lr", LogisticRegression(C=C_lr, solver="liblinear", random_state=42)),
        ("nb", MultinomialNB(alpha=alpha_nb)),
    ],
    voting="soft"
)

In [None]:
# Train the final model on the full training split (TF-IDF features plus the
# sarcasm-flag column).
final_voting_clf.fit(X_train_combined, y_train)



In [None]:
# Predict labels for the test set once and reuse them for every metric.
# Calling .score() and then .predict() would run the ensemble over the whole
# test set twice.
y_pred = final_voting_clf.predict(X_test_combined)

# Evaluate accuracy on test set (the same value .score() would return)
accuracy = accuracy_score(y_test, y_pred)
print("Final Model Accuracy:", accuracy)

# Print classification report
print("📊 Classification Report:")
print(classification_report(y_test, y_pred))

Final Model Accuracy: 0.8066
📊 Classification Report:
              precision    recall  f1-score   support

    negative       0.81      0.82      0.82      1706
     neutral       0.76      0.75      0.75      1666
    positive       0.85      0.86      0.85      1628

    accuracy                           0.81      5000
   macro avg       0.81      0.81      0.81      5000
weighted avg       0.81      0.81      0.81      5000

