In [1]:
# === 1. Import Required Libraries ===
import pandas as pd
import numpy as np
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score
)
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC

# Suppress warnings from precision_score when a model makes no positive predictions (common in early epochs)
warnings.filterwarnings('ignore', category=UserWarning)

# === 2. Load Dataset ===
# Provide your path here
try:
    combined_dataset = pd.read_csv("/Users/administrator/Desktop/All desktop-data/Prof-Irshad Papers/2025-Papers/10K-Paper/Crop-IoT-Paper/IoT-Journal-Submission/Manuscript-R2/Code-For-Github/Combined_Data.csv")
except FileNotFoundError:
    print("Error: CSV file not found. Using a dummy dataframe for demonstration.")
    data = np.random.rand(5000, 9)
    columns = ['N', 'P', 'K', 'Temp', 'H', 'pH', 'R', 'WL', 'CH']
    combined_dataset = pd.DataFrame(data, columns=columns)
    combined_dataset['CH'] = np.random.randint(0, 2, 5000)

# === 3. Prepare Features and Labels ===
features = ['N', 'P', 'K', 'Temp', 'H', 'pH', 'R', 'WL']
X = combined_dataset[features]
y = combined_dataset['CH']

# === 4. Train-Test Split ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# === 5. Scale Features ===
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 6. Balance the TRAINING Dataset with SMOTE ===
print("Balancing the training data with SMOTE...")
sm = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = sm.fit_resample(X_train_scaled, y_train)
print("Balancing complete.")

# === 7. Run Simulation for SVM Model ===
EPOCHS = 163
acc_list, prec_list, rec_list, f1_list, auc_list = [], [], [], [], []

# Get the total number of samples in the balanced training data
n_samples = len(X_train_resampled)
print(f"\nStarting SVM simulation for {EPOCHS} epochs...")
print("-" * 60)

for epoch in range(1, EPOCHS + 1):
    # Calculate the size of the data subset for the current epoch
    subset_size = int(n_samples * (epoch / EPOCHS))
    subset_size = max(2, subset_size) # Ensure at least 2 samples for fitting
    
    # Take a subset of the training data
    X_subset = X_train_resampled[:subset_size]
    y_subset = y_train_resampled[:subset_size]
    
    # Skip epoch if the small subset doesn't contain both classes
    if len(np.unique(y_subset)) < 2:
        print(f"Epoch {epoch:03d}/{EPOCHS}: Skipped (not enough class diversity in small data subset)")
        # Append last known values or defaults to keep list lengths consistent
        acc_list.append(acc_list[-1] if acc_list else 0.5)
        prec_list.append(prec_list[-1] if prec_list else 0)
        rec_list.append(rec_list[-1] if rec_list else 0)
        f1_list.append(f1_list[-1] if f1_list else 0)
        auc_list.append(auc_list[-1] if auc_list else 0.5)
        continue

    # --- Define and Train the SVM Model on the current subset ---
    svm_model = SVC(kernel='linear', probability=True, C=0.5, random_state=42)
    svm_model.fit(X_subset, y_subset)

    # --- Evaluate on the FULL, UNSEEN test set ---
    svm_probs = svm_model.predict_proba(X_test_scaled)[:, 1]
    y_pred = (svm_probs >= 0.5).astype(int)

    # --- Calculate and Store Metrics ---
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, svm_probs) # Use probabilities for AUC

    # Store metrics
    acc_list.append(acc)
    prec_list.append(prec)
    rec_list.append(rec)
    f1_list.append(f1)
    auc_list.append(auc)

    # Print performance for the current epoch
    print(f"Epoch {epoch:03d}/{EPOCHS} -> "
          f"Accuracy: {acc*100:.2f}%, Precision: {prec*100:.2f}%, "
          f"Recall: {rec*100:.2f}%, F1: {f1*100:.2f}%, AUC: {auc*100:.2f}%")


# === 8. Store History in a DataFrame for Later Use ===
metrics_df = pd.DataFrame({
    'Epoch': range(1, EPOCHS + 1),
    'Accuracy': acc_list,
    'Precision': prec_list,
    'Recall': rec_list,
    'F1Score': f1_list,
    'AUC': auc_list
})

print("\n" + "="*40)
print("Simulation Complete.")
print(f"Final performance at Epoch {EPOCHS}:")
# Display the last row of the DataFrame for the final results
print(metrics_df.iloc[-1].to_string())
print("="*40)



Balancing the training data with SMOTE...
Balancing complete.

Starting SVM simulation for 163 epochs...
------------------------------------------------------------
Epoch 001/163: Skipped (not enough class diversity in small data subset)
Epoch 002/163: Skipped (not enough class diversity in small data subset)
Epoch 003/163: Skipped (not enough class diversity in small data subset)
Epoch 004/163: Skipped (not enough class diversity in small data subset)
Epoch 005/163: Skipped (not enough class diversity in small data subset)
Epoch 006/163: Skipped (not enough class diversity in small data subset)
Epoch 007/163: Skipped (not enough class diversity in small data subset)
Epoch 008/163: Skipped (not enough class diversity in small data subset)
Epoch 009/163: Skipped (not enough class diversity in small data subset)
Epoch 010/163: Skipped (not enough class diversity in small data subset)
Epoch 011/163: Skipped (not enough class diversity in small data subset)
Epoch 012/163: Skipped (not eno

Epoch 092/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.28%
Epoch 093/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.85%
Epoch 094/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.62%
Epoch 095/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.85%
Epoch 096/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.96%
Epoch 097/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 81.96%
Epoch 098/163 -> Accuracy: 96.82%, Precision: 7.14%, Recall: 50.00%, F1: 12.50%, AUC: 82.31%
Epoch 099/163 -> Accuracy: 96.36%, Precision: 6.25%, Recall: 50.00%, F1: 11.11%, AUC: 82.42%
Epoch 100/163 -> Accuracy: 96.36%, Precision: 6.25%, Recall: 50.00%, F1: 11.11%, AUC: 82.31%
Epoch 101/163 -> Accuracy: 96.36%, Precision: 6.25%, Recall: 50.00%, F1: 11.11%, AUC: 82.42%
Epoch 102/163 -> Accuracy: 96.14%, Precision: 5.88%, Recall: 50.00%, F