In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LinearRegression
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import StandardScaler

# Load and preprocess data
df = pd.read_csv("/content/lung cancer.csv")
# Encode the categorical columns with an explicit map + integer cast.
# `Series.replace` only produced integers through silent object->int
# downcasting, which pandas has deprecated (FutureWarning). The cast also
# fails loudly if a category outside the mapping ever shows up.
df['GENDER'] = df['GENDER'].map({'M': 0, 'F': 1}).astype('int64')
df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'YES': 1, 'NO': 0}).astype('int64')
x = df.drop('LUNG_CANCER', axis=1)
y = df['LUNG_CANCER']

# Verify dataset shape
print(f"Dataset shape: {df.shape}")
print(f"Features shape: {x.shape}")

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"x_train: {x_train.shape}, x_test: {x_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")

# Scale features (statistics are learned on the training split only)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(f"x_train_scaled: {x_train_scaled.shape}, x_test_scaled: {x_test_scaled.shape}")

# Prepare data for CNN/MobileNet
# The CNN consumes every sample as a GRID_SIDE x GRID_SIDE single-channel
# "image"; MobileNet needs MOBILENET_SIDE x MOBILENET_SIDE x 3 inputs.
GRID_SIDE = 4
GRID_CELLS = GRID_SIDE * GRID_SIDE
MOBILENET_SIDE = 224

n_features = x_train_scaled.shape[1]
# Pad/truncate into *separate* arrays: the tabular models keep training on
# the real feature matrix instead of one with an artificial zero column.
x_train_grid, x_test_grid = x_train_scaled, x_test_scaled
if n_features != GRID_CELLS:
    print(f"Warning: Expected 16 features, got {n_features}. Adjusting reshape.")
    # Pad or truncate to 16 features if necessary
    if n_features < GRID_CELLS:
        pad_widths = ((0, 0), (0, GRID_CELLS - n_features))
        x_train_grid = np.pad(x_train_scaled, pad_widths, mode='constant')
        x_test_grid = np.pad(x_test_scaled, pad_widths, mode='constant')
    else:
        x_train_grid = x_train_scaled[:, :GRID_CELLS]
        x_test_grid = x_test_scaled[:, :GRID_CELLS]
    n_features = GRID_CELLS

# Reshape for CNN: 16 features -> 4x4x1
x_train_cnn = x_train_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
x_test_cnn = x_test_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
print(f"x_train_cnn: {x_train_cnn.shape}, x_test_cnn: {x_test_cnn.shape}")


def _upscale_for_mobilenet(grids):
    """Tile (N, 4, 4, 1) grids into (N, 224, 224, 3) float32 images.

    The cast happens before tiling so the large result is allocated as
    float32 rather than float64, halving its memory footprint.
    """
    factor = MOBILENET_SIDE // GRID_SIDE  # 4 -> 224
    images = grids.astype(np.float32)
    images = np.repeat(images, factor, axis=1)
    images = np.repeat(images, factor, axis=2)
    return np.repeat(images, 3, axis=3)  # 1 -> 3 channels


# Reshape for MobileNet: Upscale to 224x224x3
x_train_mn = _upscale_for_mobilenet(x_train_cnn)
x_test_mn = _upscale_for_mobilenet(x_test_cnn)
print(f"x_train_mn: {x_train_mn.shape}, x_test_mn: {x_test_mn.shape}")

# Seed NumPy (used by sklearn estimators without a random_state) and
# TensorFlow (weight initialisation, dropout, batch shuffling).
np.random.seed(32)
tf.random.set_seed(32)

# Initialize lists
models = ['GaussianNB', 'SVC', 'Logistic Regression', 'Decision Tree',
          'Random Forest', 'Gradient Boosting', 'XGBoost', 'Linear Regression', 'CNN', 'MobileNet']
test_accuracies = []
train_accuracies = []
skfold_means = []
loocv_means = []
kf_means = []
training_times = []
y_pred_list = []

# CNN training function
def train_cnn(x_train, y_train, x_test, y_test):
    """Train a small 2-D CNN on 4x4x1 feature grids and evaluate it.

    Args:
        x_train: Training inputs; the first layer is declared with
            ``input_shape=(4, 4, 1)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    model = Sequential([
        Conv2D(16, (2, 2), activation='relu', input_shape=(4, 4, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=20, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# MobileNet training function
def train_mobilenet(x_train, y_train, x_test, y_test):
    """Train a MobileNet-based binary classifier and evaluate it.

    The MobileNet backbone is built with ``weights=None`` (no pretrained
    weights) and ``include_top=False``, followed by a small dense head.

    Args:
        x_train: Training inputs; the backbone is declared with
            ``input_shape=(224, 224, 3)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    base_model = MobileNet(weights=None, include_top=False, input_shape=(224, 224, 3))
    model = Sequential([
        base_model,
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=10, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# Train models
# Local import: the cross-validation runs below wrap every estimator in a
# scaler+estimator pipeline so that each fold is standardized on its own
# training part, matching the standardized hold-out experiment.
from sklearn.pipeline import make_pipeline

# One splitter per validation scheme, shared by all tabular models.
skf = StratifiedKFold(n_splits=5)
loocv = LeaveOneOut()
kf = KFold(n_splits=5)


def _thresholded_accuracy(estimator, features, target):
    """Score a regressor as a binary classifier by cutting its output at 0.5."""
    predicted = (estimator.predict(features) > 0.5).astype(int)
    return accuracy_score(target, predicted)


def _holdout_and_cv(estimator, label, is_regressor=False):
    """Fit one tabular model, record hold-out metrics and run the three CV schemes.

    Appends to the module-level result lists (training_times,
    train_accuracies, test_accuracies, y_pred_list, skfold_means,
    loocv_means, kf_means) and prints the hold-out summary line.

    Args:
        estimator: Unfitted sklearn-style estimator.
        label: Display name used in the summary line.
        is_regressor: When True, predictions are thresholded at 0.5 so that
            every reported number is an accuracy. R^2 is undefined on the
            single-sample folds of leave-one-out.

    Returns:
        Tuple ``(fitted_estimator, test_predictions, stratified_scores,
        loo_scores, kfold_scores)``.
    """
    start_time = time.time()
    estimator.fit(x_train_scaled, y_train)
    training_times.append(time.time() - start_time)

    if is_regressor:
        train_pred = (estimator.predict(x_train_scaled) > 0.5).astype(int)
        test_pred = (estimator.predict(x_test_scaled) > 0.5).astype(int)
        train_accuracies.append(accuracy_score(y_train, train_pred))
        test_accuracies.append(accuracy_score(y_test, test_pred))
    else:
        train_accuracies.append(estimator.score(x_train_scaled, y_train))
        test_accuracies.append(estimator.score(x_test_scaled, y_test))
        test_pred = estimator.predict(x_test_scaled)
    y_pred_list.append(test_pred)

    # cross_val_score clones the pipeline for every fold, so the estimator
    # fitted above is left untouched.
    cv_estimator = make_pipeline(StandardScaler(), estimator)
    scoring = _thresholded_accuracy if is_regressor else None
    stratified_scores = cross_val_score(cv_estimator, x, y, cv=skf, scoring=scoring)
    skfold_means.append(stratified_scores.mean())
    loo_scores = cross_val_score(cv_estimator, x, y, cv=loocv, scoring=scoring)
    loocv_means.append(loo_scores.mean())
    kfold_scores = cross_val_score(cv_estimator, x, y, cv=kf, scoring=scoring)
    kf_means.append(kfold_scores.mean())

    print(f"{label}: Train Acc: {train_accuracies[-1]:.4f}, Test Acc: {test_accuracies[-1]:.4f}, Time: {training_times[-1]:.2f}s")
    return estimator, test_pred, stratified_scores, loo_scores, kfold_scores


# The variable names below are kept because later cells refer to them.
# GaussianNB
model_g, y_pred, skfold_score_g, loocv_score_g, k_fold_score_g = _holdout_and_cv(
    GaussianNB(), 'GaussianNB')

# SVC
model2, y_pred2, skfold_score, loocv_score, score = _holdout_and_cv(
    SVC(probability=True), 'SVC')

# Logistic Regression
model_lr, y_pred_lr, skfold_score_lr, loocv_score_lr, kf_score_lr = _holdout_and_cv(
    LogisticRegression(), 'Logistic Regression')

# Decision Tree
model_dt, y_pred_dt, skfold_score_dt, loocv_score_dt, kf_score_dt = _holdout_and_cv(
    DecisionTreeClassifier(), 'Decision Tree')

# Random Forest
model_rf, y_pred_rf, skfold_score_rf, loocv_score_rf, kf_score_rf = _holdout_and_cv(
    RandomForestClassifier(), 'Random Forest')

# Gradient Boosting
model_gb, y_pred_gb, skfold_score_gb, loocv_score_gb, kf_score_gb = _holdout_and_cv(
    GradientBoostingClassifier(), 'Gradient Boosting')

# XGBoost
model_xgb, y_pred_xgb, skfold_score_xgb, loocv_score_xgb, kf_score_xgb = _holdout_and_cv(
    XGBClassifier(), 'XGBoost')

# Linear Regression (as classifier)
model_lre, y_pred_lre, skfold_score_lre, loocv_score_lre, kf_score_lre = _holdout_and_cv(
    LinearRegression(), 'Linear Regression', is_regressor=True)

# CNN (no cross-validation is run for the Keras models; 0 is a placeholder)
model_cnn, test_acc_cnn, train_acc_cnn, time_cnn, y_pred_cnn = train_cnn(x_train_cnn, y_train, x_test_cnn, y_test)
training_times.append(time_cnn)
train_accuracies.append(train_acc_cnn)
test_accuracies.append(test_acc_cnn)
y_pred_list.append(y_pred_cnn)
skfold_means.append(0)
loocv_means.append(0)
kf_means.append(0)
print(f"CNN: Train Acc: {train_acc_cnn:.4f}, Test Acc: {test_acc_cnn:.4f}, Time: {time_cnn:.2f}s")

# MobileNet
model_mn, test_acc_mn, train_acc_mn, time_mn, y_pred_mn = train_mobilenet(x_train_mn, y_train, x_test_mn, y_test)
training_times.append(time_mn)
train_accuracies.append(train_acc_mn)
test_accuracies.append(test_acc_mn)
y_pred_list.append(y_pred_mn)
skfold_means.append(0)
loocv_means.append(0)
kf_means.append(0)
print(f"MobileNet: Train Acc: {train_acc_mn:.4f}, Test Acc: {test_acc_mn:.4f}, Time: {time_mn:.2f}s")

# Plotting Training Times
plt.figure(figsize=(12, 6))
sns.barplot(x=models, y=training_times)
plt.title('Training Times of Different Models')
plt.ylabel('Time (seconds)')
plt.xticks(rotation=45)
plt.show()

# Plotting Training and Testing Accuracies
# Bar positions get their own name: reusing `x` here would overwrite the
# feature DataFrame and break any later re-run of the cross-validation code.
positions = np.arange(len(models))
width = 0.35
fig, ax = plt.subplots(figsize=(14, 6))
rects1 = ax.bar(positions - width/2, train_accuracies, width, label='Training Accuracy')
rects2 = ax.bar(positions + width/2, test_accuracies, width, label='Testing Accuracy')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Testing Accuracies of Different Models')
ax.set_xticks(positions)
ax.set_xticklabels(models)
ax.legend()
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.show()

# Plotting Test Accuracies
plt.figure(figsize=(12, 6))
sns.barplot(x=models, y=test_accuracies)
plt.title('Test Accuracies of Different Models')
plt.ylabel('Accuracy')
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.show()

# Plotting Validation Scores
# Three series per model: a 0.25 bar width keeps each group within 0.75 of
# the unit spacing, so neighbouring groups do not overlap (3 * 0.35 would).
cv_width = 0.25
fig, ax = plt.subplots(figsize=(12, 6))
rects1 = ax.bar(positions - cv_width, skfold_means, cv_width, label='Stratified K-Fold')
rects2 = ax.bar(positions, loocv_means, cv_width, label='Leave-One-Out')
rects3 = ax.bar(positions + cv_width, kf_means, cv_width, label='K-Fold')
ax.set_ylabel('Scores')
ax.set_title('Validation Scores of Different Models')
ax.set_xticks(positions)
ax.set_xticklabels(models)
ax.legend()
plt.xticks(rotation=45)
plt.show()

# ROC Curve
plt.figure(figsize=(12, 6))
model_instances = [model_g, model2, model_lr, model_dt, model_rf, model_gb, model_xgb, model_lre, model_cnn, model_mn]
# The Keras models take their own reshaped test tensors.
keras_test_inputs = {'CNN': x_test_cnn, 'MobileNet': x_test_mn}
for model, name in zip(model_instances, models):
    if name in keras_test_inputs:
        # Use the sigmoid output as the score. Hard 0/1 labels would collapse
        # the ROC curve to a single operating point.
        y_score = model.predict(keras_test_inputs[name]).ravel()
    elif hasattr(model, "predict_proba"):
        y_score = model.predict_proba(x_test_scaled)[:, 1]
    else:
        # e.g. Linear Regression: the continuous prediction is the score.
        y_score = model.predict(x_test_scaled)
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Results Table
def specificity(confusion):
    """Return TN / (TN + FP) for a 2x2 confusion matrix.

    Returns NaN when the matrix contains no actual negatives, instead of
    dividing by zero.
    """
    TN, FP, FN, TP = confusion.ravel()
    negatives = TN + FP
    return TN / negatives if negatives else float('nan')


# No cross-validation was run for the Keras models; they are reported as 0.
models_without_cv = ('CNN', 'MobileNet')
stratified_by_model = [skfold_score_g, skfold_score, skfold_score_lr, skfold_score_dt, skfold_score_rf, skfold_score_gb, skfold_score_xgb, skfold_score_lre, 0, 0]
loo_by_model = [loocv_score_g, loocv_score, loocv_score_lr, loocv_score_dt, loocv_score_rf, loocv_score_gb, loocv_score_xgb, loocv_score_lre, 0, 0]
kfold_by_model = [k_fold_score_g, score, kf_score_lr, kf_score_dt, kf_score_rf, kf_score_gb, kf_score_xgb, kf_score_lre, 0, 0]

results = []
# Loop variables use fresh names so the module-level score arrays
# (`skfold_score`, `loocv_score`, ...) are not overwritten by the iteration.
for row_name, row_acc, row_skf, row_loo, row_kf in zip(
    models, test_accuracies, stratified_by_model, loo_by_model, kfold_by_model
):
    has_cv = row_name not in models_without_cv
    results.append({
        'Model': row_name,
        'Testing Accuracy': row_acc,
        'K-Fold': row_kf.mean() if has_cv else 0,
        'Stratified K-Fold': row_skf.mean() if has_cv else 0,
        'Leave-One-Out': row_loo.mean() if has_cv else 0
    })
results_df = pd.DataFrame(results)
best_model = results_df.loc[results_df['Testing Accuracy'].idxmax()]
print("\nComparison Table:")
print(results_df)
print("\nBest Model:")
print(best_model)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LinearRegression
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import StandardScaler

# Load and preprocess data
df = pd.read_csv("/content/lung cancer.csv")
# Encode the categorical columns with an explicit map + integer cast.
# `Series.replace` only produced integers through silent object->int
# downcasting, which pandas has deprecated (FutureWarning). The cast also
# fails loudly if a category outside the mapping ever shows up.
df['GENDER'] = df['GENDER'].map({'M': 0, 'F': 1}).astype('int64')
df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'YES': 1, 'NO': 0}).astype('int64')
x = df.drop('LUNG_CANCER', axis=1)
y = df['LUNG_CANCER']

# Verify dataset shape
print(f"Dataset shape: {df.shape}")
print(f"Features shape: {x.shape}")

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"x_train: {x_train.shape}, x_test: {x_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")

# Scale features (statistics are learned on the training split only)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(f"x_train_scaled: {x_train_scaled.shape}, x_test_scaled: {x_test_scaled.shape}")

# Prepare data for CNN/MobileNet
# The CNN consumes every sample as a GRID_SIDE x GRID_SIDE single-channel
# "image"; MobileNet needs MOBILENET_SIDE x MOBILENET_SIDE x 3 inputs.
GRID_SIDE = 4
GRID_CELLS = GRID_SIDE * GRID_SIDE
MOBILENET_SIDE = 224

n_features = x_train_scaled.shape[1]
# Pad/truncate into *separate* arrays: the tabular models keep training on
# the real feature matrix instead of one with an artificial zero column.
x_train_grid, x_test_grid = x_train_scaled, x_test_scaled
if n_features != GRID_CELLS:
    print(f"Warning: Expected 16 features, got {n_features}. Adjusting reshape.")
    # Pad or truncate to 16 features if necessary
    if n_features < GRID_CELLS:
        pad_widths = ((0, 0), (0, GRID_CELLS - n_features))
        x_train_grid = np.pad(x_train_scaled, pad_widths, mode='constant')
        x_test_grid = np.pad(x_test_scaled, pad_widths, mode='constant')
    else:
        x_train_grid = x_train_scaled[:, :GRID_CELLS]
        x_test_grid = x_test_scaled[:, :GRID_CELLS]
    n_features = GRID_CELLS

# Reshape for CNN: 16 features -> 4x4x1
x_train_cnn = x_train_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
x_test_cnn = x_test_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
print(f"x_train_cnn: {x_train_cnn.shape}, x_test_cnn: {x_test_cnn.shape}")


def _upscale_for_mobilenet(grids):
    """Tile (N, 4, 4, 1) grids into (N, 224, 224, 3) float32 images.

    The cast happens before tiling so the large result is allocated as
    float32 rather than float64, halving its memory footprint.
    """
    factor = MOBILENET_SIDE // GRID_SIDE  # 4 -> 224
    images = grids.astype(np.float32)
    images = np.repeat(images, factor, axis=1)
    images = np.repeat(images, factor, axis=2)
    return np.repeat(images, 3, axis=3)  # 1 -> 3 channels


# Reshape for MobileNet: Upscale to 224x224x3
x_train_mn = _upscale_for_mobilenet(x_train_cnn)
x_test_mn = _upscale_for_mobilenet(x_test_cnn)
print(f"x_train_mn: {x_train_mn.shape}, x_test_mn: {x_test_mn.shape}")

# Seed NumPy (used by sklearn estimators without a random_state) and
# TensorFlow (weight initialisation, dropout, batch shuffling).
np.random.seed(32)
tf.random.set_seed(32)

# Initialize lists
models = ['GaussianNB', 'SVC', 'Logistic Regression', 'Decision Tree',
          'Random Forest', 'Gradient Boosting', 'XGBoost', 'Linear Regression', 'CNN', 'MobileNet']
test_accuracies = []
train_accuracies = []
skfold_means = []
loocv_means = []
kf_means = []
holdout_times = []
skfold_times = []
loocv_times = []
kf_times = []
y_pred_list = []

# CNN training function
def train_cnn(x_train, y_train, x_test, y_test):
    """Train a small 2-D CNN on 4x4x1 feature grids and evaluate it.

    Args:
        x_train: Training inputs; the first layer is declared with
            ``input_shape=(4, 4, 1)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    model = Sequential([
        Conv2D(16, (2, 2), activation='relu', input_shape=(4, 4, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=20, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# MobileNet training function
def train_mobilenet(x_train, y_train, x_test, y_test):
    """Train a MobileNet-based binary classifier and evaluate it.

    The MobileNet backbone is built with ``weights=None`` (no pretrained
    weights) and ``include_top=False``, followed by a small dense head.

    Args:
        x_train: Training inputs; the backbone is declared with
            ``input_shape=(224, 224, 3)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    base_model = MobileNet(weights=None, include_top=False, input_shape=(224, 224, 3))
    model = Sequential([
        base_model,
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=10, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# Function to measure cross-validation time
def measure_cv_time(model, x, y, cv_method, cv_params):
    """Run one cross-validation scheme and time it.

    Args:
        model: Estimator handed to ``cross_val_score``.
        x: Feature matrix.
        y: Target vector.
        cv_method: Splitter class (called as ``cv_method(**cv_params)``).
        cv_params: Keyword arguments for the splitter constructor.

    Returns:
        Tuple ``(elapsed_seconds, mean_score)``.
    """
    began = time.time()
    splitter = cv_method(**cv_params)
    fold_scores = cross_val_score(model, x, y, cv=splitter)
    elapsed = time.time() - began
    return elapsed, fold_scores.mean()

# Train models and measure times
# Local imports: needed for the classifier adapter and for the per-fold
# standardization used during cross-validation.
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import make_pipeline


class _ThresholdedLinearRegression(ClassifierMixin, BaseEstimator):
    """LinearRegression used as a binary classifier.

    Predictions are cut at ``threshold`` so that ``predict`` returns 0/1
    labels and the inherited ``score`` is an accuracy. A bare
    LinearRegression reports R^2, which is undefined on the single-sample
    folds of leave-one-out.
    """

    def __init__(self, threshold=0.5):
        self.threshold = threshold

    def fit(self, X, y):
        self.regressor_ = LinearRegression().fit(X, y)
        self.classes_ = np.array([0, 1])
        return self

    def predict(self, X):
        return (self.regressor_.predict(X) > self.threshold).astype(int)


# Zero-argument factories for the tabular models, keyed by display name.
tabular_factories = {
    'GaussianNB': GaussianNB,
    'SVC': lambda: SVC(probability=True),
    'Logistic Regression': LogisticRegression,
    'Decision Tree': DecisionTreeClassifier,
    'Random Forest': RandomForestClassifier,
    'Gradient Boosting': GradientBoostingClassifier,
    'XGBoost': XGBClassifier,
    'Linear Regression': _ThresholdedLinearRegression,
}
# The Keras models train on their own reshaped tensors and report their own
# metrics; no cross-validation is run for them.
keras_trainers = {
    'CNN': lambda: train_cnn(x_train_cnn, y_train, x_test_cnn, y_test),
    'MobileNet': lambda: train_mobilenet(x_train_mn, y_train, x_test_mn, y_test),
}

for model_name in models:
    print(f"\n=== Training {model_name} ===")

    if model_name in keras_trainers:
        model, test_acc, train_acc, holdout_time, y_pred = keras_trainers[model_name]()
        holdout_times.append(holdout_time)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)
        y_pred_list.append(y_pred)

        # Placeholders keep every result list aligned with `models`.
        kf_times.append(0)
        skfold_times.append(0)
        loocv_times.append(0)
        kf_means.append(0)
        skfold_means.append(0)
        loocv_means.append(0)

        print(f"Hold-out time: {holdout_time:.4f}s")
        continue

    model = tabular_factories[model_name]()

    # Hold-out method timing
    start_time = time.time()
    model.fit(x_train_scaled, y_train)
    holdout_time = time.time() - start_time
    holdout_times.append(holdout_time)

    # Predictions
    train_acc = model.score(x_train_scaled, y_train)
    test_acc = model.score(x_test_scaled, y_test)
    y_pred = model.predict(x_test_scaled)

    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    y_pred_list.append(y_pred)

    # Cross-validation timing. Folds are standardized inside a pipeline so
    # the scores are comparable with the standardized hold-out run;
    # cross_val_score clones the pipeline, leaving `model` untouched.
    cv_model = make_pipeline(StandardScaler(), model)

    # K-Fold
    kf_time, kf_mean = measure_cv_time(cv_model, x, y, KFold, {'n_splits': 5})
    kf_times.append(kf_time)
    kf_means.append(kf_mean)

    # Stratified K-Fold
    skf_time, skf_mean = measure_cv_time(cv_model, x, y, StratifiedKFold, {'n_splits': 5})
    skfold_times.append(skf_time)
    skfold_means.append(skf_mean)

    # Leave-One-Out
    loo_time, loo_mean = measure_cv_time(cv_model, x, y, LeaveOneOut, {})
    loocv_times.append(loo_time)
    loocv_means.append(loo_mean)

    print(f"Hold-out time: {holdout_time:.4f}s")
    print(f"K-Fold time: {kf_time:.4f}s, mean accuracy: {kf_mean:.4f}")
    print(f"Stratified K-Fold time: {skf_time:.4f}s, mean accuracy: {skf_mean:.4f}")
    print(f"LOOCV time: {loo_time:.4f}s, mean accuracy: {loo_mean:.4f}")

# Create timing results DataFrame
# One row per model; column order below is the order shown in the printout.
timing_columns = {
    'Model': models,
    'Hold-Out Time (s)': holdout_times,
    'K-Fold Time (s)': kf_times,
    'Stratified K-Fold Time (s)': skfold_times,
    'LOOCV Time (s)': loocv_times,
    'Hold-Out Test Acc': test_accuracies,
    'K-Fold Mean Acc': kf_means,
    'Stratified K-Fold Mean Acc': skfold_means,
    'LOOCV Mean Acc': loocv_means,
}
timing_results = pd.DataFrame(timing_columns)

print("\n=== Timing Results ===")
print(timing_results)


def _model_bar_chart(frame, value_col, title, y_label,
                     hue_col=None, legend_title=None, y_limits=None):
    """Draw and show one per-model bar chart on a fresh 14x6 figure."""
    plt.figure(figsize=(14, 6))
    if hue_col is None:
        sns.barplot(x='Model', y=value_col, data=frame)
    else:
        sns.barplot(x='Model', y=value_col, hue=hue_col, data=frame)
    plt.title(title)
    plt.ylabel(y_label)
    if y_limits is not None:
        plt.ylim(*y_limits)
    plt.xticks(rotation=45)
    if legend_title is not None:
        plt.legend(title=legend_title)
    plt.show()


# Plotting Training Times
_model_bar_chart(timing_results, 'Hold-Out Time (s)',
                 'Training Times (Hold-Out Method)', 'Time (seconds)')

# Plotting Cross-Validation Times (long format: one row per model and method)
cv_time_columns = ['K-Fold Time (s)', 'Stratified K-Fold Time (s)', 'LOOCV Time (s)']
timing_results_melted = timing_results.melt(
    id_vars=['Model'], value_vars=cv_time_columns,
    var_name='CV Method', value_name='Time (s)')
_model_bar_chart(timing_results_melted, 'Time (s)',
                 'Cross-Validation Times by Method', 'Time (seconds)',
                 hue_col='CV Method', legend_title='CV Method')

# Plotting Accuracy Comparison
accuracy_columns = ['Hold-Out Test Acc', 'K-Fold Mean Acc',
                    'Stratified K-Fold Mean Acc', 'LOOCV Mean Acc']
accuracy_results_melted = timing_results.melt(
    id_vars=['Model'], value_vars=accuracy_columns,
    var_name='Method', value_name='Accuracy')
_model_bar_chart(accuracy_results_melted, 'Accuracy',
                 'Accuracy Comparison Across Methods', 'Accuracy',
                 hue_col='Method', legend_title='Evaluation Method',
                 y_limits=(0, 1))

# Print comprehensive results
print("\n=== Comprehensive Results ===")
print(timing_results.to_string())

  df['GENDER'] = df['GENDER'].replace({'M':0, 'F':1})
  df['LUNG_CANCER'] = df['LUNG_CANCER'].replace({'YES':1, 'NO':0})


Dataset shape: (5871, 16)
Features shape: (5871, 15)
x_train: (4696, 15), x_test: (1175, 15), y_train: (4696,), y_test: (1175,)
x_train_scaled: (4696, 15), x_test_scaled: (1175, 15)
x_train_cnn: (4696, 4, 4, 1), x_test_cnn: (1175, 4, 4, 1)


KeyboardInterrupt: 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LinearRegression
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import StandardScaler

# Load and preprocess data
df = pd.read_csv("/content/lung cancer.csv")
# Encode the categorical columns with an explicit map + integer cast.
# `Series.replace` only produced integers through silent object->int
# downcasting, which pandas has deprecated (FutureWarning). The cast also
# fails loudly if a category outside the mapping ever shows up.
df['GENDER'] = df['GENDER'].map({'M': 0, 'F': 1}).astype('int64')
df['LUNG_CANCER'] = df['LUNG_CANCER'].map({'YES': 1, 'NO': 0}).astype('int64')
x = df.drop('LUNG_CANCER', axis=1)
y = df['LUNG_CANCER']

# Verify dataset shape
print(f"Dataset shape: {df.shape}")
print(f"Features shape: {x.shape}")

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"x_train: {x_train.shape}, x_test: {x_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")

# Scale features (statistics are learned on the training split only)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(f"x_train_scaled: {x_train_scaled.shape}, x_test_scaled: {x_test_scaled.shape}")

# Prepare data for CNN/MobileNet
# The CNN consumes every sample as a GRID_SIDE x GRID_SIDE single-channel
# "image"; MobileNet needs MOBILENET_SIDE x MOBILENET_SIDE x 3 inputs.
GRID_SIDE = 4
GRID_CELLS = GRID_SIDE * GRID_SIDE
MOBILENET_SIDE = 224

n_features = x_train_scaled.shape[1]
# Pad/truncate into *separate* arrays: the tabular models keep training on
# the real feature matrix instead of one with an artificial zero column.
x_train_grid, x_test_grid = x_train_scaled, x_test_scaled
if n_features != GRID_CELLS:
    print(f"Warning: Expected 16 features, got {n_features}. Adjusting reshape.")
    # Pad or truncate to 16 features if necessary
    if n_features < GRID_CELLS:
        pad_widths = ((0, 0), (0, GRID_CELLS - n_features))
        x_train_grid = np.pad(x_train_scaled, pad_widths, mode='constant')
        x_test_grid = np.pad(x_test_scaled, pad_widths, mode='constant')
    else:
        x_train_grid = x_train_scaled[:, :GRID_CELLS]
        x_test_grid = x_test_scaled[:, :GRID_CELLS]
    n_features = GRID_CELLS

# Reshape for CNN: 16 features -> 4x4x1
x_train_cnn = x_train_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
x_test_cnn = x_test_grid.reshape(-1, GRID_SIDE, GRID_SIDE, 1)
print(f"x_train_cnn: {x_train_cnn.shape}, x_test_cnn: {x_test_cnn.shape}")


def _upscale_for_mobilenet(grids):
    """Tile (N, 4, 4, 1) grids into (N, 224, 224, 3) float32 images.

    The cast happens before tiling so the large result is allocated as
    float32 rather than float64, halving its memory footprint.
    """
    factor = MOBILENET_SIDE // GRID_SIDE  # 4 -> 224
    images = grids.astype(np.float32)
    images = np.repeat(images, factor, axis=1)
    images = np.repeat(images, factor, axis=2)
    return np.repeat(images, 3, axis=3)  # 1 -> 3 channels


# Reshape for MobileNet: Upscale to 224x224x3
x_train_mn = _upscale_for_mobilenet(x_train_cnn)
x_test_mn = _upscale_for_mobilenet(x_test_cnn)
print(f"x_train_mn: {x_train_mn.shape}, x_test_mn: {x_test_mn.shape}")

# Seed NumPy (used by sklearn estimators without a random_state) and
# TensorFlow (weight initialisation, dropout, batch shuffling).
np.random.seed(32)
tf.random.set_seed(32)

# Initialize lists
models = ['GaussianNB', 'SVC', 'Logistic Regression', 'Decision Tree',
          'Random Forest', 'Gradient Boosting', 'XGBoost', 'Linear Regression', 'CNN', 'MobileNet']
test_accuracies = []
train_accuracies = []
skfold_means = []
loocv_means = []
kf_means = []
holdout_times = []
skfold_times = []
loocv_times = []
kf_times = []
y_pred_list = []

# CNN training function
def train_cnn(x_train, y_train, x_test, y_test):
    """Train a small 2-D CNN on 4x4x1 feature grids and evaluate it.

    Args:
        x_train: Training inputs; the first layer is declared with
            ``input_shape=(4, 4, 1)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    model = Sequential([
        Conv2D(16, (2, 2), activation='relu', input_shape=(4, 4, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=20, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# MobileNet training function
def train_mobilenet(x_train, y_train, x_test, y_test):
    """Train a MobileNet-based binary classifier and evaluate it.

    The MobileNet backbone is built with ``weights=None`` (no pretrained
    weights) and ``include_top=False``, followed by a small dense head.

    Args:
        x_train: Training inputs; the backbone is declared with
            ``input_shape=(224, 224, 3)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Binary held-out labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds
        and ``y_pred`` is a flat int32 array of 0/1 test predictions
        (sigmoid output thresholded at 0.5).
    """
    base_model = MobileNet(weights=None, include_top=False, input_shape=(224, 224, 3))
    model = Sequential([
        base_model,
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # The test split is deliberately not passed as validation_data: it would
    # add a full test-set pass per epoch to the measured training time.
    start_time = time.perf_counter()
    model.fit(x_train, y_train, epochs=10, batch_size=16, verbose=0)
    training_time = time.perf_counter() - start_time

    # Score the final weights in inference mode. The History value is a
    # running average taken while Dropout is active and weights still change,
    # so it is not comparable with the test accuracy below.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# Function to measure cross-validation time
def measure_cv_time(model, x, y, cv_method, cv_params):
    """Cross-validate ``model`` and report how long it took.

    The hold-out metrics in this notebook are computed on StandardScaler-
    transformed features, while this helper is called with the raw feature
    table. To keep the cross-validated scores comparable, the estimator is
    wrapped in a pipeline that standardises the features inside every fold
    (the scaler is fitted on each training fold only).

    Args:
        model: scikit-learn compatible estimator.
        x: Feature matrix.
        y: Target vector.
        cv_method: Splitter class, e.g. ``KFold`` or ``LeaveOneOut``.
        cv_params: Keyword arguments used to instantiate ``cv_method``.

    Returns:
        Tuple ``(elapsed_seconds, mean_score)``.
    """
    # Function-scope import so this helper does not depend on an extra
    # top-of-notebook import line.
    from sklearn.pipeline import make_pipeline

    scaled_model = make_pipeline(StandardScaler(), model)
    start_time = time.time()
    scores = cross_val_score(scaled_model, x, y, cv=cv_method(**cv_params))
    return time.time() - start_time, scores.mean()

# Train models and measure times
# Names handled by the Keras training helpers instead of a scikit-learn fit.
deep_model_names = ('CNN', 'MobileNet')

# Zero-argument factories so each estimator is only built when its turn comes.
estimator_factories = {
    'GaussianNB': GaussianNB,
    'SVC': lambda: SVC(probability=True),
    'Logistic Regression': LogisticRegression,
    'Decision Tree': DecisionTreeClassifier,
    'Random Forest': RandomForestClassifier,
    'Gradient Boosting': GradientBoostingClassifier,
    'XGBoost': XGBClassifier,
    'Linear Regression': LinearRegression,
}

for model_name in models:
    print(f"\n=== Training {model_name} ===")
    is_deep = model_name in deep_model_names

    # Hold-out fit: produces the model, both accuracies, the fit duration and
    # the test-set predictions.
    if model_name == 'CNN':
        model, test_acc, train_acc, holdout_time, y_pred = train_cnn(x_train_cnn, y_train, x_test_cnn, y_test)
    elif model_name == 'MobileNet':
        model, test_acc, train_acc, holdout_time, y_pred = train_mobilenet(x_train_mn, y_train, x_test_mn, y_test)
    else:
        model = estimator_factories[model_name]()
        fit_started = time.time()
        model.fit(x_train_scaled, y_train)
        holdout_time = time.time() - fit_started

        train_acc = model.score(x_train_scaled, y_train)
        test_acc = model.score(x_test_scaled, y_test)
        y_pred = model.predict(x_test_scaled)

    holdout_times.append(holdout_time)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    y_pred_list.append(y_pred)

    if is_deep:
        # No cross-validation for the Keras models; zeros keep every
        # accumulator aligned with `models`.
        for accumulator in (kf_times, skfold_times, loocv_times,
                            kf_means, skfold_means, loocv_means):
            accumulator.append(0)
    else:
        # K-Fold
        kf_time, kf_mean = measure_cv_time(model, x, y, KFold, {'n_splits': 5})
        kf_times.append(kf_time)
        kf_means.append(kf_mean)

        # Stratified K-Fold
        skf_time, skf_mean = measure_cv_time(model, x, y, StratifiedKFold, {'n_splits': 5})
        skfold_times.append(skf_time)
        skfold_means.append(skf_mean)

        # Leave-One-Out
        loo_time, loo_mean = measure_cv_time(model, x, y, LeaveOneOut, {})
        loocv_times.append(loo_time)
        loocv_means.append(loo_mean)

    print(f"Hold-out time: {holdout_time:.4f}s")
    if not is_deep:
        print(f"K-Fold time: {kf_time:.4f}s, mean accuracy: {kf_mean:.4f}")
        print(f"Stratified K-Fold time: {skf_time:.4f}s, mean accuracy: {skf_mean:.4f}")
        print(f"LOOCV time: {loo_time:.4f}s, mean accuracy: {loo_mean:.4f}")

# Create timing results DataFrame
# One row per model: fit/CV durations followed by the matching accuracies.
result_columns = {
    'Model': models,
    'Hold-Out Time (s)': holdout_times,
    'K-Fold Time (s)': kf_times,
    'Stratified K-Fold Time (s)': skfold_times,
    'LOOCV Time (s)': loocv_times,
    'Hold-Out Test Acc': test_accuracies,
    'K-Fold Mean Acc': kf_means,
    'Stratified K-Fold Mean Acc': skfold_means,
    'LOOCV Mean Acc': loocv_means,
}
timing_results = pd.DataFrame(result_columns)

print("\n=== Timing Results ===")
print(timing_results)

# Figure 1: hold-out fit time per model.
plt.figure(figsize=(14, 6))
sns.barplot(data=timing_results, x='Model', y='Hold-Out Time (s)')
plt.xticks(rotation=45)
plt.ylabel('Time (seconds)')
plt.title('Training Times (Hold-Out Method)')
plt.show()

# Figure 2: cross-validation time per model, one bar per CV scheme.
cv_time_columns = ['K-Fold Time (s)', 'Stratified K-Fold Time (s)', 'LOOCV Time (s)']
timing_results_melted = pd.melt(
    timing_results,
    id_vars=['Model'],
    value_vars=cv_time_columns,
    var_name='CV Method',
    value_name='Time (s)',
)
plt.figure(figsize=(14, 6))
sns.barplot(data=timing_results_melted, x='Model', y='Time (s)', hue='CV Method')
plt.xticks(rotation=45)
plt.ylabel('Time (seconds)')
plt.title('Cross-Validation Times by Method')
plt.legend(title='CV Method')
plt.show()

# Figure 3: accuracy per model, one bar per evaluation scheme.
accuracy_columns = ['Hold-Out Test Acc', 'K-Fold Mean Acc',
                    'Stratified K-Fold Mean Acc', 'LOOCV Mean Acc']
accuracy_results_melted = pd.melt(
    timing_results,
    id_vars=['Model'],
    value_vars=accuracy_columns,
    var_name='Method',
    value_name='Accuracy',
)
plt.figure(figsize=(14, 6))
sns.barplot(data=accuracy_results_melted, x='Model', y='Accuracy', hue='Method')
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.ylabel('Accuracy')
plt.title('Accuracy Comparison Across Methods')
plt.legend(title='Evaluation Method')
plt.show()

# Print comprehensive results
print("\n=== Comprehensive Results ===")
print(timing_results.to_string())

  df['GENDER'] = df['GENDER'].replace({'M':0, 'F':1})
  df['LUNG_CANCER'] = df['LUNG_CANCER'].replace({'YES':1, 'NO':0})


Dataset shape: (5871, 16)
Features shape: (5871, 15)
x_train: (4696, 15), x_test: (1175, 15), y_train: (4696,), y_test: (1175,)
x_train_scaled: (4696, 15), x_test_scaled: (1175, 15)
x_train_cnn: (4696, 4, 4, 1), x_test_cnn: (1175, 4, 4, 1)
x_train_mn: (4696, 224, 224, 3), x_test_mn: (1175, 224, 224, 3)

=== Training GaussianNB ===
Hold-out time: 0.0106s
K-Fold time: 0.0620s, mean accuracy: 0.9061
Stratified K-Fold time: 0.0530s, mean accuracy: 0.9061
LOOCV time: 50.1738s, mean accuracy: 0.9061

=== Training SVC ===


KeyboardInterrupt: 

# New Section

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import StandardScaler

# Set random seed for reproducibility
np.random.seed(32)
tf.random.set_seed(32)

# Load and preprocess data
df = pd.read_csv("/content/lung cancer.csv")
# Encode the two string columns as integers. `Series.replace` is avoided here
# because replacing strings with ints relies on implicit downcasting, which
# pandas has deprecated (see the FutureWarning in this cell's earlier output).
# Values not in the mapping are passed through, and `to_numeric` then raises
# if anything non-numeric is left.
for column, codes in (('GENDER', {'M': 0, 'F': 1}), ('LUNG_CANCER', {'YES': 1, 'NO': 0})):
    df[column] = pd.to_numeric(df[column].map(lambda value, codes=codes: codes.get(value, value)))
x = df.drop('LUNG_CANCER', axis=1)
y = df['LUNG_CANCER']

# Verify dataset shape
print(f"Dataset shape: {df.shape}")
print(f"Features shape: {x.shape}")

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"x_train: {x_train.shape}, x_test: {x_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")

# Scale features (scaler statistics come from the training split only)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(f"x_train_scaled: {x_train_scaled.shape}, x_test_scaled: {x_test_scaled.shape}")

# Prepare data for CNN/MobileNet: the 4x4 reshape below needs exactly 16
# columns, so zero-pad on the right or truncate as required.
n_features = x_train_scaled.shape[1]
if n_features != 16:
    print(f"Warning: Expected 16 features, got {n_features}. Adjusting reshape.")
    if n_features < 16:
        x_train_scaled = np.pad(x_train_scaled, ((0, 0), (0, 16 - n_features)), mode='constant')
        x_test_scaled = np.pad(x_test_scaled, ((0, 0), (0, 16 - n_features)), mode='constant')
    else:
        x_train_scaled = x_train_scaled[:, :16]
        x_test_scaled = x_test_scaled[:, :16]
    n_features = 16

# Reshape for CNN: 16 features -> 4x4x1
# Cast to float32 before upscaling: every element of the 224x224x3 arrays
# built below costs 4 bytes instead of 8 (about 2.8 GB rather than 5.7 GB for
# the 4696-row training split shown in the cell output).
x_train_cnn = x_train_scaled.reshape(-1, 4, 4, 1).astype(np.float32)
x_test_cnn = x_test_scaled.reshape(-1, 4, 4, 1).astype(np.float32)
print(f"x_train_cnn: {x_train_cnn.shape}, x_test_cnn: {x_test_cnn.shape}")

# Reshape for MobileNet: Upscale to 224x224x3
x_train_mn = np.repeat(x_train_cnn, 56, axis=1)  # 4 -> 224
x_train_mn = np.repeat(x_train_mn, 56, axis=2)
x_train_mn = np.repeat(x_train_mn, 3, axis=3)  # 1 channel -> 3 identical channels
x_test_mn = np.repeat(x_test_cnn, 56, axis=1)
x_test_mn = np.repeat(x_test_mn, 56, axis=2)
x_test_mn = np.repeat(x_test_mn, 3, axis=3)
print(f"x_train_mn: {x_train_mn.shape}, x_test_mn: {x_test_mn.shape}")

# Initialize lists (each is appended to once per entry of `models`)
models = ['GaussianNB', 'SVC', 'Logistic Regression', 'Decision Tree',
          'Random Forest', 'Gradient Boosting', 'XGBoost', 'CNN', 'MobileNet']
test_accuracies = []
train_accuracies = []
skfold_means = []
loocv_means = []
kf_means = []
holdout_times = []
skfold_times = []
loocv_times = []
kf_times = []
y_pred_list = []
model_instances = []

# CNN training function
def train_cnn(x_train, y_train, x_test, y_test):
    """Train the small 4x4 CNN and report hold-out metrics.

    Args:
        x_train: Training inputs for a network declared with
            ``input_shape=(4, 4, 1)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Held-out binary labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds and
        ``y_pred`` is the flat int32 array of predictions thresholded at 0.5.
    """
    model = Sequential([
        Conv2D(16, (2, 2), activation='relu', input_shape=(4, 4, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Time the optimisation only. The validation metrics were never used, and
    # scoring the test set after every epoch was being billed as training time.
    start_time = time.time()
    model.fit(x_train, y_train, epochs=10, batch_size=16, verbose=0)
    training_time = time.time() - start_time

    # Measure train and test accuracy identically: after training and in
    # inference mode, rather than mixing a Dropout-active running average with
    # a post-training evaluation.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# MobileNet training function
def train_mobilenet(x_train, y_train, x_test, y_test):
    """Train a randomly initialised MobileNet with a small binary head.

    Args:
        x_train: Training inputs for a backbone declared with
            ``input_shape=(224, 224, 3)``.
        y_train: Binary training labels.
        x_test: Held-out inputs with the same per-sample shape.
        y_test: Held-out binary labels.

    Returns:
        Tuple ``(model, test_acc, train_acc, training_time, y_pred)`` where
        ``training_time`` is the wall-clock duration of ``fit`` in seconds and
        ``y_pred`` is the flat int32 array of predictions thresholded at 0.5.
    """
    # weights=None: the backbone starts from random weights.
    base_model = MobileNet(weights=None, include_top=False, input_shape=(224, 224, 3))
    model = Sequential([
        base_model,
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Time the optimisation only. The per-epoch validation pass over the test
    # images was unused and inflated the reported training time.
    start_time = time.time()
    model.fit(x_train, y_train, epochs=5, batch_size=16, verbose=0)
    training_time = time.time() - start_time

    # Evaluate both splits after training, in inference mode, so that train and
    # test accuracy are measured under the same conditions.
    train_acc = model.evaluate(x_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(x_test, y_test)[1]
    y_pred = (model.predict(x_test) > 0.5).astype("int32").flatten()
    return model, test_acc, train_acc, training_time, y_pred

# Function to measure cross-validation time
def measure_cv_time(model, x, y, cv_method, cv_params, scoring='accuracy'):
    """Cross-validate ``model`` and report how long it took.

    The hold-out metrics in this notebook are computed on StandardScaler-
    transformed features, while this helper is called with the raw feature
    table. To keep the cross-validated scores comparable, the estimator is
    wrapped in a pipeline that standardises the features inside every fold
    (the scaler is fitted on each training fold only).

    Args:
        model: scikit-learn compatible estimator.
        x: Feature matrix.
        y: Target vector.
        cv_method: Splitter class, e.g. ``KFold`` or ``LeaveOneOut``.
        cv_params: Keyword arguments used to instantiate ``cv_method``.
        scoring: Metric name forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(elapsed_seconds, mean_score)``.
    """
    # Function-scope import so this helper does not depend on an extra
    # top-of-cell import line.
    from sklearn.pipeline import make_pipeline

    scaled_model = make_pipeline(StandardScaler(), model)
    start_time = time.time()
    scores = cross_val_score(scaled_model, x, y, cv=cv_method(**cv_params), scoring=scoring, n_jobs=-1)
    return time.time() - start_time, scores.mean()

# Train models and measure times
# Keras models: trainer plus the train/test tensors it expects.
deep_runs = {
    'CNN': (train_cnn, x_train_cnn, x_test_cnn),
    'MobileNet': (train_mobilenet, x_train_mn, x_test_mn),
}

# Zero-argument factories for the scikit-learn style estimators.
estimator_factories = {
    'GaussianNB': GaussianNB,
    'SVC': lambda: SVC(probability=True),
    'Logistic Regression': LogisticRegression,
    'Decision Tree': DecisionTreeClassifier,
    'Random Forest': RandomForestClassifier,
    'Gradient Boosting': GradientBoostingClassifier,
    'XGBoost': XGBClassifier,
}

# For these estimators the "LOOCV" slot is filled by a 10-fold K-Fold run
# instead of a true leave-one-out.
ten_fold_models = ('SVC', 'Random Forest', 'Gradient Boosting', 'XGBoost')

for model_name in models:
    print(f"\n=== Training {model_name} ===")

    if model_name in deep_runs:
        trainer, train_inputs, test_inputs = deep_runs[model_name]
        model, test_acc, train_acc, holdout_time, y_pred = trainer(train_inputs, y_train, test_inputs, y_test)
        holdout_times.append(holdout_time)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)
        y_pred_list.append(y_pred)
        model_instances.append(model)
        # No cross-validation for Keras models; zeros keep the lists aligned.
        for accumulator in (kf_times, skfold_times, loocv_times,
                            kf_means, skfold_means, loocv_means):
            accumulator.append(0)
        print(f"Hold-out time: {holdout_time:.4f}s")
        continue

    model = estimator_factories[model_name]()

    # Hold-out method
    fit_started = time.time()
    model.fit(x_train_scaled, y_train)
    holdout_time = time.time() - fit_started

    # Predictions
    train_acc = model.score(x_train_scaled, y_train)
    test_acc = model.score(x_test_scaled, y_test)
    y_pred = model.predict(x_test_scaled)

    holdout_times.append(holdout_time)
    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    y_pred_list.append(y_pred)
    model_instances.append(model)

    # Cross-validation: plain and stratified 5-fold
    kf_time, kf_mean = measure_cv_time(model, x, y, KFold, {'n_splits': 5}, scoring='accuracy')
    kf_times.append(kf_time)
    kf_means.append(kf_mean)

    skf_time, skf_mean = measure_cv_time(model, x, y, StratifiedKFold, {'n_splits': 5}, scoring='accuracy')
    skfold_times.append(skf_time)
    skfold_means.append(skf_mean)

    # LOOCV (use 10-fold for SVC, Random Forest, Gradient Boosting, XGBoost)
    if model_name in ten_fold_models:
        loo_splitter, loo_params = KFold, {'n_splits': 10}
    else:
        loo_splitter, loo_params = LeaveOneOut, {}
    loo_time, loo_mean = measure_cv_time(model, x, y, loo_splitter, loo_params, scoring='accuracy')
    loocv_times.append(loo_time)
    loocv_means.append(loo_mean)

    print(f"Hold-out time: {holdout_time:.4f}s")
    print(f"K-Fold time: {kf_time:.4f}s, mean: {kf_mean:.4f}")
    print(f"Stratified K-Fold time: {skf_time:.4f}s, mean: {skf_mean:.4f}")
    print(f"LOOCV time: {loo_time:.4f}s, mean: {loo_mean:.4f}")

# Create timing results DataFrame
# One row per model: fit/CV durations followed by the matching accuracies.
timing_results = pd.DataFrame({
    'Model': models,
    'Hold-Out Time (s)': holdout_times,
    'K-Fold Time (s)': kf_times,
    'Stratified K-Fold Time (s)': skfold_times,
    'LOOCV Time (s)': loocv_times,
    'Hold-Out Train Acc': train_accuracies,
    'Hold-Out Test Acc': test_accuracies,
    'K-Fold Mean Acc': kf_means,
    'Stratified K-Fold Mean Acc': skfold_means,
    'LOOCV Mean Acc': loocv_means
})

# Plotting Training Times (Hold-Out)
plt.figure(figsize=(14, 6))
sns.barplot(x='Model', y='Hold-Out Time (s)', data=timing_results)
plt.title('Training Times (Hold-Out Method)')
plt.ylabel('Time (seconds)')
plt.xticks(rotation=45)
plt.show()

# Plotting Cross-Validation Times
plt.figure(figsize=(14, 6))
timing_results_melted = timing_results.melt(id_vars=['Model'],
                                           value_vars=['K-Fold Time (s)', 'Stratified K-Fold Time (s)', 'LOOCV Time (s)'],
                                           var_name='CV Method', value_name='Time (s)')
sns.barplot(x='Model', y='Time (s)', hue='CV Method', data=timing_results_melted)
plt.title('Cross-Validation Times by Method')
plt.ylabel('Time (seconds)')
plt.xticks(rotation=45)
plt.legend(title='CV Method')
plt.show()

# Plotting Training and Testing Accuracies
# `plt.subplots` creates the figure itself, so no separate `plt.figure` call
# is made here (it only produced an extra, empty figure). The bar positions
# get their own name so the feature matrix `x` is not overwritten.
bar_positions = np.arange(len(models))
width = 0.35
fig, ax = plt.subplots(figsize=(14, 6))
rects1 = ax.bar(bar_positions - width/2, train_accuracies, width, label='Training Accuracy')
rects2 = ax.bar(bar_positions + width/2, test_accuracies, width, label='Testing Accuracy')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Testing Accuracies of Different Models')
ax.set_xticks(bar_positions)
ax.set_xticklabels(models)
ax.legend()
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.show()

# Plotting Accuracy Comparison
plt.figure(figsize=(14, 6))
accuracy_results_melted = timing_results.melt(id_vars=['Model'],
                                             value_vars=['Hold-Out Test Acc', 'K-Fold Mean Acc',
                                                        'Stratified K-Fold Mean Acc', 'LOOCV Mean Acc'],
                                             var_name='Method', value_name='Accuracy')
sns.barplot(x='Model', y='Accuracy', hue='Method', data=accuracy_results_melted)
plt.title('Accuracy Comparison Across Methods')
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.xticks(rotation=45)
plt.legend(title='Evaluation Method')
plt.show()

# ROC Curve
plt.figure(figsize=(12, 6))
# Test tensors for the Keras models. Their ROC curves are drawn from the
# sigmoid outputs; the thresholded 0/1 labels in `y_pred_list` would collapse
# each curve to a single operating point.
deep_test_inputs = {'CNN': x_test_cnn, 'MobileNet': x_test_mn}
for model, name in zip(model_instances, models):
    if name in deep_test_inputs:
        y_score = model.predict(deep_test_inputs[name], verbose=0).ravel()
    elif hasattr(model, "predict_proba"):
        # Column 1 is the probability of the positive class.
        y_score = model.predict_proba(x_test_scaled)[:, 1]
    else:
        y_score = model.predict(x_test_scaled)
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
# Diagonal = chance-level classifier.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Print comprehensive results
print("\n=== Comprehensive Results ===")
print(timing_results.to_string())

# Best model based on hold-out test accuracy
best_model = timing_results.loc[timing_results['Hold-Out Test Acc'].idxmax()]
print("\n=== Best Model ===")
print(best_model)

  df['GENDER'] = df['GENDER'].replace({'M':0, 'F':1})
  df['LUNG_CANCER'] = df['LUNG_CANCER'].replace({'YES':1, 'NO':0})


Dataset shape: (5871, 16)
Features shape: (5871, 15)
x_train: (4696, 15), x_test: (1175, 15), y_train: (4696,), y_test: (1175,)
x_train_scaled: (4696, 15), x_test_scaled: (1175, 15)
x_train_cnn: (4696, 4, 4, 1), x_test_cnn: (1175, 4, 4, 1)
x_train_mn: (4696, 224, 224, 3), x_test_mn: (1175, 224, 224, 3)

=== Training GaussianNB ===
Hold-out time: 0.0098s
K-Fold time: 2.4299s, mean: 0.9061
Stratified K-Fold time: 0.0523s, mean: 0.9061
LOOCV time: 40.0569s, mean: 0.9061

=== Training SVC ===
Hold-out time: 0.5599s
K-Fold time: 9.7953s, mean: 0.8738
Stratified K-Fold time: 9.7922s, mean: 0.8738
LOOCV time: 21.9747s, mean: 0.8738

=== Training Logistic Regression ===
Hold-out time: 0.0351s
K-Fold time: 0.3611s, mean: 0.9457
Stratified K-Fold time: 0.4027s, mean: 0.9443
LOOCV time: 348.1221s, mean: 0.9452

=== Training Decision Tree ===
Hold-out time: 0.0056s
K-Fold time: 0.1133s, mean: 0.9968
Stratified K-Fold time: 0.0601s, mean: 0.9968
LOOCV time: 51.7759s, mean: 0.9935

=== Training Rand

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9336 - loss: 0.1699
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Hold-out time: 11.6002s

=== Training MobileNet ===


#start

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, accuracy_score
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNet
from sklearn.preprocessing import StandardScaler
from transformers import TFSwinModel

# Set random seeds for reproducibility
np.random.seed(32)
tf.random.set_seed(32)

# Load and preprocess data
def load_data(file_path):
    """Read the lung-cancer CSV and return numeric features and target.

    ``GENDER`` is encoded as M -> 0, F -> 1 and ``LUNG_CANCER`` as
    YES -> 1, NO -> 0. Values that are not in the mapping are passed through
    unchanged and must already be numeric.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        Tuple ``(X, y)``: ``X`` is every column except ``LUNG_CANCER`` and
        ``y`` is the encoded ``LUNG_CANCER`` column.

    Raises:
        ValueError: If an encoded column still holds a non-numeric value.
    """
    df = pd.read_csv(file_path)

    # Encode categorical variables. `Series.replace` is avoided because
    # turning strings into ints that way relies on implicit downcasting, which
    # pandas has deprecated (FutureWarning); mapping and then converting
    # explicitly gives a numeric column on every pandas version.
    encodings = {
        'GENDER': {'M': 0, 'F': 1},
        'LUNG_CANCER': {'YES': 1, 'NO': 0},
    }
    for column, codes in encodings.items():
        recoded = df[column].map(lambda value, codes=codes: codes.get(value, value))
        df[column] = pd.to_numeric(recoded)

    X = df.drop('LUNG_CANCER', axis=1)
    y = df['LUNG_CANCER']
    print(f"Dataset shape: {df.shape}, Features shape: {X.shape}, Target shape: {y.shape}")
    return X, y

# Prepare data for training
def prepare_data(X, y, test_size=0.2):
    """Split, standardise and reshape the data for every model family.

    Args:
        X: Feature table.
        y: Target vector.
        test_size: Fraction of rows held out for testing.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, X_train_cnn, X_test_cnn,
        X_train_mn, X_test_mn, y_train, y_test)``. The ``*_scaled`` arrays have
        exactly 16 columns (zero-padded or truncated), ``*_cnn`` are those
        arrays reshaped to ``(-1, 4, 4, 1)`` and ``*_mn`` are the CNN arrays
        repeated up to ``(-1, 224, 224, 3)``. The ``*_cnn`` / ``*_mn`` arrays
        are float32.
    """
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    print(f"X_train: {X_train.shape}, X_test: {X_test.shape}, y_train: {y_train.shape}, y_test: {y_test.shape}")

    # Scale features (statistics are learned from the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # The 4x4 reshape needs exactly 16 columns: zero-pad on the right or
    # truncate when the dataset has a different number of features.
    n_features = X_train_scaled.shape[1]
    if n_features != 16:
        print(f"Warning: Expected 16 features, got {n_features}. Adjusting...")
        if n_features < 16:
            X_train_scaled = np.pad(X_train_scaled, ((0, 0), (0, 16 - n_features)), mode='constant')
            X_test_scaled = np.pad(X_test_scaled, ((0, 0), (0, 16 - n_features)), mode='constant')
        else:
            X_train_scaled = X_train_scaled[:, :16]
            X_test_scaled = X_test_scaled[:, :16]
        n_features = 16

    # Cast to float32 before upscaling: each element of the 224x224x3 arrays
    # built below then costs 4 bytes instead of 8, halving their memory
    # footprint. The *_scaled arrays stay float64.
    X_train_cnn = X_train_scaled.reshape(-1, 4, 4, 1).astype(np.float32)
    X_test_cnn = X_test_scaled.reshape(-1, 4, 4, 1).astype(np.float32)
    print(f"X_train_cnn: {X_train_cnn.shape}, X_test_cnn: {X_test_cnn.shape}")

    # Reshape for MobileNet/Swin: Upscale to 224x224x3
    X_train_mn = np.repeat(X_train_cnn, 56, axis=1)  # 4 -> 224
    X_train_mn = np.repeat(X_train_mn, 56, axis=2)
    X_train_mn = np.repeat(X_train_mn, 3, axis=3)  # 1 channel -> 3 identical channels
    X_test_mn = np.repeat(X_test_cnn, 56, axis=1)
    X_test_mn = np.repeat(X_test_mn, 56, axis=2)
    X_test_mn = np.repeat(X_test_mn, 3, axis=3)
    print(f"X_train_mn: {X_train_mn.shape}, X_test_mn: {X_test_mn.shape}")

    return X_train_scaled, X_test_scaled, X_train_cnn, X_test_cnn, X_train_mn, X_test_mn, y_train, y_test

# CNN model
def train_cnn(X_train, y_train, X_test, y_test):
    """Train the small 4x4 CNN and report hold-out accuracy.

    Args:
        X_train: Training inputs for a network declared with
            ``input_shape=(4, 4, 1)``.
        y_train: Binary training labels.
        X_test: Held-out inputs with the same per-sample shape.
        y_test: Held-out binary labels.

    Returns:
        Tuple ``(model, train_acc, test_acc, y_pred)``; ``y_pred`` is the flat
        int32 array of predictions thresholded at 0.5.
    """
    model = Sequential([
        Conv2D(16, (2, 2), activation='relu', input_shape=(4, 4, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # No validation_data: the per-epoch validation metrics were never read, so
    # scoring the test set after every epoch was pure overhead.
    model.fit(X_train, y_train, epochs=10, batch_size=16, verbose=0)

    # Score both splits the same way (after training, in inference mode). The
    # last-epoch history value is a running average taken with Dropout active
    # and is not comparable with the test-set evaluation.
    train_acc = model.evaluate(X_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(X_test, y_test, verbose=0)[1]
    y_pred = (model.predict(X_test, verbose=0) > 0.5).astype("int32").flatten()
    return model, train_acc, test_acc, y_pred

# MobileNet model
def train_mobilenet(X_train, y_train, X_test, y_test):
    """Train a randomly initialised MobileNet with a small binary head.

    Args:
        X_train: Training inputs for a backbone declared with
            ``input_shape=(224, 224, 3)``.
        y_train: Binary training labels.
        X_test: Held-out inputs with the same per-sample shape.
        y_test: Held-out binary labels.

    Returns:
        Tuple ``(model, train_acc, test_acc, y_pred)``; ``y_pred`` is the flat
        int32 array of predictions thresholded at 0.5.
    """
    # weights=None: the backbone starts from random weights.
    base_model = MobileNet(weights=None, include_top=False, input_shape=(224, 224, 3))
    model = Sequential([
        base_model,
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # No validation_data: its metrics were never used, and running the test
    # images through the network after every epoch only added cost.
    model.fit(X_train, y_train, epochs=5, batch_size=16, verbose=0)

    # Evaluate both splits after training, in inference mode, so that train and
    # test accuracy are measured under the same conditions.
    train_acc = model.evaluate(X_train, y_train, verbose=0)[1]
    test_acc = model.evaluate(X_test, y_test, verbose=0)[1]
    y_pred = (model.predict(X_test, verbose=0) > 0.5).astype("int32").flatten()
    return model, train_acc, test_acc, y_pred

# Swin Transformer model
def train_swin_transformer(X_train, y_train, X_test, y_test):
    """Fine-tune a pretrained Swin-Tiny backbone with a small binary head.

    Training is best-effort: any exception is reported on stdout and a
    placeholder result is returned so the rest of the benchmark can continue.

    Args:
        X_train: Training inputs matching ``tf.keras.Input(shape=(224, 224, 3))``.
        y_train: Binary training labels.
        X_test: Held-out inputs with the same per-sample shape.
        y_test: Held-out binary labels.

    Returns:
        Tuple ``(model, train_acc, test_acc, y_pred)``. On failure this is
        ``(None, 0, 0, zeros shaped like y_test)``.
    """
    try:
        base_model = TFSwinModel.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
        inputs = tf.keras.Input(shape=(224, 224, 3))
        # The Hugging Face TF vision models document `pixel_values` as
        # channels-first (batch, channels, height, width), whereas the arrays
        # prepared for this notebook are channels-last. Permute moves the
        # channel axis to the front (dims are 1-based and exclude the batch).
        pixel_values = tf.keras.layers.Permute((3, 1, 2))(inputs)
        x = base_model(pixel_values).pooler_output
        x = Dense(64, activation='relu')(x)
        x = Dropout(0.5)(x)
        outputs = Dense(1, activation='sigmoid')(x)
        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        history = model.fit(X_train, y_train, epochs=5, batch_size=16, validation_data=(X_test, y_test), verbose=0)
        train_acc = history.history['accuracy'][-1]
        test_acc = model.evaluate(X_test, y_test, verbose=0)[1]
        y_pred = (model.predict(X_test, verbose=0) > 0.5).astype("int32").flatten()
        return model, train_acc, test_acc, y_pred
    except Exception as e:
        # Deliberate catch-all: a Swin failure must not abort the whole run.
        print(f"Error training Swin Transformer: {e}")
        return None, 0, 0, np.zeros_like(y_test)

# Train and evaluate all models
def train_and_evaluate(X, y, X_train_scaled, X_test_scaled, X_train_cnn, X_test_cnn, X_train_mn, X_test_mn, y_train, y_test):
    """Fit every model and tabulate hold-out and cross-validated accuracy.

    Classical estimators are fitted on the scaled hold-out split and then
    cross-validated on the full ``X``/``y``. Cross-validation wraps each
    estimator in a StandardScaler pipeline so that its scores are obtained on
    standardised features, like the hold-out scores, with the scaler fitted
    inside each fold. The deep models are trained once on the hold-out split
    and get 0 in every cross-validation column.

    Args:
        X: Full, unscaled feature table (used for cross-validation).
        y: Full target vector.
        X_train_scaled, X_test_scaled: Standardised hold-out features.
        X_train_cnn, X_test_cnn: Inputs passed to ``train_cnn``.
        X_train_mn, X_test_mn: Inputs passed to ``train_mobilenet`` and
            ``train_swin_transformer``.
        y_train, y_test: Hold-out labels.

    Returns:
        Tuple ``(results, model_instances, y_pred_list)``: a DataFrame with one
        row per model, the fitted models (an entry may be ``None`` if the Swin
        trainer failed) and the test-set predictions, all in the same order.
    """
    # Function-scope import keeps this block self-contained.
    from sklearn.pipeline import make_pipeline

    models = [
        ('GaussianNB', GaussianNB()),
        ('SVC', SVC(probability=True)),
        ('Logistic Regression', LogisticRegression()),
        ('Decision Tree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
        ('Gradient Boosting', GradientBoostingClassifier()),
        ('XGBoost', XGBClassifier())
    ]
    # For these estimators the "LOOCV" column is filled by 10-fold K-Fold
    # instead of a true leave-one-out run.
    ten_fold_models = {'SVC', 'Random Forest', 'Gradient Boosting', 'XGBoost'}
    deep_trainers = [
        ('CNN', train_cnn, X_train_cnn, X_test_cnn),
        ('MobileNet', train_mobilenet, X_train_mn, X_test_mn),
        ('Swin Transformer', train_swin_transformer, X_train_mn, X_test_mn),
    ]

    model_names = [name for name, _ in models] + [name for name, *_ in deep_trainers]
    train_accuracies = []
    test_accuracies = []
    kf_means = []
    skf_means = []
    loo_means = []
    y_pred_list = []
    model_instances = []

    def cv_mean(estimator, splitter):
        """Mean accuracy of ``estimator`` under ``splitter``, scaling per fold."""
        pipeline = make_pipeline(StandardScaler(), estimator)
        return cross_val_score(pipeline, X, y, cv=splitter, scoring='accuracy', n_jobs=-1).mean()

    # Train traditional ML models
    for name, model in models:
        print(f"\nTraining {name}...")
        # Hold-Out
        model.fit(X_train_scaled, y_train)
        train_acc = model.score(X_train_scaled, y_train)
        test_acc = model.score(X_test_scaled, y_test)
        y_pred = model.predict(X_test_scaled)

        # Cross-validation
        kf_mean = cv_mean(model, KFold(n_splits=5))
        skf_mean = cv_mean(model, StratifiedKFold(n_splits=5))
        loo_splitter = KFold(n_splits=10) if name in ten_fold_models else LeaveOneOut()
        loo_mean = cv_mean(model, loo_splitter)

        # Store results
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)
        kf_means.append(kf_mean)
        skf_means.append(skf_mean)
        loo_means.append(loo_mean)
        y_pred_list.append(y_pred)
        model_instances.append(model)

        print(f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")
        print(f"K-Fold Mean: {kf_mean:.4f}")
        print(f"Stratified K-Fold Mean: {skf_mean:.4f}")
        print(f"LOOCV Mean: {loo_mean:.4f}")

    # Train the deep models (hold-out only)
    for name, trainer, train_inputs, test_inputs in deep_trainers:
        print(f"\nTraining {name}...")
        deep_model, train_acc, test_acc, y_pred = trainer(train_inputs, y_train, test_inputs, y_test)
        train_accuracies.append(train_acc)
        test_accuracies.append(test_acc)
        kf_means.append(0)  # No CV for deep learning
        skf_means.append(0)
        loo_means.append(0)
        y_pred_list.append(y_pred)
        model_instances.append(deep_model)
        print(f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

    # Create results DataFrame
    results = pd.DataFrame({
        'Model': model_names,
        'Hold-Out Train Acc': train_accuracies,
        'Hold-Out Test Acc': test_accuracies,
        'K-Fold Mean Acc': kf_means,
        'Stratified K-Fold Mean Acc': skf_means,
        'LOOCV Mean Acc': loo_means
    })
    return results, model_instances, y_pred_list

# Plotting functions
def plot_accuracies(results):
    """Show grouped bars of hold-out training vs. testing accuracy per model.

    Args:
        results: DataFrame with 'Model', 'Hold-Out Train Acc' and
            'Hold-Out Test Acc' columns.
    """
    bar_width = 0.35
    positions = np.arange(len(results['Model']))
    # (horizontal shift, source column, legend label) for each bar series.
    series = (
        (-bar_width / 2, 'Hold-Out Train Acc', 'Training Accuracy'),
        (bar_width / 2, 'Hold-Out Test Acc', 'Testing Accuracy'),
    )

    plt.figure(figsize=(14, 6))
    for shift, column, label in series:
        plt.bar(positions + shift, results[column], bar_width, label=label)

    plt.title('Training and Testing Accuracies')
    plt.ylabel('Accuracy')
    plt.ylim(0, 1)
    plt.xticks(positions, results['Model'], rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_accuracy_comparison(results):
    """Show, per model, one accuracy bar for each evaluation scheme.

    Args:
        results: DataFrame with a 'Model' column and the four accuracy columns
            listed in ``accuracy_columns`` below.
    """
    accuracy_columns = [
        'Hold-Out Test Acc',
        'K-Fold Mean Acc',
        'Stratified K-Fold Mean Acc',
        'LOOCV Mean Acc',
    ]
    # Long format: one row per (model, evaluation method) pair.
    long_form = pd.melt(
        results,
        id_vars=['Model'],
        value_vars=accuracy_columns,
        var_name='Method',
        value_name='Accuracy',
    )

    plt.figure(figsize=(14, 6))
    sns.barplot(data=long_form, x='Model', y='Accuracy', hue='Method')
    plt.ylim(0, 1)
    plt.xticks(rotation=45)
    plt.ylabel('Accuracy')
    plt.title('Accuracy Comparison Across Methods')
    plt.legend(title='Evaluation Method')
    plt.tight_layout()
    plt.show()

def plot_roc_curves(model_instances, y_pred_list, X_test_scaled, X_test_cnn, X_test_mn, y_test, model_names):
    """Plot one ROC curve per model on a shared axis.

    Curves are drawn from continuous scores wherever the model can provide
    them: the sigmoid output for the Keras models and ``predict_proba`` for the
    classical estimators. Hard 0/1 predictions are only used as a fallback
    (no ``predict_proba``, or a model that is ``None`` because training
    failed); such a curve has a single operating point.

    Args:
        model_instances: Fitted models, aligned with ``model_names``.
        y_pred_list: Thresholded test predictions, aligned with ``model_names``.
        X_test_scaled: Test features for the classical estimators.
        X_test_cnn: Test inputs for the model named 'CNN'.
        X_test_mn: Test inputs for 'MobileNet' and 'Swin Transformer'.
        y_test: True test labels.
        model_names: Display names, one per model.
    """
    deep_test_inputs = {
        'CNN': X_test_cnn,
        'MobileNet': X_test_mn,
        'Swin Transformer': X_test_mn,
    }

    plt.figure(figsize=(12, 6))
    for model, y_pred, name in zip(model_instances, y_pred_list, model_names):
        if model is None:
            # Training failed upstream; only the placeholder labels exist.
            y_scores = y_pred
        elif name in deep_test_inputs:
            y_scores = model.predict(deep_test_inputs[name], verbose=0).ravel()
        elif hasattr(model, "predict_proba"):
            # Column 1 is the probability of the positive class.
            y_scores = model.predict_proba(X_test_scaled)[:, 1]
        else:
            y_scores = model.predict(X_test_scaled)
        fpr, tpr, _ = roc_curve(y_test, y_scores)
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
    # Diagonal = chance-level classifier.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.show()

# Main execution
def main(file_path="/content/lung cancer.csv"):
    """Run the full pipeline: load, prepare, train/evaluate, plot, and report.

    Args:
        file_path: Location of the dataset CSV passed to ``load_data``.
            Defaults to the notebook path used originally; pass a different
            path when running outside that environment.

    Returns:
        None. Results are plotted and printed to stdout.
    """
    # Load data
    X, y = load_data(file_path)

    # Prepare data
    (
        X_train_scaled, X_test_scaled,
        X_train_cnn, X_test_cnn,
        X_train_mn, X_test_mn,
        y_train, y_test,
    ) = prepare_data(X, y)

    # Train and evaluate
    results, model_instances, y_pred_list = train_and_evaluate(
        X, y, X_train_scaled, X_test_scaled, X_train_cnn, X_test_cnn, X_train_mn, X_test_mn, y_train, y_test
    )

    # Plot results
    plot_accuracies(results)
    plot_accuracy_comparison(results)
    plot_roc_curves(model_instances, y_pred_list, X_test_scaled, X_test_cnn, X_test_mn, y_test, results['Model'])

    # Print results
    print("\n=== Comprehensive Results ===")
    print(results.to_string(index=False))

    # Best model: the row with the highest hold-out test accuracy
    best_model = results.loc[results['Hold-Out Test Acc'].idxmax()]
    print("\n=== Best Model ===")
    print(best_model)

# Entry point: run the pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

  df['GENDER'] = df['GENDER'].replace({'M': 0, 'F': 1})
  df['LUNG_CANCER'] = df['LUNG_CANCER'].replace({'YES': 1, 'NO': 0})


Dataset shape: (5871, 16), Features shape: (5871, 15), Target shape: (5871,)
X_train: (4696, 15), X_test: (1175, 15), y_train: (4696,), y_test: (1175,)
X_train_cnn: (4696, 4, 4, 1), X_test_cnn: (1175, 4, 4, 1)
X_train_mn: (4696, 224, 224, 3), X_test_mn: (1175, 224, 224, 3)

Training GaussianNB...
Train Acc: 0.9069, Test Acc: 0.9030
K-Fold Mean: 0.9061
Stratified K-Fold Mean: 0.9061
LOOCV Mean: 0.9061

Training SVC...
Train Acc: 0.9847, Test Acc: 0.9804
K-Fold Mean: 0.8738
Stratified K-Fold Mean: 0.8738
LOOCV Mean: 0.8738

Training Logistic Regression...
Train Acc: 0.9461, Test Acc: 0.9404
K-Fold Mean: 0.9457
Stratified K-Fold Mean: 0.9443
LOOCV Mean: 0.9452

Training Decision Tree...
Train Acc: 0.9968, Test Acc: 0.9966
K-Fold Mean: 0.9968
Stratified K-Fold Mean: 0.9968
LOOCV Mean: 0.9935

Training Random Forest...
Train Acc: 0.9968, Test Acc: 0.9966
K-Fold Mean: 0.9968
Stratified K-Fold Mean: 0.9968
LOOCV Mean: 0.9968

Training Gradient Boosting...
Train Acc: 0.9842, Test Acc: 0.9821
K

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Train Acc: 0.9342, Test Acc: 0.9268

Training MobileNet...
