In [None]:
import numpy as np
import time

try:
    from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
except ImportError as e:
    print(f"Error importing scikit-learn: {e}")
    exit()

try:
    from xgboost import XGBClassifier
except ImportError as e:
    print(f"Error importing XGBoost: {e}")
    # Optionally continue without XGBoost if it's not crucial

try:
    from catboost import CatBoostClassifier
except ImportError as e:
    print(f"Error importing CatBoost: {e}")
    # Optionally continue without CatBoost if it's not crucial

try:
    from imblearn.over_sampling import SMOTE
except ImportError as e:
    print(f"Error importing imbalanced-learn: {e}")
    exit()

# Load features and labels.
# Both arrays live in the same Lab04 directory. The features path used to read
# 'D://SEM-//\ML//...', which dropped the "4" from "SEM-4" and contained a
# stray backslash (an invalid "\M" escape), so it pointed at the wrong place.
features_path = 'D://SEM-4//ML//CODES//Machine-Learning//Lab04//extracted_features.npy'
labels_path = 'D://SEM-4//ML//CODES//Machine-Learning//Lab04//labels.npy'

features = np.load(features_path)
labels = np.load(labels_path)

# Flatten every sample into a single row: (n_samples, ...) -> (n_samples, n_values).
# Presumably the stored array is 4D (n_samples, height, width, channels) - TODO confirm.
features = features.reshape(features.shape[0], -1)

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Oversample with SMOTE on the training portion only, so the test set stays untouched.
# NOTE(review): the resampled arrays are later handed to GridSearchCV, which means
# the validation folds also contain synthetic samples and the cross-validation
# scores may be optimistic. Resampling inside an imblearn Pipeline would avoid that.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Define parameter grids (one per classifier, consumed by GridSearchCV)

# SVM: a list of sub-grids so that `gamma` is only varied for the RBF kernel.
# `gamma` has no effect on a linear kernel, so a single flat grid would fit the
# same linear model once per gamma value.
param_grid_svm = [
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 0.01]},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
]

# Random forest: 'auto' was only an alias of 'sqrt' for classifiers and is
# rejected by newer scikit-learn releases (deprecated in 1.1, removed in 1.3),
# so it is replaced by 'log2' to keep two distinct, valid options.
param_grid_rf = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'max_features': ['sqrt', 'log2']
}

param_grid_adaboost = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1]
}

param_grid_dt = {
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 10]
}

# An empty grid means GaussianNB is used as a plain estimator, without a search.
param_grid_nb = {}  # GaussianNB doesn't have many parameters to tune

param_grid_xgb = {
    'n_estimators': [100, 200],
    'max_depth': [6, 10],
    'learning_rate': [0.01, 0.1]
}

param_grid_catboost = {
    'iterations': [100, 200],
    'learning_rate': [0.01, 0.1],
    'depth': [4, 6, 10]
}

# Seed shared by the CV splitter and every stochastic estimator so that a
# re-run of this cell reproduces the same scores and best parameters.
RANDOM_STATE = 42

# Setup cross-validation strategy.
# Shuffling makes fold membership independent of row order. That matters here
# because the training data is SMOTE-resampled, and the synthetic rows are
# presumably appended after the original ones (TODO confirm).
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Options shared by every grid search below.
grid_search_options = {'cv': cv_strategy, 'scoring': 'accuracy'}

# Dictionary of classifiers: display name -> estimator.
# XGBoost / CatBoost map to None when their optional import failed, and
# Naive Bayes skips the grid search while its parameter grid is empty.
classifiers = {
    "CatBoost": (
        GridSearchCV(CatBoostClassifier(verbose=0, random_seed=RANDOM_STATE), param_grid_catboost, **grid_search_options)
        if 'CatBoostClassifier' in globals() else None
    ),
    "XGBoost": (
        GridSearchCV(XGBClassifier(random_state=RANDOM_STATE), param_grid_xgb, **grid_search_options)
        if 'XGBClassifier' in globals() else None
    ),
    "SVM": GridSearchCV(SVC(), param_grid_svm, **grid_search_options),
    "Random Forest": GridSearchCV(RandomForestClassifier(random_state=RANDOM_STATE), param_grid_rf, **grid_search_options),
    "AdaBoost": GridSearchCV(AdaBoostClassifier(random_state=RANDOM_STATE), param_grid_adaboost, **grid_search_options),
    "Decision Tree": GridSearchCV(DecisionTreeClassifier(random_state=RANDOM_STATE), param_grid_dt, **grid_search_options),
    "Naive Bayes": (
        GaussianNB() if not param_grid_nb
        else GridSearchCV(GaussianNB(), param_grid_nb, **grid_search_options)
    ),
}

# Results dictionary: classifier name -> (accuracy, precision, recall, f1,
# confusion matrix, training seconds, prediction seconds, best parameters)
results = {}

for name, clf in classifiers.items():
    if clf is None:
        print(f"{name} is not available due to an import error.")
        continue

    # Time the fit on the SMOTE-resampled training data. perf_counter() is a
    # monotonic, high-resolution clock, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    # For GridSearchCV entries this covers the whole search, not a single fit.
    train_start = time.perf_counter()
    clf.fit(X_train_smote, y_train_smote)
    training_time = time.perf_counter() - train_start

    # Time prediction on the untouched (non-resampled) test set.
    pred_start = time.perf_counter()
    y_pred = clf.predict(X_test)
    prediction_time = time.perf_counter() - pred_start

    # Calculate metrics; 'weighted' averages per-class scores by class support.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Only GridSearchCV wrappers expose best_params_; plain estimators get "N/A".
    best_params = clf.best_params_ if isinstance(clf, GridSearchCV) else "N/A"

    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix, training_time, prediction_time, best_params)

# Report the metrics recorded for every evaluated classifier
for clf_name, scores in results.items():
    if not scores:
        continue  # nothing was recorded for this entry

    # Tuple layout matches the order in which the training loop stored it.
    acc, prec, rec, f1_value, matrix, fit_secs, predict_secs, chosen_params = scores

    print(f"{clf_name} Performance Metrics:")
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1_value:.2f}")
    print("Confusion Matrix:")
    print(matrix)
    print(f"Training Time: {fit_secs:.4f} seconds")
    print(f"Prediction Time: {predict_secs:.4f} seconds")
    print(f"Best Parameters: {chosen_params}")
    print("\n")

In [None]:
import numpy as np
import time

try:
    from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
except ImportError as e:
    print(f"Error importing scikit-learn: {e}")
    exit()

try:
    from xgboost import XGBClassifier
except ImportError as e:
    print(f"Error importing XGBoost: {e}")
    # Optionally continue without XGBoost if it's not crucial

try:
    from catboost import CatBoostClassifier
except ImportError as e:
    print(f"Error importing CatBoost: {e}")
    # Optionally continue without CatBoost if it's not crucial

# Locations of the saved feature and label arrays
features_path = 'D://SEM-4//ML//CODES//Machine-Learning//Lab04//extracted_features.npy'
labels_path = 'D://SEM-4//ML//CODES//Machine-Learning//Lab04//labels.npy'

# Read both arrays from disk
features = np.load(features_path)
labels = np.load(labels_path)

# Collapse each sample into one flat row, keeping the sample axis first.
# Presumably the stored array is 4D (n_samples, height, width, channels) - TODO confirm.
n_samples = features.shape[0]
features = features.reshape((n_samples, -1))

# 70/30 train/test partition with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=42,
)

# Define parameter grids (one per classifier, consumed by GridSearchCV)

# SVM: a list of sub-grids so that `gamma` is only varied for the RBF kernel.
# `gamma` has no effect on a linear kernel, so a single flat grid would fit the
# same linear model once per gamma value.
param_grid_svm = [
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 0.01]},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
]

# Random forest: 'auto' was only an alias of 'sqrt' for classifiers and is
# rejected by newer scikit-learn releases (deprecated in 1.1, removed in 1.3),
# so it is replaced by 'log2' to keep two distinct, valid options.
param_grid_rf = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'max_features': ['sqrt', 'log2']
}

param_grid_adaboost = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1]
}

param_grid_dt = {
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 10]
}

# An empty grid means GaussianNB is used as a plain estimator, without a search.
param_grid_nb = {}  # GaussianNB doesn't have many parameters to tune

param_grid_xgb = {
    'n_estimators': [100, 200],
    'max_depth': [6, 10],
    'learning_rate': [0.01, 0.1]
}

param_grid_catboost = {
    'iterations': [100, 200],
    'learning_rate': [0.01, 0.1],
    'depth': [4, 6, 10]
}

# Seed shared by the CV splitter and every stochastic estimator so that a
# re-run of this cell reproduces the same scores and best parameters.
RANDOM_STATE = 42

# Setup cross-validation strategy.
# Shuffling (with a fixed seed) keeps fold membership independent of the order
# in which the samples happen to be stored on disk.
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Options shared by every grid search below.
grid_search_options = {'cv': cv_strategy, 'scoring': 'accuracy'}

# Dictionary of classifiers: display name -> estimator.
# XGBoost / CatBoost map to None when their optional import failed, and
# Naive Bayes skips the grid search while its parameter grid is empty.
classifiers = {
    "CatBoost": (
        GridSearchCV(CatBoostClassifier(verbose=0, random_seed=RANDOM_STATE), param_grid_catboost, **grid_search_options)
        if 'CatBoostClassifier' in globals() else None
    ),
    "XGBoost": (
        GridSearchCV(XGBClassifier(random_state=RANDOM_STATE), param_grid_xgb, **grid_search_options)
        if 'XGBClassifier' in globals() else None
    ),
    "SVM": GridSearchCV(SVC(), param_grid_svm, **grid_search_options),
    "Random Forest": GridSearchCV(RandomForestClassifier(random_state=RANDOM_STATE), param_grid_rf, **grid_search_options),
    "AdaBoost": GridSearchCV(AdaBoostClassifier(random_state=RANDOM_STATE), param_grid_adaboost, **grid_search_options),
    "Decision Tree": GridSearchCV(DecisionTreeClassifier(random_state=RANDOM_STATE), param_grid_dt, **grid_search_options),
    "Naive Bayes": (
        GaussianNB() if not param_grid_nb
        else GridSearchCV(GaussianNB(), param_grid_nb, **grid_search_options)
    ),
}

# Results dictionary: classifier name -> (accuracy, precision, recall, f1,
# confusion matrix, training seconds, prediction seconds, best parameters)
results = {}

for name, clf in classifiers.items():
    if clf is None:
        print(f"{name} is not available due to an import error.")
        continue

    # Time the fit on the original (non-resampled) training data.
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments like time.time().
    # For GridSearchCV entries this covers the whole search, not a single fit.
    train_start = time.perf_counter()
    clf.fit(X_train, y_train)
    training_time = time.perf_counter() - train_start

    # Time prediction on the held-out test set.
    pred_start = time.perf_counter()
    y_pred = clf.predict(X_test)
    prediction_time = time.perf_counter() - pred_start

    # Calculate metrics; 'weighted' averages per-class scores by class support.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Only GridSearchCV wrappers expose best_params_; plain estimators get "N/A".
    best_params = clf.best_params_ if isinstance(clf, GridSearchCV) else "N/A"

    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix, training_time, prediction_time, best_params)

# Report the metrics recorded for every evaluated classifier
for clf_name, scores in results.items():
    if not scores:
        continue  # nothing was recorded for this entry

    # Tuple layout matches the order in which the training loop stored it.
    acc, prec, rec, f1_value, matrix, fit_secs, predict_secs, chosen_params = scores

    print(f"{clf_name} Performance Metrics:")
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1_value:.2f}")
    print("Confusion Matrix:")
    print(matrix)
    print(f"Training Time: {fit_secs:.4f} seconds")
    print(f"Prediction Time: {predict_secs:.4f} seconds")
    print(f"Best Parameters: {chosen_params}")
    print("\n")