In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Paths to the features and labels files
features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Dictionary of classifiers
classifiers = {
    "CatBoost": CatBoostClassifier(verbose=0),
    "XGBoost": XGBClassifier(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB()
}

# Results dictionary
results = {}

for name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    # Predict the responses for the test dataset
    y_pred = clf.predict(X_test)
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix)

# Print all results
for name, metrics in results.items():
    print(f"{name} Performance Metrics:")
    print(f"Accuracy: {metrics[0]:.2f}")
    print(f"Precision: {metrics[1]:.2f}")
    print(f"Recall: {metrics[2]:.2f}")
    print(f"F1 Score: {metrics[3]:.2f}")
    print("Confusion Matrix:")
    print(metrics[4])
    print("\n")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


CatBoost Performance Metrics:
Accuracy: 0.52
Precision: 0.53
Recall: 0.52
F1 Score: 0.52
Confusion Matrix:
[[ 108    5   54   10    2    6    3   65   66   26   56    6    1]
 [   7  513   18   49   56   21   13   60  109    8   61    4   17]
 [   9   18  936   16    7   31    5  148  196   73  180    6    3]
 [   4   51   30  326   34   18   19   56  134    7   60   11   10]
 [   1   67   10   30  418    6   14   36   62    0   26    1    4]
 [  15   30   94    8    5  474    2   39   76   56  100    0    2]
 [   1   26    8   46   14    5  149   43   60    2   30    3    3]
 [   9   20  165   25    7   19   15  901  109   21  286   12    7]
 [  16   61  226   56   21   32   12   93 1208   23   77    7   30]
 [  13    8  179   10    1   23    2   52   51  293  103    0    3]
 [  19   34  162   21   16   58    8  308  200   76 1123   11    9]
 [   7   26   32   47    8   17    2  112   79    6   95   43    3]
 [   0   13    4   24   11    6    6   10   62    2   11    1  184]]


XGBoos

In [2]:
import numpy as np
from sklearn.decomposition import PCA

# Paths to the features and labels files
features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy'# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Applying PCA to capture 99% of the variance
pca = PCA(0.99)
features_pca = pca.fit_transform(features)

# Saving the reduced features to a new file
reduced_features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Features_reduced.npy'
np.save(reduced_features_path, features_pca)

# Optionally, save the labels if you need to keep them aligned with the reduced features for later use
reduced_labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Labels_reduced.npy'
np.save(reduced_labels_path, labels)

# Number of components selected
n_components = pca.n_components_
print(f"Number of principal components selected to explain at least 99% of the variance: {n_components}")

Number of principal components selected to explain at least 99% of the variance: 270


In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Paths to the features and labels files
features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Features_reduced.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Labels_reduced.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Dictionary of classifiers
classifiers = {
    "CatBoost": CatBoostClassifier(verbose=0),
    "XGBoost": XGBClassifier(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB()
}

# Results dictionary
results = {}

for name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    # Predict the responses for the test dataset
    y_pred = clf.predict(X_test)
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix)

# Print all results
for name, metrics in results.items():
    print(f"{name} Performance Metrics:")
    print(f"Accuracy: {metrics[0]:.2f}")
    print(f"Precision: {metrics[1]:.2f}")
    print(f"Recall: {metrics[2]:.2f}")
    print(f"F1 Score: {metrics[3]:.2f}")
    print("Confusion Matrix:")
    print(metrics[4])
    print("\n")

CatBoost Performance Metrics:
Accuracy: 0.50
Precision: 0.51
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[  99    6   55    9    2    6    5   68   68   31   55    3    1]
 [   2  504   20   43   70   33   15   54  108    4   76    0    7]
 [  10   18  880   12    9   32    6  149  224   63  218    4    3]
 [   2   49   30  312   34   22   14   56  166    4   52    9   10]
 [   1   78   12   34  393   10    8   42   71    0   21    1    4]
 [  16   46   94    7    2  477    2   47   75   48   84    2    1]
 [   3   35   15   44   12    5  137   36   62    3   35    1    2]
 [  12   17  138   30    3   39    6  903  123   24  293    5    3]
 [  11   65  235   70   24   22    9   92 1166   24   99   10   35]
 [  19   10  165    4    0   25    5   48   74  271  115    2    0]
 [  16   36  157   33   13   58    9  310  222   60 1119    9    3]
 [   4   22   34   45    9   16    5  116   86    7  107   23    3]
 [   0   19    7   25    7    8    8   14   74    2   16    3  151]]


XGBoos

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load features and labels
features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy'
features = np.load(features_path)
labels = np.load(labels_path)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Initialize SMOTE object
smote = SMOTE(random_state=42)

# Apply SMOTE to the training data
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# For example, training a RandomForest Classifier to see the effect of SMOTE
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_smote, y_train_smote)

# Predict on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.26      0.39      0.31       408
           1       0.50      0.55      0.52       936
           2       0.43      0.49      0.46      1628
           3       0.34      0.39      0.36       760
           4       0.58      0.58      0.58       675
           5       0.57      0.54      0.55       901
           6       0.43      0.42      0.42       390
           7       0.43      0.48      0.45      1596
           8       0.49      0.41      0.45      1862
           9       0.37      0.42      0.39       738
          10       0.55      0.41      0.47      2045
          11       0.17      0.10      0.13       477
          12       0.46      0.60      0.52       334

    accuracy                           0.45     12750
   macro avg       0.43      0.44      0.43     12750
weighted avg       0.46      0.45      0.45     12750



In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load features and labels
features_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced 512\Features_reduced.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\Reduced 512\Labels_reduced.npy'
features = np.load(features_path)
labels = np.load(labels_path)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Initialize SMOTE object
smote = SMOTE(random_state=42)

# Apply SMOTE to the training data
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# For example, training a RandomForest Classifier to see the effect of SMOTE
classifier = RandomForestClassifier(random_state=42)
classifier.fit(X_train_smote, y_train_smote)

# Predict on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.28      0.38      0.32       408
           1       0.45      0.51      0.48       936
           2       0.41      0.47      0.44      1628
           3       0.34      0.39      0.36       760
           4       0.50      0.55      0.52       675
           5       0.58      0.52      0.55       901
           6       0.40      0.40      0.40       390
           7       0.40      0.46      0.43      1596
           8       0.44      0.37      0.40      1862
           9       0.36      0.37      0.36       738
          10       0.52      0.41      0.46      2045
          11       0.17      0.10      0.13       477
          12       0.50      0.57      0.53       334

    accuracy                           0.43     12750
   macro avg       0.41      0.42      0.41     12750
weighted avg       0.43      0.43      0.43     12750

