In [1]:
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

# Directory where your images are located
dataset_dir = r"D:\SEM-4\PROJECTS\ML\Archive"

# Initialize VGG16 model, excluding the top (fully connected) layers
model = VGG16(include_top=False, input_shape=(224, 224, 3))

# Create an instance of the ImageDataGenerator for loading images
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Create a data generator for reading images from directories
# Be sure to set the target size to 224x224 to match VGG16 input size
generator = datagen.flow_from_directory(
    dataset_dir,
    target_size=(224, 224),
    batch_size=32,  # Adjust based on your GPU memory
    class_mode='sparse',  # 'sparse' yields integer labels
    shuffle=False  # Important for keeping labels in order
)

# Number of images and labels
num_images = generator.samples
print("Number of images found:", num_images)
num_classes = generator.num_classes
print("Number of classes found:", num_classes)
# Extract features
features = model.predict(generator, steps=np.ceil(num_images/32), verbose=1)

# Get the labels (ensure they are in the same order as the images)
labels = generator.classes


# Saving features and labels to .npy files
np.save('features.npy', features)
np.save('labels.npy', labels)






Found 42500 images belonging to 13 classes.
Number of images found: 42500
Number of classes found: 13
   2/1329 [..............................] - ETA: 35:34



  23/1329 [..............................] - ETA: 1:05:05









In [2]:
import numpy as np
features = np.load(r'D:\SEM-4\ML\CODES\Machine-Learning\features.npy')
size = np.shape(features)
print(size)

labels = np.load(r'D:\SEM-4\ML\CODES\Machine-Learning\labels.npy')
size1 = np.shape(labels)
print(size1)

(42500, 7, 7, 512)
(42500,)


In [1]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Assuming features and labels are already loaded from the .npy files
features = np.load('D://SEM-4//ML//CODES//Machine-Learning//features.npy')
labels = np.load('D:\SEM-4\ML\CODES\Machine-Learning\labels.npy')

# Reshape features for kNN
features = features.reshape(features.shape[0], -1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Define the parameter grid: number of neighbors
param_grid = {'n_neighbors': [3, 5, 7, 9, 11, 13, 15]}

# Initialize a kNN classifier
knn = KNeighborsClassifier()

# Initialize the GridSearchCV object
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')

# Fit it to the data
grid_search.fit(X_train, y_train)

# Print the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best accuracy found: ", grid_search.best_score_)

# Evaluate on the test set with the best parameters
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Print the classification report
print(classification_report(y_test, y_pred))


Best parameters found:  {'n_neighbors': 11}
Best accuracy found:  0.25888235294117645
              precision    recall  f1-score   support

           0       0.05      0.26      0.08       276
           1       0.36      0.15      0.21       646
           2       0.20      0.38      0.26      1081
           3       0.31      0.05      0.08       506
           4       0.52      0.06      0.10       448
           5       0.50      0.25      0.33       594
           6       1.00      0.03      0.06       269
           7       0.32      0.53      0.40      1088
           8       0.37      0.25      0.30      1204
           9       0.34      0.36      0.35       498
          10       0.41      0.33      0.36      1352
          11       0.10      0.01      0.02       318
          12       0.57      0.10      0.18       220

    accuracy                           0.27      8500
   macro avg       0.39      0.21      0.21      8500
weighted avg       0.36      0.27      0.26     

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Paths to the features and labels files
features_path = 'D://SEM-4//ML//CODES//Machine-Learning//features.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\labels.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Dictionary of classifiers
classifiers = {
    "CatBoost": CatBoostClassifier(verbose=0),
    "XGBoost": XGBClassifier(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB()
}

# Results dictionary
results = {}

for name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    # Predict the responses for the test dataset
    y_pred = clf.predict(X_test)
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix)

# Print all results
for name, metrics in results.items():
    print(f"{name} Performance Metrics:")
    print(f"Accuracy: {metrics[0]:.2f}")
    print(f"Precision: {metrics[1]:.2f}")
    print(f"Recall: {metrics[2]:.2f}")
    print(f"F1 Score: {metrics[3]:.2f}")
    print("Confusion Matrix:")
    print(metrics[4])
    print("\n")


KeyboardInterrupt: 

In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Paths to the features and labels files
features_path = 'D://SEM-4//ML//CODES//Machine-Learning//features.npy'
labels_path = 'D:\SEM-4\ML\CODES\Machine-Learning\labels.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Applying PCA to capture 99% of the variance
pca = PCA(0.99)
features_pca = pca.fit_transform(features)

# Saving the reduced features to a new file
reduced_features_path = 'E:\\College\\SEMESTER 4\\MACHINE LEARNING\\LAB\\reduced_features.npy'
np.save(reduced_features_path, features_pca)

# Optionally, save the labels if you need to keep them aligned with the reduced features for later use
reduced_labels_path = 'E:\\College\\SEMESTER 4\\MACHINE LEARNING\\LAB\\reduced_labels.npy'
np.save(reduced_labels_path, labels)

# Number of components selected
n_components = pca.n_components_
print(f"Number of principal components selected to explain at least 99% of the variance: {n_components}")


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# Paths to the features and labels files
features_path = 'D://SEM-4//ML//CODES//Machine-Learning//Reduced//reduced_features.npy'
labels_path = 'D://SEM-4//ML//CODES//Machine-Learning//Reduced//reduced_labels.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Dictionary of classifiers
classifiers = {
    "CatBoost": CatBoostClassifier(verbose=0),
    "XGBoost": XGBClassifier(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB()
}

# Results dictionary
results = {}

for name, clf in classifiers.items():
    # Train the classifier
    clf.fit(X_train, y_train)
    # Predict the responses for the test dataset
    y_pred = clf.predict(X_test)
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)
    # Store results
    results[name] = (accuracy, precision, recall, f1, conf_matrix)

# Print all results
for name, metrics in results.items():
    print(f"{name} Performance Metrics:")
    print(f"Accuracy: {metrics[0]:.2f}")
    print(f"Precision: {metrics[1]:.2f}")
    print(f"Recall: {metrics[2]:.2f}")
    print(f"F1 Score: {metrics[3]:.2f}")
    print("Confusion Matrix:")
    print(metrics[4])
    print("\n")