In [1]:
import os
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from PIL import Image
import torch.nn as nn
import torchvision.models as models
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, auc
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.decomposition import PCA
import seaborn as sns
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier


In [2]:
import torch
from torchvision import transforms
from torchvision.datasets import OxfordIIITPet
from torch.utils.data import DataLoader, random_split

# Image preprocessing: resize every image to 224x224 and convert it to a
# PyTorch tensor.
# NOTE(review): torchvision's ImageNet-pretrained models are documented to
# expect ImageNet mean/std normalisation; consider appending
# transforms.Normalize here — confirm against the intended experiment.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Download (if not already present) and load the Oxford-IIIT Pet dataset
dataset = OxfordIIITPet(root='./', transform=transform, download=True)

# Split the dataset into train and test sets (80% train, 20% test).
# A seeded generator makes the split identical on every run.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Batch sizes for the two loaders
batch_size_train = 64
batch_size_test = 16

# Create data loaders.
# shuffle=False on purpose: these loaders are only used for feature
# extraction, and the notebook iterates train_loader once per backbone slice.
# With shuffling, every pass would visit the samples in a different order, so
# features from separate passes (and the labels taken from just one of them)
# would no longer describe the same images row by row. random_split has
# already randomised which samples end up in each subset.
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=4)

# Check the length of train and test loaders
print("Number of batches in train loader:", len(train_loader))
print("Number of batches in test loader:", len(test_loader))


Downloading https://thor.robots.ox.ac.uk/datasets/pets/images.tar.gz to oxford-iiit-pet/images.tar.gz


100%|██████████| 791918971/791918971 [00:37<00:00, 21248457.75it/s]


Extracting oxford-iiit-pet/images.tar.gz to oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/datasets/pets/annotations.tar.gz to oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19173078/19173078 [00:01<00:00, 10695350.19it/s]


Extracting oxford-iiit-pet/annotations.tar.gz to oxford-iiit-pet
Number of batches in train loader: 46
Number of batches in test loader: 46




The code:

```python
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=4)
```

creates data loaders for the training and testing sets using PyTorch's `DataLoader` class. Here's what each parameter does:

- `train_dataset` and `test_dataset`: These are the training and testing datasets, respectively.

- `batch_size`: Specifies the number of samples in each batch during training or testing. `batch_size_train` and `batch_size_test` are the values defined earlier.

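- `shuffle`: When set to `True`, the DataLoader reshuffles the data at the beginning of each epoch. During gradient-based training this prevents the model from learning the order of examples and improves generalization. For testing (`shuffle=False`), the data is not shuffled to ensure consistent evaluation. Note that when a loader is used only for feature extraction, as it is later in this notebook, a fixed order (`shuffle=False`) is required: the loader is iterated once per feature extractor, and the resulting feature arrays only line up with each other and with the labels if every pass visits the samples in the same order.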

- `num_workers`: Specifies the number of worker processes to use for data loading. It allows loading data in parallel using multiple processes, which can speed up the data loading process. In this case, `num_workers=4` means that four parallel worker processes will be used to load the data.

So, this code creates two data loaders (`train_loader` and `test_loader`) with the specified batch sizes and settings, which can be used to iterate over batches of data during the training and testing phases of a deep learning model.

In [3]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import OxfordIIITPet
from torch.utils.data import DataLoader, random_split
from sklearn import svm, metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import torchvision.models as models

# Define the data transformations: resize to 224x224 and convert to a tensor.
# NOTE(review): torchvision's ImageNet-pretrained models are documented to
# expect ImageNet mean/std normalisation; consider appending
# transforms.Normalize here — confirm against the intended experiment.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Download (if not already present) and load the Oxford-IIIT Pet dataset
dataset = OxfordIIITPet(root='./', transform=transform, download=True)

# Split the dataset into train and test sets (80% train, 20% test).
# A seeded generator makes the split identical on every run.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Define the data loader with different batches
batch_size_train = 64
batch_size_test = 16

# Create data loaders.
# shuffle=False on purpose: train_loader is iterated once per feature
# extractor below, and the resulting feature arrays are concatenated row by
# row and paired with the labels from a single pass. A shuffling loader would
# visit the samples in a different order on every pass and silently misalign
# features and labels.
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=4)

# Check the length of train and test loaders
print("Number of batches in train loader:", len(train_loader))
print("Number of batches in test loader:", len(test_loader))


Number of batches in train loader: 46
Number of batches in test loader: 46


This code defines a custom dataset class named CustomDataset using PyTorch's torch.utils.data.Dataset.
The custom dataset (`CustomDataset`) is designed for a machine learning task involving images of cats and dogs with different breeds. This dataset is implemented as a custom class in PyTorch, which extends the `torch.utils.data.Dataset` class. The purpose of this dataset is to provide an organized and customizable interface for loading and processing image data for machine learning tasks.

Here are the key characteristics and functionalities of the `CustomDataset`:

1. **Initialization:**
   - The dataset is initialized with the root directory where the images are stored (`root_dir`) and an optional transformation for image preprocessing (`transform`).

2. **Labeling:**
   - The dataset automatically extracts labels (breeds) from the image filenames. It determines if an image represents a cat or a dog based on the first letter of the filename and extracts the breed accordingly.

3. **Mapping Classes to Indices:**
   - The dataset creates a mapping from class names (breeds) to numerical indices. This mapping is useful for machine learning models, as they typically require numerical labels.

4. **Length of Dataset:**
   - The `__len__` method is implemented to return the total number of images in the dataset.

5. **Retrieving Samples:**
   - The `__getitem__` method is implemented to retrieve and process a single sample at a given index. It loads the image, applies the specified transformation, determines the label, and returns the processed image and label.



The next step loads the train_dataset from a file, displays an image from the dataset, prints its corresponding label, and checks the classes present in the dataset.
Uses matplotlib.pyplot.imshow to display the image at index 1 in the dataset. The .permute(1, 2, 0) rearranges the dimensions of the tensor to match the expected order for displaying images.

In [4]:
# Load the ImageNet-pretrained ResNet-18 and keep its top-level children so
# that every truncated model below is sliced from the SAME, untruncated list.
# Child order in torchvision's ResNet-18:
#   conv1, bn1, relu, maxpool, layer1, layer2, layer3, layer4, avgpool, fc
resnet18_full = models.resnet18(pretrained=True)
resnet18_children = list(resnet18_full.children())

# Remove the global average pool and the fully connected layer from the model
resnet18 = nn.Sequential(*resnet18_children[:-2])

class ResNet18Features(nn.Module):
    """Thin wrapper that runs its input through the module-level `resnet18`.

    Every instance references the same `resnet18` object (no copy is made),
    so all instances share weights and return identical outputs.
    """

    def __init__(self):
        super(ResNet18Features, self).__init__()
        self.resnet18 = resnet18

    def forward(self, x):
        """Return the output of the truncated ResNet-18 for the batch `x`."""
        return self.resnet18(x)

# Feature extractors that stop at three different depths. The slices are taken
# from the full child list; slicing the already-truncated `resnet18` (as was
# done before) cut each model two stages earlier than intended.
model_avgpool = nn.Sequential(*resnet18_children[:-1])  # through avgpool (fc removed)
model_block1 = nn.Sequential(*resnet18_children[:5])    # through layer1 (first residual stage)
model_block3 = nn.Sequential(*resnet18_children[:7])    # through layer3 (third residual stage)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 72.9MB/s]


In this code segment, you modify the pre-trained ResNet-18 model by removing the fully connected layers, and then define a new neural network model (`ResNet18Features`) based on this modified ResNet-18. Additionally, three instances of this model are created with different names (`model_avgpool`, `model_block1`, and `model_block3`). Here's a detailed explanation:

1. **Remove Fully Connected Layers from ResNet-18:**
   ```python
   resnet18 = nn.Sequential(*list(resnet18.children())[:-2])
   ```

   - This line creates a new `nn.Sequential` model by excluding the last two child modules of the pre-trained ResNet-18 (the global average-pooling layer and the fully connected classification layer). It effectively removes the classification head, leaving only the convolutional feature extraction part.

2. **Define `ResNet18Features` Model:**
   ```python
   class ResNet18Features(nn.Module):
       def __init__(self):
           super(ResNet18Features, self).__init__()
           self.resnet18 = resnet18

       def forward(self, x):
           return self.resnet18(x)
   ```

   - Defines a new neural network model `ResNet18Features` that inherits from `nn.Module`. The `forward` method simply passes the input through the modified ResNet-18 model.

3. **Create Instances of `ResNet18Features`:**
   ```python
   model_avgpool = ResNet18Features()
   model_block1 = ResNet18Features()
   model_block3 = ResNet18Features()
   ```

   - Three instances of the `ResNet18Features` model are created with different names (`model_avgpool`, `model_block1`, and `model_block3`). These instances can be used independently, each representing a modified ResNet-18 model without the fully connected layers.

Overall, this code prepares modified ResNet-18 models for feature extraction. Depending on your task, you might use these models to extract features from different layers of the ResNet-18 architecture. For example, `model_avgpool` may be used to extract features after the average pooling layer, while `model_block1` and `model_block3` may be used to extract features after the first and third residual blocks, respectively.

In [5]:
# Extract features from average pooling, block 1, and block 3
import numpy as np
def extract_features(model, loader):
    """Run `model` over every batch of `loader` and collect outputs and labels.

    The model is switched to eval mode and gradients are disabled. Singleton
    axes other than the batch axis are removed from each batch output (e.g.
    (B, C, 1, 1) becomes (B, C)); the batch axis is always kept, so a final
    batch containing a single sample stacks correctly with the others.

    Args:
        model: callable module exposing `.eval()`; applied to each image batch.
        loader: iterable yielding `(images, targets)` pairs of tensors.

    Returns:
        (features, labels): NumPy arrays, batches stacked along axis 0.

    Raises:
        ValueError: if `loader` yields no batches.
    """
    features = []
    labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            outputs = model(images)
            # A bare .squeeze() would also drop the batch axis when the batch
            # holds one sample; squeeze only the non-batch singleton axes.
            kept_dims = [dim for dim in outputs.shape[1:] if dim != 1]
            outputs = outputs.reshape(outputs.shape[0], *kept_dims)
            features.append(outputs.cpu().numpy())
            labels.append(targets.cpu().numpy())

    if not features:
        raise ValueError("loader yielded no batches; nothing to extract")

    features = np.vstack(features)
    labels = np.concatenate(labels)
    return features, labels

This code defines a function `extract_features` that takes a model and a data loader as input and extracts features from the specified model. The features are extracted for each batch in the data loader, and the final features and corresponding labels are returned as NumPy arrays. Here's a breakdown of the code:

```python
import numpy as np

def extract_features(model, loader):
    features = []
    labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            outputs = model(images)
            features.append(outputs.squeeze().cpu().numpy())
            labels.append(targets.cpu().numpy())

    features = np.vstack(features)
    labels = np.concatenate(labels)
    return features, labels
```

- `model`: The neural network model from which features are to be extracted.
- `loader`: The data loader containing the images and labels for which features need to be extracted.

1. **Initialize Empty Lists:**
   ```python
   features = []
   labels = []
   ```

   - Initialize empty lists to store the extracted features and corresponding labels.

2. **Set Model to Evaluation Mode:**
   ```python
   model.eval()
   ```

   - Sets the model to evaluation mode. This is important because certain layers, such as dropout or batch normalization, behave differently during training and evaluation.

3. **Iterate Through DataLoader Batches:**
   ```python
   with torch.no_grad():
       for images, targets in loader:
           outputs = model(images)
           features.append(outputs.squeeze().cpu().numpy())
           labels.append(targets.cpu().numpy())
   ```

   - Iterates through batches in the data loader. For each batch, it computes the model outputs and appends the features and labels to the respective lists. `torch.no_grad()` is used to disable gradient computation during inference, reducing memory usage.

4. **Stack Features and Concatenate Labels:**
   ```python
   features = np.vstack(features)
   labels = np.concatenate(labels)
   ```

   - Stacks the feature arrays vertically (along the first axis) to form the final feature array. Labels are concatenated along the first axis to form the final label array.

5. **Return Extracted Features and Labels:**
   ```python
   return features, labels
   ```

   - Returns the extracted features and labels as NumPy arrays.


In [6]:
# Extract features from average pooling
# One pass over each loader with model_avgpool; returns NumPy feature and
# label arrays for the train and held-out splits.
# NOTE: these rows are later concatenated with the block1/block3 features, so
# train_loader must iterate in a fixed order (shuffle=False); with a shuffling
# loader each extract_features call sees the samples in a different order.
avgpool_features_train, avgpool_labels_train = extract_features(model_avgpool, train_loader)
avgpool_features_val, avgpool_labels_val = extract_features(model_avgpool, test_loader)



In [None]:
# Extract features from block 1
# Second pass over the same loaders, this time with model_block1.
# NOTE: row i only matches row i of the avgpool features if the loaders
# iterate in a fixed order (shuffle=False).
block1_features_train, block1_labels_train = extract_features(model_block1, train_loader)
block1_features_val, block1_labels_val = extract_features(model_block1, test_loader)



In [None]:
# Extract features from block 3
# Third pass over the same loaders, this time with model_block3.
# NOTE: row i only matches row i of the other feature sets if the loaders
# iterate in a fixed order (shuffle=False).
block3_features_train, block3_labels_train = extract_features(model_block3, train_loader)
block3_features_val, block3_labels_val = extract_features(model_block3, test_loader)

Average Pooling Layer:

Features are extracted from the model (model_avgpool) after the average pooling layer for both the training and validation sets. The extracted features and labels are stored in avgpool_features_train, avgpool_labels_train, avgpool_features_val, and avgpool_labels_val.
Block 1:

Features are extracted from the model (model_block1) after the first residual block for both the training and validation sets. The extracted features and labels are stored in block1_features_train, block1_labels_train, block1_features_val, and block1_labels_val.
Block 3:

Features are extracted from the model (model_block3) after the third residual block for both the training and validation sets. The extracted features and labels are stored in block3_features_train, block3_labels_train, block3_features_val, and block3_labels_val.
These extracted features can be used as input to train a different classifier, such as a Support Vector Machine (SVM), or for any other downstream task. The separation of features from different layers allows for experimentation with different levels of abstraction in the feature representations.

In [None]:
# Concatenate features from different blocks.
# The three feature sets generally have different ranks / spatial sizes (a
# pooled output is 2-D after squeezing, intermediate feature maps are 4-D), so
# they cannot be joined along axis 1 as-is. Flatten each one to
# (n_samples, n_features) first, then place the blocks side by side.
train_feature_blocks = (avgpool_features_train, block1_features_train, block3_features_train)
val_feature_blocks = (avgpool_features_val, block1_features_val, block3_features_val)

train_features = np.concatenate(
    [block.reshape(block.shape[0], -1) for block in train_feature_blocks], axis=1
)
val_features = np.concatenate(
    [block.reshape(block.shape[0], -1) for block in val_feature_blocks], axis=1
)

# Flatten the features (already 2-D at this point; the reshape is kept so the
# *_flat names used by later cells remain defined)
train_features_flat = train_features.reshape(train_features.shape[0], -1)
val_features_flat = val_features.reshape(val_features.shape[0], -1)

In this code, features extracted from different layers (average pooling, block 1, and block 3) are concatenated horizontally and then flattened. This is often done to create a single feature vector for each sample that can be used as input to a downstream classifier or model. Here's the breakdown:

```python
# Concatenate features from different blocks
train_features = np.concatenate([avgpool_features_train, block1_features_train, block3_features_train], axis=1)
val_features = np.concatenate([avgpool_features_val, block1_features_val, block3_features_val], axis=1)

# Flatten the features
train_features_flat = train_features.reshape(train_features.shape[0], -1)
val_features_flat = val_features.reshape(val_features.shape[0], -1)
```

- **Concatenate Features:**
  - The features extracted from different blocks (`avgpool_features_train`, `block1_features_train`, `block3_features_train`, etc.) are concatenated horizontally along axis 1 using `np.concatenate`. This creates a single array for each sample with features from different blocks side by side.

- **Flatten the Features:**
  - The concatenated feature arrays are flattened using `reshape` so that each sample's features are represented as a 1D array. This results in `train_features_flat` and `val_features_flat`, which can be used as inputs to a downstream classifier.

The final `train_features_flat` and `val_features_flat` arrays can be used as input features for training a classifier, such as a Support Vector Machine (SVM) or any other machine learning model. The concatenation and flattening help create a unified representation of features from different layers for each sample.


SVMs are robust and effective in high-dimensional spaces, making them suitable for various applications, including image classification, text classification, and bioinformatics. SVMs have been widely used in both binary and multi-class classification problems due to their ability to handle complex decision boundaries.

In [None]:
# Sanity check: both arrays should be 2-D with the same number of columns
# (features per sample); only the number of rows (samples) should differ.
print(train_features_flat.shape, val_features_flat.shape)

In [None]:
n_components = 2000  # You can adjust this value based on your requirements

# PCA cannot keep more components than min(n_samples, n_features); clamp so a
# smaller training set does not make the fit raise.
n_components = min(n_components, *train_features_flat.shape)

# Apply PCA to training features.
# random_state is fixed so the projection is reproducible when scikit-learn
# selects a randomized SVD solver for large inputs.
pca = PCA(n_components=n_components, random_state=42)
train_features_pca = pca.fit_transform(train_features_flat)

# Apply PCA to validation features (same projection, fitted on train only)
val_features_pca = pca.transform(val_features_flat)
print(train_features_pca.shape, val_features_pca.shape)

In this code, Principal Component Analysis (PCA) is applied to reduce the dimensionality of the training and validation features. PCA is a technique used for dimensionality reduction and can be especially useful when dealing with high-dimensional data.

n_components Definition:

n_components is a hyperparameter that determines the number of principal components to retain after PCA. It controls the dimensionality of the reduced feature space. In this example, it is set to 2000, but you can adjust this value based on your specific requirements and the amount of variance you want to retain.
Apply PCA to Training Features:

PCA(n_components=n_components) initializes a PCA object with the specified number of components. fit_transform is then called on the training features (train_features_flat) to compute the principal components and transform the original features into the reduced feature space.
Apply PCA to Validation Features:

transform is used on the validation features (val_features_flat) to apply the same PCA transformation learned from the training set. It's important to use the same transformation on both training and validation sets to maintain consistency.
Print the Resulting Shapes:

The shapes of the resulting feature arrays after PCA transformation are printed to the console. This is useful for verifying the impact of PCA on the dimensionality of the data.
After applying PCA, train_features_pca and val_features_pca are the reduced-dimensional representations of the original features. The number of columns in these arrays corresponds to the number of principal components specified by n_components. This dimensionality reduction is often performed to reduce computational complexity and potential overfitting, especially when working with high-dimensional data.

In [None]:
# Fit an RBF-kernel SVM on the PCA-reduced training features. The solver is
# capped at 2000 iterations, so scikit-learn may stop before convergence.
svm_model = SVC(kernel='rbf', max_iter=2000)
svm_model.fit(train_features_pca, avgpool_labels_train)

# Score the held-out split: accuracy plus macro-averaged precision/recall/F1
predictions = svm_model.predict(val_features_pca)
accuracy = accuracy_score(avgpool_labels_val, predictions)
precision, recall, f1 = (
    macro_metric(avgpool_labels_val, predictions, average='macro')
    for macro_metric in (precision_score, recall_score, f1_score)
)
print(f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}')

In this code snippet, a Support Vector Machine (SVM) with a Radial Basis Function (RBF) kernel is trained using the reduced-dimensional features obtained from PCA. The model is then evaluated on the validation set.
Train SVM with RBF Kernel:

svm_model = SVC(kernel='rbf', max_iter=2000) initializes an SVM model with an RBF (Radial Basis Function) kernel. max_iter is set to 2000, controlling the maximum number of iterations for optimization. The model is then trained using the fit method on the PCA-transformed training features (train_features_pca) and corresponding labels (avgpool_labels_train).
Evaluate the Model on Validation Set:

predictions = svm_model.predict(val_features_pca) uses the trained SVM model to make predictions on the PCA-transformed validation features (val_features_pca).
Various evaluation metrics are computed, including accuracy, precision, recall, and F1-score, using functions from the sklearn.metrics module (accuracy_score, precision_score, recall_score, f1_score).
Print Evaluation Metrics:

The computed evaluation metrics (accuracy, precision, recall, and F1-score) are printed to the console.
This code trains an SVM model with an RBF kernel on the reduced-dimensional features obtained from PCA and assesses its performance on the validation set. The choice of kernel and hyperparameters, such as max_iter, may need to be adjusted based on the characteristics of your data and the specific requirements of your task.

The joblib.dump function is used to save the trained SVM model (svm_model) to a file named 'svm_model_resnet.pkl'. This allows you to persist the trained model so that you can later load it and use it for making predictions on new data without retraining the model.
After running it, you will have a file named 'svm_model_resnet.pkl' containing the serialized representation of the trained SVM model. You can later load this model using joblib.load to make predictions on new data without having to retrain the model from scratch.

In [None]:
# plot the confusion matrix
true_labels = avgpool_labels_val
pred_labels = predictions

# Class names of the dataset underlying the train subset
classes = train_dataset.dataset.classes

# Pin the matrix to one row/column per class. Without `labels`, a class that
# appears in neither the validation labels nor the predictions is dropped and
# the matrix no longer matches the tick labels below.
# Assumes labels are integer indices into `classes` — TODO confirm for
# datasets other than torchvision's OxfordIIITPet.
cm = confusion_matrix(true_labels, pred_labels, labels=np.arange(len(classes)))

# Plot confusion matrix
plt.figure(figsize=(16, 12))
sns.set(font_scale=1.2)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)

plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

This code generates and visualizes a confusion matrix using the true labels (avgpool_labels_val) and predicted labels (predictions) obtained from evaluating the SVM model on the validation set. The confusion matrix provides insights into the model's performance by showing the number of true positive, true negative, false positive, and false negative predictions for each class.

In [None]:
import os
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from PIL import Image
import torch.nn as nn
import torchvision.models as models
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, auc
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.decomposition import PCA
import seaborn as sns
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier


In [None]:
true_labels = avgpool_labels_val
predicted_scores = svm_model.decision_function(val_features_pca)

# The columns of decision_function follow svm_model.classes_ (the classes seen
# during fit), so binarize the true labels against that same list. Using
# np.unique(true_labels) shifts the columns whenever a class is missing from
# the validation split.
score_classes = svm_model.classes_
n_classes = len(score_classes)

true_labels_bin = label_binarize(true_labels, classes=score_classes)

# ROC is undefined for a class with no positive (or no negative) validation
# samples; restrict the analysis to classes that have both.
positives_per_class = true_labels_bin.sum(axis=0)
scorable_classes = [
    i for i in range(n_classes) if 0 < positives_per_class[i] < len(true_labels)
]

roc_auc_macro = roc_auc_score(
    true_labels_bin[:, scorable_classes],
    predicted_scores[:, scorable_classes],
    average='macro',
)
print(f'Macro-average ROC AUC: {roc_auc_macro:.2f}')

# Compute the one-vs-rest ROC curve and AUC for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in scorable_classes:
    fpr[i], tpr[i], _ = roc_curve(true_labels_bin[:, i], predicted_scores[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve
plt.figure(figsize=(20, 16))
for i in scorable_classes:
    plt.plot(fpr[i], tpr[i], label=f'Class {score_classes[i]} (AUC = {roc_auc[i]:.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Multi-class Classification')
plt.legend(loc='lower right')
plt.show()

Obtain True Labels and Predicted Scores:

true_labels are the true class labels from the validation set.
predicted_scores are the decision function values obtained from the SVM model.
Convert True Labels to Binary Format:

true_labels_bin converts the true class labels into a binary format suitable for multi-class ROC analysis using label_binarize from scikit-learn.
Calculate Macro-average ROC AUC:

roc_auc_macro computes the macro-average ROC AUC score, representing the overall performance across all classes.
Plot ROC Curve for Each Class:

For each class, ROC curves are computed (fpr, tpr) and the ROC AUC is calculated.
Individual ROC curves are plotted for each class.
Plot Random Line (Baseline):

A dashed line representing random guessing (baseline) is added to the plot.
Set Labels and Title:

X-axis and Y-axis labels, as well as the title of the plot, are set.
Display Legend:

The legend is displayed in the lower right corner to identify each class.

In [None]:
# Define the transformation to be applied to each image
# (not referenced again in this cell; the datasets below are loaded as saved)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load the saved datasets
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load dataset files you created yourself. The absolute /content
# paths also tie this cell to one environment.
train_dataset = torch.load('/content/train_dataset.pth')
val_dataset = torch.load('/content/val_dataset.pth')

# Create DataLoader for training and validation sets.
# shuffle=False on purpose: train_loader is iterated once per feature
# extractor, and the resulting feature arrays are concatenated row by row and
# paired with labels from a single pass. A shuffling loader would visit the
# samples in a different order on each pass and misalign them.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Pretrained GoogLeNet with its last child module removed (the final
# classification layer in torchvision's GoogLeNet); the remaining children are
# chained into one nn.Sequential used as a feature extractor.
_googlenet_children = list(models.googlenet(pretrained=True).children())
_googlenet_children.pop()
googlenet = nn.Sequential(*_googlenet_children)


class GoogLeNetFeatures(nn.Module):
    """Feature extractor that forwards its input through `googlenet`.

    Every instance stores a reference to the same module-level `googlenet`
    object, so all instances share weights and produce identical outputs.
    """

    def __init__(self):
        super().__init__()
        self.googlenet = googlenet

    def forward(self, x):
        """Return the truncated GoogLeNet's output for the batch `x`."""
        features = self.googlenet(x)
        return features


# Three extractor instances.
# NOTE(review): despite their names, all three wrap the same truncated
# network, so they do not tap different layers.
model_inception3b, model_inception4e, model_avgpool = (
    GoogLeNetFeatures() for _ in range(3)
)

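In this code, a pre-trained GoogLeNet (Inception) model is loaded, and features are extracted from the model using the defined GoogLeNetFeatures class. The intent is to extract features separately for two different layers, 'inception3b' and 'inception4e', as well as from the 'AvgPool' layer. Note that, as written, all three GoogLeNetFeatures instances wrap the same truncated network (every child module except the final fully connected layer), so they currently return identical features; tapping 'inception3b' or 'inception4e' would require truncating the network at those layers.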
Load Pre-trained GoogLeNet Model:

models.googlenet(pretrained=True) loads the pre-trained GoogLeNet model.
Remove Fully Connected Layers:

The fully connected layers are removed from the model using nn.Sequential(*list(googlenet.children())[:-1]).
Define GoogLeNetFeatures Class:

The GoogLeNetFeatures class is defined, which takes an input tensor and returns the features obtained from the GoogLeNet model.
Create Instances of GoogLeNetFeatures:

Instances of the GoogLeNetFeatures class are created for two different layers ('inception3b' and 'inception4e') and the 'AvgPool' layer.
Extract Features from Inception (3b):

Features are extracted from the 'inception3b' layer for both the training and validation sets.
Extract Features from Inception (4e):

Features are extracted from the 'inception4e' layer for both the training and validation sets.
Extract Features from AvgPool:

Features are extracted from the 'AvgPool' layer for both the training and validation sets.
These extracted features can be used for downstream tasks, such as training a Support Vector Machine (SVM) or any other classification model. The separation of features from different layers allows for experimenting with different levels of abstraction in the feature representations.

In [None]:
# Extract features from Inception (3b)
# NOTE: each call below is a separate pass over train_loader / val_loader.
# The rows of the three feature sets only refer to the same images if the
# loaders iterate in a fixed order (shuffle=False).
inception3b_features_train, inception3b_labels_train = extract_features(model_inception3b, train_loader)
inception3b_features_val, inception3b_labels_val = extract_features(model_inception3b, val_loader)

# Extract features from Inception (4e)
inception4e_features_train, inception4e_labels_train = extract_features(model_inception4e, train_loader)
inception4e_features_val, inception4e_labels_val = extract_features(model_inception4e, val_loader)

# Extract features from AvgPool
# model_avgpool is the GoogLeNet-based instance created in the previous cell;
# these assignments overwrite the avgpool_* arrays from the ResNet-18 section.
avgpool_features_train, avgpool_labels_train = extract_features(model_avgpool, train_loader)
avgpool_features_val, avgpool_labels_val = extract_features(model_avgpool, val_loader)

In [None]:
# Concatenate features from different Inception blocks.
# Each feature set is flattened to (n_samples, n_features) first so that
# arrays of different rank / spatial size can be joined along axis 1.
train_feature_blocks = (inception3b_features_train, inception4e_features_train, avgpool_features_train)
val_feature_blocks = (inception3b_features_val, inception4e_features_val, avgpool_features_val)

train_features = np.concatenate(
    [block.reshape(block.shape[0], -1) for block in train_feature_blocks], axis=1
)
val_features = np.concatenate(
    [block.reshape(block.shape[0], -1) for block in val_feature_blocks], axis=1
)

# Flatten the features (already 2-D here; kept so the *_flat names exist)
train_features_flat = train_features.reshape(train_features.shape[0], -1)
val_features_flat = val_features.reshape(val_features.shape[0], -1)


# Train an SVM with RBF kernel.
# Fit and predict on the flattened arrays: SVC requires 2-D input, and the ROC
# cell later calls decision_function on val_features_flat, so the same
# representation is used throughout.
# The labels from any one extraction pass can be used only if every pass
# visited the samples in the same order (non-shuffling loaders).
svm_model = SVC(kernel='rbf', max_iter=2000)
svm_model.fit(train_features_flat, inception3b_labels_train)

# Evaluate the model on the validation set
predictions = svm_model.predict(val_features_flat)
accuracy = accuracy_score(inception3b_labels_val, predictions)
print(f"Validation Accuracy: {accuracy}")

In [None]:
# Persist the trained SVM. joblib.dump does not create missing directories,
# so make sure the target folder exists first.
svm_model_path = './inception/svm_model_inception_features.joblib'
os.makedirs(os.path.dirname(svm_model_path), exist_ok=True)
# NOTE: joblib.dump returns the list of file names it wrote, not a model.
# The variable name is kept for compatibility; use joblib.load(svm_model_path)
# to obtain the model itself.
loaded_svm_model = joblib.dump(svm_model, svm_model_path)

# plot the confusion matrix
true_labels = inception3b_labels_val
pred_labels = predictions

classes = train_dataset.dataset.classes

# Pin the matrix to one row/column per class so it always matches the tick
# labels, even if a class is absent from both labels and predictions.
# Assumes labels are integer indices into `classes` — TODO confirm for the
# dataset object loaded from disk.
cm = confusion_matrix(true_labels, pred_labels, labels=np.arange(len(classes)))

# Plot confusion matrix
plt.figure(figsize=(16, 12))
sns.set(font_scale=1.2)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)

plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
true_labels = inception3b_labels_val
predicted_scores = svm_model.decision_function(val_features_flat)

# The columns of decision_function follow svm_model.classes_ (the classes seen
# during fit), so binarize the true labels against that same list. Using
# np.unique(true_labels) shifts the columns whenever a class is missing from
# the validation split.
score_classes = svm_model.classes_
n_classes = len(score_classes)

true_labels_bin = label_binarize(true_labels, classes=score_classes)

# ROC is undefined for a class with no positive (or no negative) validation
# samples; restrict the analysis to classes that have both.
positives_per_class = true_labels_bin.sum(axis=0)
scorable_classes = [
    i for i in range(n_classes) if 0 < positives_per_class[i] < len(true_labels)
]

roc_auc_macro = roc_auc_score(
    true_labels_bin[:, scorable_classes],
    predicted_scores[:, scorable_classes],
    average='macro',
)
print(f'Macro-average ROC AUC: {roc_auc_macro:.2f}')

# Compute the one-vs-rest ROC curve and AUC for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in scorable_classes:
    fpr[i], tpr[i], _ = roc_curve(true_labels_bin[:, i], predicted_scores[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve
plt.figure(figsize=(20, 16))
for i in scorable_classes:
    plt.plot(fpr[i], tpr[i], label=f'Class {score_classes[i]} (AUC = {roc_auc[i]:.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Multi-class Classification')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Define the transformation to be applied to each image
# (not referenced again in this cell; the datasets below are loaded as saved)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load the saved datasets
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load dataset files you created yourself. The absolute /content
# paths also tie this cell to one environment.
train_dataset = torch.load('/content/train_dataset.pth')
val_dataset = torch.load('/content/val_dataset.pth')

# Create DataLoader for training and validation sets.
# shuffle=False on purpose: the loaders feed feature extraction, where several
# passes over the same loader must visit the samples in the same order for
# the extracted features and labels to stay aligned.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Pretrained MobileNetV2 with its last child module removed (the `classifier`
# head in torchvision's MobileNetV2); what remains is chained into one
# nn.Sequential used as a feature extractor.
_mobilenet_children = list(models.mobilenet_v2(pretrained=True).children())
_mobilenet_children.pop()
mobilenet = nn.Sequential(*_mobilenet_children)


class MobileNetV2Features(nn.Module):
    """Feature extractor that forwards its input through `mobilenet`.

    Every instance stores a reference to the same module-level `mobilenet`
    object, so all instances share weights and produce identical outputs.
    """

    def __init__(self):
        super().__init__()
        self.mobilenet = mobilenet

    def forward(self, x):
        """Return the truncated MobileNetV2's output for the batch `x`."""
        features = self.mobilenet(x)
        return features


# Three extractor instances.
# NOTE(review): despite their names, all three wrap the same truncated
# network, so they do not tap different layers.
model_avgpool, model_conv1, model_middle_layer = (
    MobileNetV2Features() for _ in range(3)
)


In this code, a pre-trained MobileNetV2 model is loaded, and the fully connected layers are removed from the model. Then, three instances of the MobileNetV2Features class are created, each designed to extract features from different layers of the MobileNetV2 model.
Load Pre-trained MobileNetV2 Model:

models.mobilenet_v2(pretrained=True) loads the pre-trained MobileNetV2 model.
Remove Fully Connected Layers:

The classifier head (the last child module of the model) is removed using `nn.Sequential(*list(mobilenet.children())[:-1])`, leaving only the convolutional feature extractor.
Define MobileNetV2Features Class:

The MobileNetV2Features class is defined, which takes an input tensor and returns the features obtained from the MobileNetV2 model.
Create Instances of MobileNetV2Features:

Three instances of the MobileNetV2Features class are created: model_avgpool, model_conv1, and model_middle_layer.
These instances can be used to extract features from different layers of the MobileNetV2 model. The choice of layers (AvgPool, Conv1, Middle Layer, etc.) allows for experimenting with different levels of abstraction in the feature representations. These extracted features can be further utilized for tasks such as classification, feature analysis, or other downstream applications.

In [None]:
def extract_features_labels(model, loader):
    """Run ``model`` over every batch of ``loader`` and collect outputs and targets.

    The model is switched to eval mode and gradients are disabled.

    Args:
        model: module mapping a batch of images to a batch of features.
        loader: iterable yielding ``(images, targets)`` tensor pairs.

    Returns:
        ``(features, labels)`` as NumPy arrays. ``features`` has one row per
        sample; singleton non-batch axes are removed (e.g. ``(N, C, 1, 1)``
        becomes ``(N, C)``). ``labels`` is the concatenation of all targets.
    """
    features = []
    labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            outputs = model(images).cpu().numpy()
            # Remove singleton axes but never the batch axis: a plain
            # squeeze() also drops the batch axis when a batch holds a single
            # sample (e.g. the last batch), which breaks the stacking below.
            kept_shape = (outputs.shape[0],) + tuple(d for d in outputs.shape[1:] if d != 1)
            if len(kept_shape) == 1:
                # Keep a 2-D (n_samples, n_features) layout for scalar features
                kept_shape = (outputs.shape[0], 1)
            features.append(outputs.reshape(kept_shape))
            labels.append(targets.cpu().numpy())

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels)
    return features, labels
# Run the model_avgpool extractor over the training and validation loaders
avgpool_features_train, avgpool_labels_train = extract_features_labels(model_avgpool, train_loader)
avgpool_features_val, avgpool_labels_val = extract_features_labels(model_avgpool, val_loader)

# Persist the four arrays as .npy files in the working directory
for npy_path, npy_array in (
    ('mobile_avgpool_features_train.npy', avgpool_features_train),
    ('mobile_avgpool_labels_train.npy', avgpool_labels_train),
    ('mobile_avgpool_features_val.npy', avgpool_features_val),
    ('mobile_avgpool_labels_val.npy', avgpool_labels_val),
):
    np.save(npy_path, npy_array)

# Show the shape of the training feature array as the cell output
avgpool_features_train.shape

In [None]:
# Run the model_conv1 extractor over the training and validation loaders
conv1_features_train, conv1_labels_train = extract_features_labels(model_conv1, train_loader)
conv1_features_val, conv1_labels_val = extract_features_labels(model_conv1, val_loader)

# Write each extracted array to its .npy file
conv1_outputs = {
    'mobile_features_train.npy': conv1_features_train,
    'mobile_labels_train.npy': conv1_labels_train,
    'mobile_features_val.npy': conv1_features_val,
    'mobile_labels_val.npy': conv1_labels_val,
}
for npy_path, npy_array in conv1_outputs.items():
    np.save(npy_path, npy_array)

# Show the shape of the training feature array as the cell output
conv1_features_train.shape

In [None]:
# Run the model_middle_layer extractor over the training and validation loaders
middle_layer_features_train, middle_layer_labels_train = extract_features_labels(
    model_middle_layer, train_loader
)
middle_layer_features_val, middle_layer_labels_val = extract_features_labels(
    model_middle_layer, val_loader
)

# Save features and labels of both splits to .npy files
middle_paths = (
    'mobile_middle_features_train.npy',
    'mobile_middle_labels_train.npy',
    'mobile_middle_features_val.npy',
    'mobile_middle_labels_val.npy',
)
middle_arrays = (
    middle_layer_features_train,
    middle_layer_labels_train,
    middle_layer_features_val,
    middle_layer_labels_val,
)
for npy_path, npy_array in zip(middle_paths, middle_arrays):
    np.save(npy_path, npy_array)

# Show the shape of the training feature array as the cell output
middle_layer_features_train.shape

In [None]:

# The three feature sets come from separate passes over the loaders and are
# concatenated row by row below. That is only valid if every pass visited the
# samples in the same order, so verify it through the label arrays instead of
# silently training on features that belong to different images.
if not (np.array_equal(avgpool_labels_train, conv1_labels_train)
        and np.array_equal(avgpool_labels_train, middle_layer_labels_train)):
    raise ValueError(
        "Training labels differ between the extraction passes: the feature rows are "
        "not aligned. Re-extract the features with a non-shuffling train loader."
    )
if not (np.array_equal(avgpool_labels_val, conv1_labels_val)
        and np.array_equal(avgpool_labels_val, middle_layer_labels_val)):
    raise ValueError(
        "Validation labels differ between the extraction passes: the feature rows are "
        "not aligned. Re-extract the features with a non-shuffling validation loader."
    )

# Concatenate features from different layers
train_features = np.concatenate([avgpool_features_train, conv1_features_train, middle_layer_features_train], axis=1)
val_features = np.concatenate([avgpool_features_val, conv1_features_val, middle_layer_features_val], axis=1)


# Flatten the features to one row per sample
train_features_flat = train_features.reshape(train_features.shape[0], -1)
val_features_flat = val_features.reshape(val_features.shape[0], -1)

# Release the large intermediate arrays (the names are kept, bound to None).
# Re-running this cell therefore requires re-running the extraction cells.
train_features = None
val_features = None
avgpool_features_train = None
avgpool_features_val = None
conv1_features_train = None
conv1_features_val = None
middle_layer_features_train = None
middle_layer_features_val = None
n_components = 1000  # You can adjust this value based on your requirements

# Fit PCA on the training features only, then project both splits
pca = PCA(n_components=n_components)
train_features_pca = pca.fit_transform(train_features_flat)

# Apply PCA to validation features
val_features_pca = pca.transform(val_features_flat)
# Train an SVM with RBF kernel
svm_model = SVC(kernel='rbf', max_iter=3000)
# The label arrays of the three passes were checked to be identical above
svm_model.fit(train_features_pca, avgpool_labels_train)

# Evaluate the model on the validation set
predictions = svm_model.predict(val_features_pca)
accuracy = accuracy_score(avgpool_labels_val, predictions)
print(f"Validation Accuracy: {accuracy}")

In [None]:
# Re-import the metric functions: this cell used to bind its float results to
# these very names, which shadows the functions for the rest of the kernel
# session (a second run then fails with "'float' object is not callable").
from sklearn.metrics import precision_score, recall_score, f1_score

# Macro-averaged metrics of the SVM predictions on the validation set,
# stored under names that do not clash with the sklearn functions
precision = precision_score(avgpool_labels_val, predictions, average='macro')
recall = recall_score(avgpool_labels_val, predictions, average='macro')
f1 = f1_score(avgpool_labels_val, predictions, average='macro')

print(f"Precision Score: {precision}, Recall Score: {recall}, F1 Score: {f1}")

In [None]:
# Plot the confusion matrix of the SVM predictions on the validation set
true_labels = avgpool_labels_val  # validation labels (was mistakenly bound to the training labels)
pred_labels = predictions

classes = train_dataset.dataset.classes

# Pin the matrix to one row/column per class so it always matches the tick
# labels, even if some class is absent from the validation split.
# Assumes labels are integer indices into `classes` - TODO confirm.
cm = confusion_matrix(true_labels, pred_labels, labels=np.arange(len(classes)))

# Plot confusion matrix
plt.figure(figsize=(16, 12))
sns.set(font_scale=1.2)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)

plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# One-vs-rest ROC analysis of the SVM on the PCA-projected validation features.
true_labels = avgpool_labels_val
predicted_scores = svm_model.decision_function(val_features_pca)

# The columns of decision_function follow svm_model.classes_, so binarize the
# labels against that same class list to keep label and score columns aligned
# (np.unique(true_labels) would yield fewer columns if a class is missing from
# the validation split).
score_classes = svm_model.classes_
n_classes = len(score_classes)

true_labels_bin = label_binarize(true_labels, classes=score_classes)

# A ROC curve is undefined for a class with no positive validation sample,
# so only evaluate the classes that actually occur.
evaluable_classes = [int(i) for i in np.flatnonzero(true_labels_bin.sum(axis=0) > 0)]

roc_auc_macro = roc_auc_score(
    true_labels_bin[:, evaluable_classes],
    predicted_scores[:, evaluable_classes],
    average='macro',
)
print(f'Macro-average ROC AUC: {roc_auc_macro:.2f}')

# Compute the ROC curve and AUC for each class (keyed by score column index)
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in evaluable_classes:
    fpr[i], tpr[i], _ = roc_curve(true_labels_bin[:, i], predicted_scores[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve
plt.figure(figsize=(20, 16))
for i in evaluable_classes:
    plt.plot(fpr[i], tpr[i], label=f'Class {i} (AUC = {roc_auc[i]:.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Multi-class Classification')
plt.legend(loc='lower right')
plt.show()

In [None]:

# Define the neural network model for MobileNetV2
class NetMobile(nn.Module):
    """Pre-trained MobileNetV2 that returns final and intermediate features.

    Args:
        mid_index: number of leading blocks of ``mobile_net.features`` whose
            output is taken as the intermediate ("mid") feature map.

    ``forward`` returns ``(output_avgpool, output_mid)``, both globally
    average-pooled and flattened to ``(N, C)`` so they can be concatenated
    along dim 1 and passed to scikit-learn estimators.

    Raises:
        ValueError: if ``mid_index`` does not split the feature stack into two
            non-empty parts.
    """

    def __init__(self, mid_index=5):
        super(NetMobile, self).__init__()
        # Keep the pre-trained first convolution: replacing it with a freshly
        # initialised Conv2d of the same shape only discards learned weights.
        self.mobile_net = models.mobilenet_v2(pretrained=True)
        if not 0 < mid_index < len(self.mobile_net.features):
            raise ValueError(
                f"mid_index must be in 1..{len(self.mobile_net.features) - 1}, got {mid_index}"
            )
        self.mid_index = mid_index
        self.avgpool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        feature_stack = self.mobile_net.features

        # Run the stack in two stages so the intermediate activation is
        # obtained without a second pass (the blocks of the stack are not
        # subscriptable, so the stack itself is sliced instead).
        mid_maps = feature_stack[:self.mid_index](x)
        final_maps = feature_stack[self.mid_index:](mid_maps)

        # Global average pooling, then flatten to (N, C)
        output_avgpool = torch.flatten(self.avgpool(final_maps), 1)
        output_mid = torch.flatten(self.avgpool(mid_maps), 1)

        return output_avgpool, output_mid



# StandardScaler is not part of the notebook's import cell
from sklearn.preprocessing import StandardScaler

# Instantiate the model and set it to evaluation mode
model_mobile = NetMobile()
model_mobile.eval()


def extract_mobile_features(loader, return_labels=False):
    """Run ``model_mobile`` over ``loader`` and collect its two feature outputs.

    Args:
        loader: iterable yielding ``(images, targets)`` batches.
        return_labels: when True, the targets seen during the same pass are
            returned as a third tensor, so features and labels stay aligned
            even if the loader shuffles.

    Returns:
        ``(avgpool, mid)`` tensors, or ``(avgpool, mid, labels)`` when
        ``return_labels`` is True.
    """
    all_avgpool, all_mid, all_labels = [], [], []
    with torch.no_grad():
        for data, targets in loader:
            avgpool, mid = model_mobile(data)
            all_avgpool.append(avgpool)
            all_mid.append(mid)
            all_labels.append(targets)

    collected = (torch.cat(all_avgpool), torch.cat(all_mid))
    if return_labels:
        collected = collected + (torch.cat(all_labels),)
    return collected


# Extract features and the matching labels for training and testing sets
train_avgpool, train_mid, train_targets = extract_mobile_features(train_loader, return_labels=True)
test_avgpool, test_mid, test_targets = extract_mobile_features(test_loader, return_labels=True)

# Concatenate features; flatten to (N, D) first because scikit-learn needs 2-D input
X_train = torch.cat([train_avgpool.flatten(1), train_mid.flatten(1)], dim=1).numpy()
X_test = torch.cat([test_avgpool.flatten(1), test_mid.flatten(1)], dim=1).numpy()

# Real labels collected during extraction (replaces the all-zero placeholders,
# on which an SVM cannot be fitted)
y_train = train_targets.numpy()
y_test = test_targets.numpy()

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM model with RBF kernel
svm_mobile = SVC(kernel='rbf')
svm_mobile.fit(X_train, y_train)

# Make predictions
y_pred = svm_mobile.predict(X_test)

# Calculate confusion matrix
cm = confusion_matrix(y_test, y_pred, labels=svm_mobile.classes_)
print("Confusion Matrix:")
print(cm)

# Calculate a micro-averaged one-vs-rest ROC curve: roc_curve only handles
# binary targets, so the binarized labels and the per-class decision scores
# are flattened into one binary problem.
y_score = svm_mobile.decision_function(X_test)
y_test_bin = label_binarize(y_test, classes=svm_mobile.classes_)
fpr, tpr, thresholds = roc_curve(y_test_bin.ravel(), y_score.ravel())
roc_auc = auc(fpr, tpr)

# Plot ROC curve
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc='lower right')
plt.show()


In [None]:
!pip install scikit-learn matplotlib

Phase 3

In [None]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

# Relies on `dataset` and the feature extractors (model_avgpool, model_conv1,
# model_middle_layer) defined in earlier cells.

# Per-image preprocessing: resize to 224x224 and convert to a tensor
resize_step = transforms.Resize((224, 224))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# Iterate the whole dataset one image at a time, in dataset order
data_loader = DataLoader(dataset, shuffle=False, batch_size=1)

# Extract features (and labels) with the given model
def extract_avgpool_features(model, loader):
    """Run ``model`` over every batch of ``loader`` and collect outputs and targets.

    The model is switched to eval mode and gradients are disabled.

    Args:
        model: module mapping a batch of images to a batch of features.
        loader: iterable yielding ``(images, targets)`` tensor pairs.

    Returns:
        ``(features, labels)`` as NumPy arrays. ``features`` has one row per
        sample; singleton non-batch axes are removed (e.g. ``(N, C, 1, 1)``
        becomes ``(N, C)``). ``labels`` is the concatenation of all targets.
    """
    features = []
    labels = []

    model.eval()
    with torch.no_grad():
        for images, targets in loader:
            outputs = model(images).cpu().numpy()
            # Remove singleton axes but never the batch axis: with a
            # batch_size=1 loader a plain squeeze() drops the batch axis of
            # every batch, so the stacked rows would no longer be images.
            kept_shape = (outputs.shape[0],) + tuple(d for d in outputs.shape[1:] if d != 1)
            if len(kept_shape) == 1:
                # Keep a 2-D (n_samples, n_features) layout for scalar features
                kept_shape = (outputs.shape[0], 1)
            features.append(outputs.reshape(kept_shape))
            labels.append(targets.cpu().numpy())

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels)
    return features, labels

# Extract model_avgpool features and labels for every image in data_loader
features_avgpool, labels = extract_avgpool_features(model_avgpool, data_loader)

# One flat feature row per entry along the first axis
flattened_features_avgpool = features_avgpool.reshape(len(features_avgpool), -1)


In [None]:

# Apply k-means clustering
n_clusters = 5  # Adjust as needed
kmeans = KMeans(n_clusters=n_clusters, random_state=42)

# Reshape the features to make them 2D (one row per image)
features_2d = features_avgpool.reshape(features_avgpool.shape[0], -1)

# Row i must describe dataset[i]; anything else means the extraction did not
# keep one row per image and the comparison below would show unrelated images.
if features_2d.shape[0] != len(dataset):
    raise ValueError(
        f"Expected one feature row per image ({len(dataset)}), got {features_2d.shape[0]}"
    )

# Perform k-means clustering
cluster_assignments = kmeans.fit_predict(features_2d)

# Select several images from the dataset (e.g., indices)
selected_image_indices = [0, 10, 20]

# Compare each selected image with its cluster representative
for idx in selected_image_indices:
    # Features of the selected image
    selected_image_features = features_2d[idx].reshape(1, -1)

    # Calculate distances to cluster centroids
    distances = np.linalg.norm(selected_image_features - kmeans.cluster_centers_, axis=1)

    # Find the cluster with the nearest centroid
    nearest_cluster = np.argmin(distances)

    # Representative of that cluster: the member closest to its centroid.
    # (The previous code used a non-existent `dataset.image_files` attribute
    # and a placeholder path, and simply took the first member.)
    member_indices = np.where(cluster_assignments == nearest_cluster)[0]
    member_distances = np.linalg.norm(
        features_2d[member_indices] - kmeans.cluster_centers_[nearest_cluster], axis=1
    )
    representative_image_idx = int(member_indices[np.argmin(member_distances)])
    # data_loader iterates `dataset` in order, so feature rows index the dataset directly
    representative_image = dataset[representative_image_idx][0].permute(1, 2, 0)

    # Display the selected image
    plt.subplot(1, 2, 1)
    plt.imshow(dataset[idx][0].permute(1, 2, 0))
    plt.title('Selected Image')

    # Display the representative image of the nearest cluster
    plt.subplot(1, 2, 2)
    plt.imshow(representative_image)
    plt.title(f'Representative Image (Cluster {nearest_cluster})')

    plt.show()


In [None]:
# Concatenate the avgpool, block1 and block3 feature arrays along axis 1.
# NOTE(review): the block1_*/block3_* arrays used here are only loaded from
# their .npy files in the following cell, so on a fresh top-to-bottom run this
# cell executes before they exist. The same concatenation is repeated after
# the loading cell - presumably this cell is a leftover; confirm and remove.
train_features = np.concatenate([avgpool_features_train, block1_features_train, block3_features_train], axis=1)
val_features = np.concatenate([avgpool_features_val, block1_features_val, block3_features_val], axis=1)

In [None]:
# Read the previously saved feature/label arrays back from their .npy files,
# one (features, labels) pair per layer and split.

# AvgPool layer
avgpool_features_train, avgpool_labels_train = (
    np.load('/content/resnet_output/avgpool_features_train.npy'),
    np.load('/content/resnet_output/avgpool_labels_train.npy'),
)
avgpool_features_val, avgpool_labels_val = (
    np.load('/content/resnet_output/avgpool_features_val.npy'),
    np.load('/content/resnet_output/avgpool_labels_val.npy'),
)

# Block 1
block1_features_train, block1_labels_train = (
    np.load('/content/resnet_output/block1_features_train.npy'),
    np.load('/content/resnet_output/block1_labels_train.npy'),
)
block1_features_val, block1_labels_val = (
    np.load('/content/resnet_output/block1_features_val.npy'),
    np.load('/content/resnet_output/block1_labels_val.npy'),
)

# Block 3
block3_features_train, block3_labels_train = (
    np.load('/content/resnet_output/block3_features_train.npy'),
    np.load('/content/resnet_output/block3_labels_train.npy'),
)
block3_features_val, block3_labels_val = (
    np.load('/content/resnet_output/block3_features_val.npy'),
    np.load('/content/resnet_output/block3_labels_val.npy'),
)

In [None]:
# Join the avgpool, block1 and block3 arrays along axis 1, per split
train_parts = (avgpool_features_train, block1_features_train, block3_features_train)
val_parts = (avgpool_features_val, block1_features_val, block3_features_val)
train_features = np.concatenate(train_parts, axis=1)
val_features = np.concatenate(val_parts, axis=1)

# Collapse everything after the first axis into a single feature row
train_features_flat = train_features.reshape(len(train_features), -1)
val_features_flat = val_features.reshape(len(val_features), -1)

In [None]:
from sklearn.cluster import KMeans
import numpy as np

# Join the three training feature arrays along axis 1
train_features = np.concatenate(
    (avgpool_features_train, block1_features_train, block3_features_train),
    axis=1,
)

# One flat feature row per training sample
train_features_flat = train_features.reshape(len(train_features), -1)

# Cluster the training features with k-means (fixed seed)
n_clusters = 5  # number of clusters; adjust as required
kmeans = KMeans(random_state=42, n_clusters=n_clusters)
cluster_assignments = kmeans.fit_predict(train_features_flat)

# The rest of your code remains the same...


In [None]:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

# Requires the feature arrays (avgpool_*, block1_*, block3_*) loaded in an
# earlier cell.

# Concatenate features from different layers
train_features = np.concatenate([avgpool_features_train, block1_features_train, block3_features_train], axis=1)
val_features = np.concatenate([avgpool_features_val, block1_features_val, block3_features_val], axis=1)

# Flatten the features
train_features_flat = train_features.reshape(train_features.shape[0], -1)
val_features_flat = val_features.reshape(val_features.shape[0], -1)

# Apply k-means clustering to training features
n_clusters = 5  # You can adjust this based on your requirements
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_assignments = kmeans.fit_predict(train_features_flat)

# Select several images from the validation set (e.g., indices)
selected_image_indices = [0, 10, 20]

# Compare each selected image with its cluster representative
for idx in selected_image_indices:
    # Feature row of the selected validation image
    # NOTE(review): assumes row idx of the loaded features corresponds to
    # val_dataset[idx] - confirm the features were extracted in dataset order.
    selected_image_features = val_features_flat[idx].reshape(1, -1)

    # Calculate distances to cluster centroids
    distances = np.linalg.norm(selected_image_features - kmeans.cluster_centers_, axis=1)

    # Find the cluster with the nearest centroid
    nearest_cluster = np.argmin(distances)

    # Display the selected image
    plt.subplot(1, 2, 1)
    plt.imshow(val_dataset[idx][0].permute(1, 2, 0))
    plt.title('Selected Image')

    # Cluster representative: the mean feature vector of the cluster members,
    # restored to the per-sample feature shape.
    representative_images = train_features_flat[cluster_assignments == nearest_cluster]
    representative_image = representative_images.mean(axis=0)
    representative_image = representative_image.reshape(train_features.shape[1:])

    # imshow needs a 2-D map. Reshaping to shape[1:3] only worked for exactly
    # 3-D features, so reduce the restored array according to its rank instead.
    if representative_image.ndim == 1:
        # Plain feature vector: show it as a single-row strip
        representative_image = representative_image[np.newaxis, :]
    elif representative_image.ndim > 2:
        # Average over all leading axes, keeping the last two as the map
        representative_image = representative_image.reshape(
            -1, *representative_image.shape[-2:]
        ).mean(axis=0)

    plt.subplot(1, 2, 2)
    if representative_image.shape[0] == 1:
        # Stretch the one-row strip so it stays visible
        plt.imshow(representative_image, cmap='viridis', aspect='auto')
    else:
        plt.imshow(representative_image, cmap='viridis')
    plt.title(f'Representative Image (Cluster {nearest_cluster})')

    plt.show()


In [None]:
# Load the saved AvgPool labels/features of each backbone, one
# (labels, features) pair per split.

# ResNet
resnet_avgpool_labels_train, resnet_avgpool_features_train = (
    np.load('res-net/avgpool_labels_train.npy'),
    np.load('res-net/avgpool_features_train.npy'),
)
resnet_avgpool_features_val, resnet_avgpool_labels_val = (
    np.load('res-net/avgpool_features_val.npy'),
    np.load('res-net/avgpool_labels_val.npy'),
)

# Inception
inception_avgpool_labels_train, inception_avgpool_features_train = (
    np.load('inception/inception_avgpool_labels_train.npy'),
    np.load('inception/inception_avgpool_features_train.npy'),
)
inception_avgpool_features_val, inception_avgpool_labels_val = (
    np.load('inception/inception_avgpool_features_val.npy'),
    np.load('inception/inception_avgpool_labels_val.npy'),
)

# MobileNet
mobilenet_avgpool_labels_train, mobilenet_avgpool_features_train = (
    np.load('mobile-net/mobile_avgpool_labels_train.npy'),
    np.load('mobile-net/mobile_avgpool_features_train.npy'),
)
mobilenet_avgpool_features_val, mobilenet_avgpool_labels_val = (
    np.load('mobile-net/mobile_avgpool_features_val.npy'),
    np.load('mobile-net/mobile_avgpool_labels_val.npy'),
)

# Report the shape of each backbone's training feature array
print("resnet_avgpool_features_train.shape", resnet_avgpool_features_train.shape)
print("inception_avgpool_features_train.shape", inception_avgpool_features_train.shape)
print("mobilenet_avgpool_features_train.shape", mobilenet_avgpool_features_train.shape)