<a href="https://colab.research.google.com/github/siddharthapramanik771/classification_imagenet/blob/main/Image_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import necessary packages 
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
import requests
import tarfile
import os
import numpy as np
from tensorflow.keras.preprocessing import image
import os
import xml.etree.ElementTree as ET
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, multilabel_confusion_matrix
from sklearn.model_selection import train_test_split

In [2]:
# Step 1: Download pre-trained VGG16 model
# Load the pre-trained VGG16 model with weights trained on ImageNet
model = VGG16(weights='imagenet', include_top=False)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Step 2: Download the PASCAL VOC 2007 dataset
url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar"
filepath = "/content/drive/MyDrive/"
filename = "VOCtrainval_06-Nov-2007.tar"

# check if we already have the data 
if not os.path.exists(filepath+filename):
  # Download the file
  response = requests.get(url)
  with open(filepath+filename, "wb") as f:
      f.write(response.content)

  # Extract the contents
  with tarfile.open(filepath+filename, "r") as tar:
      tar.extractall(filepath)

In [5]:
# Step 3: Extract features from the last fully-connected layer of VGG16
def extract_features(image_path):
    """
    Extract features from an image using the VGG16 model.
    
    Args:
        image_path (str): Path to the image file.
    
    Returns:
        numpy.ndarray: Flattened array of image features.
    """
    img = image.load_img(image_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()



def parse_xml(xml_file):
    """
    Parse an XML file and extract class labels.
    
    Args:
        xml_file (str): Path to the XML file.
    
    Returns:
        list: List of class labels.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()
    objects = root.findall("object")
    labels = []
    for obj in objects:
        name = obj.find("name").text
        if name not in class_labels:
            class_labels.append(name)
        labels.append(class_labels.index(name))
    return labels


image_folder = filepath + "VOCdevkit/VOC2007/JPEGImages/"
annotations_folder = filepath + "VOCdevkit/VOC2007/Annotations"
class_labels = []  # List to store unique class labels
features = []  # List to store extracted features
labels = []  # List to store corresponding labels
images = []  # List to store image names
for image_name in os.listdir(image_folder):
    feat = extract_features(image_folder + image_name)
    xml_path = os.path.join(annotations_folder, image_name.split('.')[0] + '.xml')
    labels.append(parse_xml(xml_path))
    features.append(feat)
    images.append(image_name)

features = np.array(features)
labels = np.array(labels, dtype='object')




In [6]:
# Step 4: Train binary one-vs.-rest SVM classifiers

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create a list to store the trained binary classifiers
classifiers = []

# Train binary one-vs-rest SVM classifiers for each class
for class_idx in range(len(class_labels)):
    # Convert the multi-label problem to a binary classification problem
    y_train_binary = np.array([1 if class_idx in sample_labels else 0 for sample_labels in y_train])
    
    # Create an SVM classifier with linear kernel
    classifier = SVC(kernel='linear')
    
    # Fit the classifier to the training data
    classifier.fit(X_train, y_train_binary)
    
    # Add the trained classifier to the list
    classifiers.append(classifier)


In [7]:
# Step 5: Evaluate classification accuracy and confusion matrix

# Create empty lists to store accuracies and confusion matrices
accuracies = []
confusion_matrices = []

# Iterate over the classifiers and evaluate accuracy and confusion matrix for each class
for idx, classifier in enumerate(classifiers):
    # Predict labels for the validation set
    y_pred = classifier.predict(X_val)
    
    # Convert the multi-label problem to a binary classification problem for the current class
    y_val_binary = np.array([1 if idx in sample_labels else 0 for sample_labels in y_val])
    
    # Compute accuracy score for the current class
    accuracy = accuracy_score(y_val_binary, y_pred)
    
    # Append accuracy to the accuracies list
    accuracies.append(accuracy)
    
    # Compute the confusion matrix for the current class
    cm = multilabel_confusion_matrix(y_val_binary, y_pred)
    
    # Append confusion matrix to the confusion_matrices list
    confusion_matrices.append(cm)

# Iterate over the class labels and print accuracy and confusion matrix for each class
for class_idx, class_label in enumerate(class_labels):
    print(f"Accuracy for class {class_label}: {accuracies[class_idx]}")
    print(f"Confusion matrix for class {class_label}:\n{confusion_matrices[class_idx]}")


Accuracy for class aeroplane: 0.9868554095045501
Confusion matrix for class aeroplane:
[[[ 34  10]
  [  3 942]]

 [[942   3]
  [ 10  34]]]
Accuracy for class boat: 0.9888776541961577
Confusion matrix for class boat:
[[[ 21   6]
  [  5 957]]

 [[957   5]
  [  6  21]]]
Accuracy for class person: 0.8746208291203236
Confusion matrix for class person:
[[[357  76]
  [ 48 508]]

 [[508  48]
  [ 76 357]]]
Accuracy for class chair: 0.9241658240647118
Confusion matrix for class chair:
[[[ 57  48]
  [ 27 857]]

 [[857  27]
  [ 48  57]]]
Accuracy for class diningtable: 0.9656218402426694
Confusion matrix for class diningtable:
[[[ 26  25]
  [  9 929]]

 [[929   9]
  [ 25  26]]]
Accuracy for class pottedplant: 0.9656218402426694
Confusion matrix for class pottedplant:
[[[ 18  28]
  [  6 937]]

 [[937   6]
  [ 28  18]]]
Accuracy for class sofa: 0.9383215369059656
Confusion matrix for class sofa:
[[[ 31  37]
  [ 24 897]]

 [[897  24]
  [ 37  31]]]
Accuracy for class dog: 0.9544994944388271
Confusion 