In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
from sklearn.model_selection import train_test_split

def extract_features(image_paths):
    """Extract one feature vector per readable image using a truncated ResNet-18.

    The network's final child module (the classification layer) is dropped, so
    the model emits its penultimate activations instead of class scores.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list of NumPy arrays, one per image that was processed successfully,
        in the same order as ``image_paths``. Images that fail to load or
        process are reported on stdout and skipped, so the result can be
        shorter than the input.
    """
    # Load a pre-trained ResNet model.
    # NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=``; kept as-is for compatibility with the installed
    # version -- confirm before upgrading.
    resnet = models.resnet18(pretrained=True)
    # Remove the classification layer (the last layer) to get feature vectors
    resnet_feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])
    # Evaluation mode: inference behaviour for layers such as batch norm.
    resnet_feature_extractor.eval()

    # The preprocessing pipeline does not depend on the image, so build it once
    # instead of once per file.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    features = []
    for img_path in image_paths:
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been converted; convert() returns an image that
            # no longer depends on the open file.
            with Image.open(img_path) as raw_img:
                img = raw_img.convert('RGB')
            img_tensor = preprocess(img).unsqueeze(0)  # Add batch dimension

            # Extract features without tracking gradients.
            with torch.no_grad():
                feature_vector = resnet_feature_extractor(img_tensor)
            features.append(feature_vector.squeeze().numpy())
        except Exception as e:
            # Deliberately best-effort: one unreadable/corrupt file must not
            # abort the whole extraction run.
            print("Skipping image:", img_path)
            print("Error:", e)

    return features

# Define your dataset directory
dataset_dir = r"Dataset"

# Get list of class folders. Sorted because os.listdir() returns entries in
# arbitrary order, and the position in this list becomes the class label.
class_folders = sorted(
    folder
    for folder in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, folder))
)

# File extensions treated as images (matched case-insensitively below).
image_extensions = ('.jpg', '.jpeg', '.png')

# Prepare data and labels
data = []
labels = []
image_count = 0
for i, folder in enumerate(class_folders):
    class_dir = os.path.join(dataset_dir, folder)
    # Sorted for a reproducible row order, so random_state=42 below yields the
    # same train/test split on every machine.
    image_files = sorted(
        os.path.join(class_dir, file)
        for file in os.listdir(class_dir)
        if file.lower().endswith(image_extensions)
    )
    image_count += len(image_files)
    features = extract_features(image_files)
    data.extend(features)
    # extract_features may skip unreadable files, so label only what came back.
    labels.extend([i] * len(features))

print("Total number of images in the dataset:", image_count)
print("Number of features extracted:", len(data))

data = np.array(data)
labels = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)




Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Barong Indo\15.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Barong Indo\\15.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\100.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\100.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\101.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\101.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\103.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\103.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\104.jpg
Error: cannot identif

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\180.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\180.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\182.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\182.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\184.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\184.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\185.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\185.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\186.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\274.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\274.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\28.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\28.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\280.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\280.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\283.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\283.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\285.jpg
Error: cannot identify imag

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\346.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\346.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\349.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\349.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\35.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\35.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\350.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\350.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\354.jpg
Error: cannot identify imag

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\427.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\427.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\43.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\43.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\430.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\430.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\431.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\431.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\433.jpg
Error: cannot identify imag

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\509.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\509.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\510.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\510.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\511.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\511.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\514.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\514.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\515.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\608.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\608.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\61.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\61.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\610.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\610.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\611.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\611.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\617.jpg
Error: cannot identify imag

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\698.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\698.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\699.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\699.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\700.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\700.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\702.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\702.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\703.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\773.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\773.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\774.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\774.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\777.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\777.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\778.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Fandango\\778.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Fandango\78.jpg
Error: cannot identify ima

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\125.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\125.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\127.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\127.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\128.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\128.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\129.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\129.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\131.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\191.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\191.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\196.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\196.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\197.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\197.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\199.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\199.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\200.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\251.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\251.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\253.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\253.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\254.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\254.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\255.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\255.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\256.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\320.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\320.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\321.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\321.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\323.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\323.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\324.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\324.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\325.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\384.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\384.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\385.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\385.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\386.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\386.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\387.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\387.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\388.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\44.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\44.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\440.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\440.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\441.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\441.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\447.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\447.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\448.jpg
Error: cannot identify imag

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\511.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\511.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\512.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\512.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\513.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\513.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\514.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\514.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\515.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\577.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\577.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\579.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\579.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\583.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\583.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\584.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\584.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\587.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\634.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\634.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\635.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\635.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\637.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\637.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\638.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\638.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\64.jpg
Error: cannot identify ima

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\702.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\702.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\703.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\703.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\704.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\704.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\705.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\705.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\706.jpg
Error: cannot identify im

Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\760.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\760.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\763.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\763.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\764.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\764.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\767.jpg
Error: cannot identify image file 'C:\\Users\\dhede\\OneDrive\\Desktop\\SEM IV\\ML\\Projectwork\\Dataset\\Manipuri\\767.jpg'
Skipping image: C:\Users\dhede\OneDrive\Desktop\SEM IV\ML\Projectwork\Dataset\Manipuri\768.jpg
Error: cannot identify im

In [3]:
import numpy as np

class DecisionTree:
    """Entropy-based decision tree classifier with binned candidate thresholds.

    Numeric feature matrices are split with ``value <= threshold`` tests, where
    candidate thresholds are the interior bin edges of each feature column.
    Non-numeric matrices are split with ``value == category`` tests.

    The fitted tree is a nested dict. Every internal node has exactly two keys,
    ``(feature_index, threshold, True)`` and ``(feature_index, threshold, False)``,
    mapping to the subtree (or leaf) for samples that pass / fail the split
    test. Leaves are class labels.

    Class labels must be non-negative integers because leaves are computed with
    ``np.bincount``.
    """

    def __init__(self, max_depth=None, binning_type='equal_width', num_bins=10):
        """Store hyper-parameters.

        Args:
            max_depth: Depth at which growth stops; ``None`` means grow until
                nodes are pure or no useful split exists.
            binning_type: ``'equal_width'`` or ``'equal_frequency'``.
            num_bins: Number of bins used to derive candidate thresholds.
        """
        self.max_depth = max_depth
        self.binning_type = binning_type
        self.num_bins = num_bins
        self.tree = None

    @staticmethod
    def _split_mask(column, threshold):
        """Return the boolean mask of values that go to the ``True`` branch."""
        # Single definition of the split test, shared by scoring, fitting and
        # prediction so the three can never disagree.
        if np.issubdtype(column.dtype, np.number):
            return column <= threshold
        return column == threshold

    def calculate_entropy(self, labels):
        """Calculate the Shannon entropy (base 2) of a collection of labels."""
        _, counts = np.unique(labels, return_counts=True)
        probabilities = counts / len(labels)
        return -np.sum(probabilities * np.log2(probabilities))

    def _information_gain(self, labels, true_indices, parent_entropy=None):
        """Return the entropy reduction obtained by partitioning with a mask."""
        if parent_entropy is None:
            parent_entropy = self.calculate_entropy(labels)
        true_labels = labels[true_indices]
        false_labels = labels[~true_indices]
        true_weight = len(true_labels) / len(labels)
        false_weight = len(false_labels) / len(labels)
        children_entropy = (
            true_weight * self.calculate_entropy(true_labels)
            + false_weight * self.calculate_entropy(false_labels)
        )
        return parent_entropy - children_entropy

    def calculate_information_gain(self, data, labels, feature_index, threshold):
        """Calculate information gain for a specific feature and threshold.

        Args:
            data: 2-D array of samples.
            labels: 1-D array of labels aligned with ``data`` rows.
            feature_index: Column to split on.
            threshold: Split value (``<=`` for numeric data, ``==`` otherwise).

        Returns:
            Parent entropy minus the size-weighted entropy of the two children.
        """
        true_indices = self._split_mask(data[:, feature_index], threshold)
        return self._information_gain(labels, true_indices)

    def binning(self, data, num_bins=None, binning_type=None):
        """Perform binning on continuous data.

        Args:
            data: Array of numeric values.
            num_bins: Number of bins; defaults to ``self.num_bins``.
            binning_type: Binning strategy; defaults to ``self.binning_type``.

        Returns:
            ``(binned_data, bins)``: the ``np.digitize`` bin index of every
            value and the ``num_bins + 1`` bin edges.

        Raises:
            ValueError: If ``data`` is empty or ``binning_type`` is unknown.
        """
        if binning_type is None:
            binning_type = self.binning_type
        if num_bins is None:
            num_bins = self.num_bins

        if len(data) == 0:
            raise ValueError("Data array is empty.")

        if binning_type == 'equal_width':
            min_val = np.min(data)
            max_val = np.max(data)
            if min_val == max_val:
                # All values identical: widen the range so the edges stay distinct.
                bins = np.linspace(min_val, max_val + 1, num_bins + 1)
            else:
                bins = np.linspace(min_val, max_val, num_bins + 1)
        elif binning_type == 'equal_frequency':
            bins = np.percentile(data, np.linspace(0, 100, num_bins + 1))
        else:
            raise ValueError("Invalid binning type. Choose 'equal_width' or 'equal_frequency'.")
        binned_data = np.digitize(data, bins)
        return binned_data, bins

    def find_best_split(self, data, labels):
        """Find the split with the highest information gain.

        Args:
            data: 2-D array of samples.
            labels: 1-D array of labels aligned with ``data`` rows.

        Returns:
            ``(feature_index, threshold)`` of the best split, or
            ``(None, None)`` when no candidate separates the samples into two
            non-empty groups.
        """
        best_information_gain = -1
        best_feature_index = None
        best_threshold = None

        is_numeric = np.issubdtype(data.dtype, np.number)
        # Identical for every candidate, so compute it once.
        parent_entropy = self.calculate_entropy(labels)
        num_samples = len(labels)

        for feature_index in range(data.shape[1]):
            feature_values = data[:, feature_index]
            unique_values = np.unique(feature_values)
            if len(unique_values) == 1:
                continue  # A constant feature cannot separate anything.

            if is_numeric:
                # Interior bin edges are the candidate thresholds; duplicates
                # (common with equal-frequency binning) add nothing.
                _, bins = self.binning(feature_values)
                candidates = np.unique(bins[1:-1])
            else:
                candidates = unique_values

            for threshold in candidates:
                true_indices = self._split_mask(feature_values, threshold)
                num_true = np.count_nonzero(true_indices)
                if num_true == 0 or num_true == num_samples:
                    # One side would be empty: the "split" reproduces the
                    # parent and would make an unbounded tree recurse forever.
                    continue
                information_gain = self._information_gain(
                    labels, true_indices, parent_entropy
                )
                if information_gain > best_information_gain:
                    best_information_gain = information_gain
                    best_feature_index = feature_index
                    best_threshold = threshold

        return best_feature_index, best_threshold

    def fit(self, X, y, depth=0):
        """Build the decision tree and return it.

        The tree is also stored on ``self.tree`` so ``predict`` works right
        after ``fit``; assigning the return value to ``self.tree`` manually
        remains valid.

        Args:
            X: 2-D array of samples.
            y: 1-D array of non-negative integer class labels.
            depth: Depth assigned to the root of the tree being built.

        Returns:
            The nested-dict tree, a bare class label when no split is made, or
            ``None`` when ``X`` or ``y`` is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) == 0 or len(y) == 0:
            self.tree = None
            return None

        self.tree = self._build(X, y, depth)
        return self.tree

    def _build(self, X, y, depth):
        """Recursively grow the subtree for the non-empty sample set (X, y)."""
        majority_label = np.argmax(np.bincount(y))
        if depth == self.max_depth or len(np.unique(y)) == 1:
            return majority_label

        best_feature_index, best_threshold = self.find_best_split(X, y)
        if best_feature_index is None:
            return majority_label

        if depth == 0:
            print("Principle Root Node: Feature Index =", best_feature_index, ", Threshold =", best_threshold)

        # Partition on the raw feature values with the same test used to score
        # the split and to route samples at prediction time.
        true_indices = self._split_mask(X[:, best_feature_index], best_threshold)
        if not true_indices.any() or true_indices.all():
            # Defensive: never create an empty child (it would predict None).
            return majority_label

        return {
            (best_feature_index, best_threshold, True):
                self._build(X[true_indices], y[true_indices], depth + 1),
            (best_feature_index, best_threshold, False):
                self._build(X[~true_indices], y[~true_indices], depth + 1),
        }

    def predict_sample(self, sample):
        """Predict the class label for a single sample.

        Args:
            sample: 1-D array of feature values.

        Returns:
            The label stored at the leaf reached by the sample, or ``None`` if
            the tree has not been fitted.
        """
        sample = np.asarray(sample)
        is_numeric = np.issubdtype(sample.dtype, np.number)
        node = self.tree
        while isinstance(node, dict):
            # Both keys of a node share the same feature index and threshold.
            feature_index, threshold, _ = next(iter(node))
            if is_numeric:
                goes_true = sample[feature_index] <= threshold
            else:
                goes_true = sample[feature_index] == threshold
            node = node[(feature_index, threshold, bool(goes_true))]
        return node

    def predict(self, X):
        """Predict class labels for multiple samples; returns a list of labels."""
        return [self.predict_sample(sample) for sample in np.asarray(X)]

# Train a depth-limited tree on the extracted image features, using five
# equal-width bins per feature to generate candidate thresholds.
tree_params = {
    'max_depth': 3,
    'binning_type': 'equal_width',
    'num_bins': 5,
}
dt = DecisionTree(**tree_params)
# fit() returns the built tree; keep it on the instance for prediction.
dt.tree = dt.fit(X_train, y_train)


Principle Root Node: Feature Index = 252 , Threshold = 1.7924240112304688
