In [1]:
import pandas as pd
import os

In [2]:
file_path = os.path.join('.','final_cloth_img.csv')

In [3]:
cloth_img=pd.read_csv(file_path)

In [4]:

footwear_img=pd.read_csv('final_footwear_img.csv')

In [5]:
folder_path=os.path.join('.','images')

In [6]:
imgs_df=pd.concat([cloth_img,footwear_img],ignore_index=True)

In [7]:
imgs_df

Unnamed: 0,id,masterCategory,subCategory,articleType,filename
0,24607,Apparel,Bottomwear,Jeans,24607.jpg
1,13259,Apparel,Bottomwear,Jeans,13259.jpg
2,7188,Apparel,Bottomwear,Jeans,7188.jpg
3,7709,Apparel,Bottomwear,Jeans,7709.jpg
4,50942,Apparel,Bottomwear,Jeans,50942.jpg
...,...,...,...,...,...
832,1786,Footwear,Shoes,Sports Shoes,1786.jpg
833,15719,Footwear,Shoes,Sports Shoes,15719.jpg
834,23929,Footwear,Shoes,Sports Shoes,23929.jpg
835,5393,Footwear,Shoes,Sports Shoes,5393.jpg


In [8]:
imgs_df['label']=imgs_df['masterCategory']+'_'+imgs_df['subCategory']+'_'+imgs_df['articleType']

In [9]:
imgs_df.head()

Unnamed: 0,id,masterCategory,subCategory,articleType,filename,label
0,24607,Apparel,Bottomwear,Jeans,24607.jpg,Apparel_Bottomwear_Jeans
1,13259,Apparel,Bottomwear,Jeans,13259.jpg,Apparel_Bottomwear_Jeans
2,7188,Apparel,Bottomwear,Jeans,7188.jpg,Apparel_Bottomwear_Jeans
3,7709,Apparel,Bottomwear,Jeans,7709.jpg,Apparel_Bottomwear_Jeans
4,50942,Apparel,Bottomwear,Jeans,50942.jpg,Apparel_Bottomwear_Jeans


In [10]:
imgs_df['label'].unique()


array(['Apparel_Bottomwear_Jeans', 'Apparel_Bottomwear_Skirts',
       'Apparel_Bottomwear_Track Pants', 'Apparel_Saree_Sarees',
       'Apparel_Topwear_Shirts', 'Apparel_Topwear_Sweaters',
       'Apparel_Topwear_Tshirts', 'Footwear_Flip Flops_Flip Flops',
       'Footwear_Sandal_Sandals', 'Footwear_Shoes_Casual Shoes',
       'Footwear_Shoes_Formal Shoes', 'Footwear_Shoes_Sports Shoes'],
      dtype=object)

In [11]:
import random
import numpy as np

import matplotlib.pyplot as plt
import os
import numpy as np


In [12]:
target_size = (224, 224)

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import numpy as np


In [14]:
from PIL import Image
import numpy as np

def preprocess_images(imgs_df, folder_path, target_size=(224, 224)):
    """
    Preprocess images and labels without TensorFlow.

    Args:
        imgs_df: DataFrame containing image metadata (e.g., filename, category, label).
        folder_path: Root folder containing the images.
        target_size: Tuple specifying the target size of the image (width, height).

    Returns:
        images: Numpy array of preprocessed images.
        labels: List of corresponding labels.
    """
    images = []
    labels = []

    for ind, row in imgs_df.iterrows():
        try:
            # Build the file path based on the category
            if row['masterCategory'] == 'Apparel':
                file_path = f"{folder_path}/clothing/{row['filename']}"
            elif row['masterCategory'] == 'Footwear':
                file_path = f"{folder_path}/foot wear/{row['filename']}"
            else:
                continue  # Skip rows with other categories

            # Load the image using Pillow
            img = Image.open(file_path).convert('RGB')

            # Resize the image
            img = img.resize(target_size)

            # Convert the image to a numpy array
            img_array = np.asarray(img, dtype=np.float32)

            # Normalize the image to match ResNet preprocessing
            img_array /= 255.0  # Scale pixel values to [0, 1]
          

            # Append the preprocessed image and label
            images.append(img_array)
            labels.append(row['label'])

        except Exception as e:
            print(f"Error processing file {row['id']}: {e}")

    # Convert the list of images to a numpy array
    images = np.array(images)

    return images, labels


In [15]:
class SiameseDataset(Dataset):
    def __init__(self, pairs, labels, transform=None):
        """
        Args:
            pairs: Array of image pairs (numpy arrays).
            labels: Array of labels (1 for similar, 0 for dissimilar).
            transform: Transformations to apply to the images.
        """
        self.pairs = pairs
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1, img2 = self.pairs[idx]
        label = self.labels[idx]

        # Apply transformations if provided
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)


In [16]:
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        resnet = models.resnet50(pretrained=True)
        for param in resnet.parameters():
            param.requires_grad = False  # Freeze the pretrained layers
        self.feature_extractor = nn.Sequential(*list(resnet.children())[:-1])  # Remove the fully connected layer

    def forward(self, x):
        # Pass through ResNet and flatten the output
        x = self.feature_extractor(x)
        return torch.flatten(x, 1)


In [17]:
class SiameseNetwork(nn.Module):
    def __init__(self, embedding_dim=2048):
        super(SiameseNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, embedding1, embedding2):
        # Compute the absolute difference between embeddings
        diff = torch.abs(embedding1 - embedding2)
        return self.fc(diff)


In [18]:
images, labels = preprocess_images(imgs_df, folder_path, target_size)

In [19]:
from sklearn.preprocessing import LabelEncoder

# Encode string labels to integers
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

print(f"Encoded labels: {encoded_labels}")

Encoded labels: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  

In [20]:
import numpy as np

def create_image_pairs(images, labels, target_pair_count=2000):
    """
    Create at least `target_pair_count` pairs of images and their corresponding labels (1 for similar, 0 for dissimilar).

    Args:
    - images: Array of preprocessed images.
    - labels: Array of labels corresponding to the images.
    - target_pair_count: Total number of pairs to generate.

    Returns:
    - pairs: Array of paired images.
    - pair_labels: Array of labels for the pairs (1: similar, 0: dissimilar).
    """
    pairs = []
    pair_labels = []
    unique_labels = np.unique(labels)
    #print('here')
    label_indices = {label: np.where(labels == label)[0] for label in unique_labels}
    print(label_indices)
    #print('here_1')

    # Calculate number of positive and negative pairs
    num_positive_pairs = target_pair_count // 2
    num_negative_pairs = target_pair_count - num_positive_pairs

    # Generate positive pairs
    while len(pairs) < num_positive_pairs:
        #print('h3')
        for label, indices in label_indices.items():
            if len(indices) < 2:
                continue  # Skip if not enough samples for positive pairs
            i, j = np.random.choice(indices, size=2, replace=False)
            pairs.append([images[i], images[j]])
            pair_labels.append(1)
            #print('1')
            if len(pairs) >= num_positive_pairs:
                break

    # Generate negative pairs
    while len(pairs) < target_pair_count:
        for label, indices in label_indices.items():
            i = np.random.choice(indices)
            neg_label = np.random.choice(unique_labels[unique_labels != label])
            j = np.random.choice(label_indices[neg_label])
            pairs.append([images[i], images[j]])
            pair_labels.append(0)
            #print('0')
            if len(pairs) >= target_pair_count:
                break

    return np.array(pairs), np.array(pair_labels)


In [21]:
# Assuming `images` is a numpy array of preprocessed images and `labels` is a numpy array of their labels
pairs, pair_labels = create_image_pairs(images, encoded_labels, target_pair_count=4000)
print(f"Number of pairs: {len(pairs)}")
print(f"Pair labels distribution: {np.unique(pair_labels, return_counts=True)}")

{0: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69], dtype=int64), 1: array([ 70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,
        83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
        96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
       109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
       122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
       135, 136, 137, 138, 139], dtype=int64), 2: array([140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
       153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
       166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
       179, 180, 181, 182, 183, 184, 

In [22]:
from sklearn.model_selection import train_test_split

# Split pairs and labels into training and validation sets
train_pairs, val_pairs, train_labels, val_labels = train_test_split(
    pairs, pair_labels, test_size=0.2, random_state=42
)

In [23]:


# Define transformations for the images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create datasets
train_dataset = SiameseDataset(train_pairs, train_labels, transform=transform)
val_dataset = SiameseDataset(val_pairs, val_labels, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [24]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize models
feature_extractor = FeatureExtractor().to(device)
siamese_network = SiameseNetwork(embedding_dim=2048).to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(siamese_network.parameters(), lr=0.0001)




In [25]:
epochs = 10
train_losses = []
val_losses = []

for epoch in range(epochs):
    # Training Phase
    feature_extractor.eval()  # Feature extractor in evaluation mode
    siamese_network.train()  # Siamese network in training mode

    train_loss = 0.0
    for img1, img2, labels in train_loader:
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

        # Extract embeddings
        with torch.no_grad():
            embedding1 = feature_extractor(img1)
            embedding2 = feature_extractor(img2)

        # Forward pass through the Siamese network
        outputs = siamese_network(embedding1, embedding2).squeeze()
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)
    train_losses.append(train_loss)

    # Validation Phase
    feature_extractor.eval()
    siamese_network.eval()

    val_loss = 0.0
    with torch.no_grad():
        for img1, img2, labels in val_loader:
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)

            # Extract embeddings
            embedding1 = feature_extractor(img1)
            embedding2 = feature_extractor(img2)

            # Forward pass through the Siamese network
            outputs = siamese_network(embedding1, embedding2).squeeze()
            loss = criterion(outputs, labels)

            val_loss += loss.item()

    val_loss /= len(val_loader)
    val_losses.append(val_loss)

    # Print the losses for this epoch
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")


Epoch 1/10, Train Loss: 0.5617, Val Loss: 0.4902
Epoch 2/10, Train Loss: 0.4151, Val Loss: 0.3876
Epoch 3/10, Train Loss: 0.3446, Val Loss: 0.3460
Epoch 4/10, Train Loss: 0.3043, Val Loss: 0.3153
Epoch 5/10, Train Loss: 0.2741, Val Loss: 0.2884
Epoch 6/10, Train Loss: 0.2474, Val Loss: 0.2743
Epoch 7/10, Train Loss: 0.2302, Val Loss: 0.2676
Epoch 8/10, Train Loss: 0.2123, Val Loss: 0.2543
Epoch 9/10, Train Loss: 0.2007, Val Loss: 0.2473
Epoch 10/10, Train Loss: 0.1870, Val Loss: 0.2423


In [26]:
torch.save(siamese_network.state_dict(), "siamese_network_2.pth")
