In [7]:
import numpy as np 
import pandas as pd
import torch
import os
import time
import random
import matplotlib.pyplot as plt
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split, Dataset
from PIL import Image
from pathlib import Path 
from sklearn.preprocessing import LabelEncoder
from datetime import datetime
from zoneinfo import ZoneInfo

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [9]:
folder_train = "/kaggle/input/visual-taxonomy/train_images"
folder_test = "/kaggle/input/visual-taxonomy/test_images"
train_attribute = pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')
test_attribute = pd.read_csv('/kaggle/input/visual-taxonomy/test.csv')
category_attributes = pd.read_parquet("/kaggle/input/visual-taxonomy/category_attributes.parquet")

In [None]:
/kaggle/input/visual-taxonomy/test_images/000000.jpg


In [7]:
train_attribute

Unnamed: 0,id,Category,len,attr_1,attr_2,attr_3,attr_4,attr_5,attr_6,attr_7,attr_8,attr_9,attr_10
0,0,Men Tshirts,5,default,round,printed,default,short sleeves,,,,,
1,1,Men Tshirts,5,multicolor,polo,solid,solid,short sleeves,,,,,
2,2,Men Tshirts,5,default,polo,solid,solid,short sleeves,,,,,
3,3,Men Tshirts,5,multicolor,polo,solid,solid,short sleeves,,,,,
4,4,Men Tshirts,5,multicolor,polo,solid,solid,short sleeves,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70208,70374,Women Tops & Tunics,10,multicolor,fitted,regular,square neck,casual,printed,default,short sleeves,regular sleeves,ruffles
70209,70375,Women Tops & Tunics,10,yellow,regular,crop,round neck,casual,default,default,short sleeves,regular sleeves,knitted
70210,70376,Women Tops & Tunics,10,maroon,fitted,crop,round neck,casual,solid,solid,short sleeves,regular sleeves,knitted
70211,70377,Women Tops & Tunics,10,,,,high,,,,short sleeves,,


In [8]:
test_attribute

Unnamed: 0,id,Category
0,0,Men Tshirts
1,1,Men Tshirts
2,2,Men Tshirts
3,3,Men Tshirts
4,4,Men Tshirts
...,...,...
30200,30484,Women Tops & Tunics
30201,30485,Women Tops & Tunics
30202,30486,Women Tops & Tunics
30203,30487,Women Tops & Tunics


In [10]:
category_attributes

Unnamed: 0,Category,No_of_attribute,Attribute_list
0,Men Tshirts,5,"[color, neck, pattern, print_or_pattern_type, ..."
1,Sarees,10,"[blouse_pattern, border, border_width, color, ..."
2,Kurtis,9,"[color, fit_shape, length, occasion, ornamenta..."
3,Women Tshirts,8,"[color, fit_shape, length, pattern, print_or_p..."
4,Women Tops & Tunics,10,"[color, fit_shape, length, neck_collar, ocassi..."


In [1]:
'6'.zfill(6)

'000006'

In [12]:
class ImageAttributeDataset(Dataset):
    def __init__(self, data_category_pd, img_dir, num_attribute, transform=None):
        self.img_labels = data_category_pd
        self.img_dir = img_dir
        self.transform = transform
        
        # Initialize label encoders for each attribute
        self.encoders = [LabelEncoder() for _ in range(num_attribute)]  
        
        # Fit encoders and transform categorical labels into integers, handling NaNs by filling them with -1
        for i in range(num_attribute):
            self.img_labels[f'attr_{i+1}'] = self.img_labels[f'attr_{i+1}'].fillna(-1)  # Fill NaN with -1
            valid_indices = self.img_labels[f'attr_{i+1}'] != -1
            self.img_labels.loc[valid_indices, f'attr_{i+1}'] = self.encoders[i].fit_transform(
                self.img_labels.loc[valid_indices, f'attr_{i+1}'])
    
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        # Load image
        img_name = os.path.join(self.img_dir, f"{str(self.img_labels.iloc[idx, 0]).zfill(6)}.jpg")
        image = Image.open(img_name).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        # Get attribute labels
        attributes = self.img_labels.iloc[idx, 1:].values.astype('int')
        
        return image, torch.tensor(attributes)
    

class MultiOutputResNet(nn.Module):
    def __init__(self, num_classes_per_attribute):
        super(MultiOutputResNet, self).__init__()
        
        # Load pretrained ResNet
        self.resnet = models.resnet18(pretrained=True)
        
        # Remove the final fully connected layer (replace it with our custom heads)
        num_ftrs = self.resnet.fc.in_features
        
        # Create one FC layer per attribute
        self.resnet.fc = nn.Identity()  # Remove the original fully connected layer
        self.attribute_heads = nn.ModuleList([nn.Linear(num_ftrs, num_classes) for num_classes in num_classes_per_attribute])

    def forward(self, x):
        # Extract features using ResNet (without the final FC layer)
        x = self.resnet(x)
        
        # Predict each attribute separately
        outputs = [head(x) for head in self.attribute_heads]
        return outputs
    
class InferenceAttributeDataset(Dataset):
    def __init__(self, data_category_pd, img_dir, transform=None):
        self.img_labels = data_category_pd
        self.img_dir = img_dir
        self.transform = transform
        
    
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        # Load image
        product_id = self.img_labels.iloc[idx, 0]
        img_name = os.path.join(self.img_dir, f"{str(product_id).zfill(6)}.jpg")
        image = Image.open(img_name).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        return product_id, image

In [1]:
num_attribute

NameError: name 'num_attribute' is not defined

In [6]:
x = [5,8,9]
for i in x:
    print(i)

5
8
9


In [5]:
def predict_batch(model, images, encoders, num_attribute):
    """
    Perform inference on a batch of images to predict attributes.
    
    Args:
    - model (nn.Module): Trained model.
    - images (torch.Tensor): Batch of images.
    - encoders (list of LabelEncoder): List of LabelEncoders for each attribute to decode predictions.
    
    Returns:
    - List of lists containing predicted attributes for each image in the batch.
    """
    # Forward pass through the model
    with torch.no_grad():
        outputs = model(images)  # Get outputs from the model (list of logits for each attribute)
    
    # List to store predicted attributes for each image
    batch_predictions = []
    
    # Loop over the batch of images
    for i in range(images.size(0)):  # images.size(0) gives the batch size
        predicted_attributes = []
        
        # For each attribute, get the predicted class and decode it
        for j in range(num_attribute):  
            logits = outputs[j][i]  # Logits for the i-th image for the j-th attribute
            predicted_class = torch.argmax(logits).item()  # Get the predicted class index
            
            # Decode the predicted class using the corresponding LabelEncoder
            decoded_label = encoders[j].inverse_transform([predicted_class])[0]
            predicted_attributes.append(decoded_label)
        
        batch_predictions.append(predicted_attributes)
    
    return batch_predictions

In [6]:
category_list = category_attributes.Category
results_list = []
# model_list = []


In [None]:
for Category in category_list:
    print('running the module for : ', Category)
    start_time = time.time()
    data_category_pd  = (train_attribute.query(f'Category == "{Category}"'))
    num_attribute = data_category_pd.iloc[0,2]
    feature_list = [f'attr_{i+1}'  for i in range(num_attribute)]
    data_category_pd = data_category_pd.reindex(columns=['id']+  feature_list)

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dataset = ImageAttributeDataset(data_category_pd= data_category_pd, img_dir=folder_train, num_attribute=num_attribute, transform = transform )
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
    # Initialize model
    num_classes_per_attribute = [len(dataset.encoders[i].classes_) for i in range(num_attribute)]
    model = MultiOutputResNet(num_classes_per_attribute)

    # Move model to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss(reduction='mean')  # Set reduction to 'none' to handle masking
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    end_time = time.time()
    total_time = (end_time-start_time)/60
    print(f'time taken for data reading is {total_time}')
    # Training loop
    for epoch in range(10):  # Train for 10 epochs
        model.train()
        running_loss = 0.0
        start_time = time.time()
        print(datetime.now(tz=ZoneInfo('Asia/Kolkata')))
        for images, attributes in dataloader:
            images, attributes = images.to(device), attributes.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)

            total_loss = 0
            valid_count = 0  # Count valid attributes to average the loss

            # Compute loss for each attribute, ignoring missing values (-1)
            for i in range(num_attribute):
                mask = attributes[:, i] != -1  # Mask where the attribute is not NaN (-1)
                if mask.sum() > 0:  # Only compute loss if there are valid targets
                    valid_targets = attributes[:, i][mask]  # Filter out invalid targets (-1)
                    valid_outputs = outputs[i][mask]  # Filter out corresponding outputs
                    loss = criterion(valid_outputs, valid_targets)#.mean()
    #                 loss = loss[mask].mean()  # Compute loss only for valid targets
                    total_loss += loss
                    valid_count += 1

            # Average the total loss over the number of valid attributes
            if valid_count > 0:
                total_loss /= valid_count

            # Backpropagation and optimization
            total_loss.backward()
            optimizer.step()

            running_loss += total_loss.item()
        end_time = time.time()
        total_time = (end_time-start_time)/60

        print(f"Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}")
        print(f'epoch time taken is {total_time}')

    model_path = f"/kaggle/working/model_{Category}.pth"
    torch.save(model.state_dict(), model_path)
    model.eval()
    test_data_category = test_attribute.query(f'Category=="{Category}"')
    # Create the dataset
    inference_dataset = InferenceAttributeDataset(test_data_category, folder_test , transform=transform)
    # DataLoader for parallel batch processing
    inference_loader = DataLoader(inference_dataset, batch_size=32, shuffle=False, num_workers=4)  # num_workers for parallel loading
#     train_inference_dataset = InferenceAttributeDataset(data_category_pd, folder_train , transform=transform)
#     train_inference_loader = DataLoader(train_inference_dataset, batch_size=32, shuffle=False, num_workers=4)  # num_workers for parallel loading
    encoders = dataset.encoders
    # 
    results = []
    for product_ids, images in inference_loader:
        images = images.to(device)  # Move images to GPU/CPU
        batch_predictions = predict_batch(model, images, encoders, num_attribute)  # Get predictions for the batch

        # Combine product IDs with their predicted attributes
        for i in range(len(product_ids)):
            x = batch_predictions[i]
            results.append({
                'id': product_ids[i].item(),
                **{ f'attr_{j+1}':x[j] for j in range(len(x))}
            })

    # Convert results to a DataFrame and save it
    results_df = pd.DataFrame(results)
    results_df.insert(1, 'Category', Category)
    results_df.insert(2, 'len', num_attribute)
    full_attr = ['id', 'Category', 'len'] + [f'attr_{i+1}' for i in range(10)]
    results_df = results_df.reindex(full_attr, axis=1).fillna('nu')
    results_list = results_list+([results_df])

running the module for :  Men Tshirts


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 138MB/s] 


time taken for data reading is 0.015455814202626546
2024-11-19 00:24:54.591471+05:30
Epoch 1, Loss: 0.30588590151123835
epoch time taken is 1.4903095444043477
2024-11-19 00:26:24.008248+05:30
Epoch 2, Loss: 0.23285283792045033
epoch time taken is 0.9788148959477743
2024-11-19 00:27:22.737580+05:30
Epoch 3, Loss: 0.2018332122906781
epoch time taken is 0.9870182275772095
2024-11-19 00:28:21.959136+05:30
Epoch 4, Loss: 0.1816017225776848
epoch time taken is 0.9924176692962646
2024-11-19 00:29:21.504615+05:30
Epoch 5, Loss: 0.17333396086305902
epoch time taken is 0.9925856272379557
2024-11-19 00:30:21.060186+05:30
Epoch 6, Loss: 0.1628746918582341
epoch time taken is 0.9645192702611287
2024-11-19 00:31:18.931750+05:30
Epoch 7, Loss: 0.14260212400634037
epoch time taken is 0.9502082864443461
2024-11-19 00:32:15.944676+05:30
Epoch 8, Loss: 0.12100535168786321
epoch time taken is 0.9623099366823832
2024-11-19 00:33:13.683704+05:30
Epoch 9, Loss: 0.11758835749341208
epoch time taken is 0.96317



time taken for data reading is 0.005498560269673666
2024-11-19 00:35:24.131574+05:30


In [None]:
result_final = pd.concat(results_list)
result_final = result_final.sort_values('id')
result_final.to_csv('/kaggle/working/cods_results_3.csv')
result_final