In [None]:
import numpy as np

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import models, transforms
from PIL import Image
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder

# ---- Locations -------------------------------------------------------------
# Read-only competition data and the writable output directory.
main_folder_loc = '/kaggle/input/visual-taxonomy'
main_folder_save_loc = '/kaggle/working'
folder_train = f"{main_folder_loc}/train_images"
folder_test = f"{main_folder_loc}/test_images"

# ---- Tabular inputs --------------------------------------------------------
train_attribute = pd.read_csv(f"{main_folder_loc}/train.csv")
test_attribute = pd.read_csv(f"{main_folder_loc}/test.csv")
category_attributes = pd.read_parquet(f"{main_folder_loc}/category_attributes.parquet")

# ---- Artefact naming -------------------------------------------------------
# Both templates keep literal "{}" placeholders that are filled in later with
# str.format: (category, epoch) for checkpoints and (category) for encoders.
model_variant = 'resnet18'
model_path_template = f"{main_folder_save_loc}/model_{model_variant}_{{}}_epoch{{}}.pth"  # Model path template for each epoch
encoders_path_template = f"{main_folder_save_loc}/encoders_{model_variant}_{{}}.pkl"

# ---- Training configuration ------------------------------------------------
num_epochs = 25  # Number of epochs

# Define Dataset Class
class ImageAttributeDataset(Dataset):
    """Training dataset yielding ``(image, labels)`` pairs for one category.

    The attribute columns ``attr_1 .. attr_<num_attribute>`` are label-encoded
    with one ``LabelEncoder`` per column. Missing labels (NaN, or a literal
    ``-1``) are stored as ``-1`` so the training loop can mask them out.

    Args:
        data_category_pd: DataFrame whose first column is the image id and
            which contains the ``attr_<k>`` columns. It is copied, never
            modified in place.
        img_dir: Directory containing ``<id zero-padded to 6 digits>.jpg``.
        num_attribute: Number of ``attr_<k>`` columns to encode and return.
        transform: Optional callable applied to the loaded RGB image.

    Attributes:
        img_labels: Private copy of the frame with encoded attribute columns.
        encoders: Fitted encoders, ``encoders[k]`` belongs to ``attr_{k+1}``.
        attr_columns: Names of the attribute columns, in output order.
    """

    def __init__(self, data_category_pd, img_dir, num_attribute, transform=None):
        # Work on a copy: encoding used to overwrite the caller's DataFrame,
        # which corrupts the raw labels if the same frame is reused.
        self.img_labels = data_category_pd.copy()
        self.img_dir = img_dir
        self.transform = transform
        self.attr_columns = [f'attr_{i+1}' for i in range(num_attribute)]
        self.encoders = [LabelEncoder() for _ in self.attr_columns]

        # Fit encoders and transform categorical labels into integers
        for column, encoder in zip(self.attr_columns, self.encoders):
            raw = self.img_labels[column]
            # A literal -1 is treated as "missing" too, as the original
            # fillna(-1) / != -1 logic did.
            labelled = raw.notna() & (raw != -1)
            codes = pd.Series(-1, index=raw.index, dtype='int64')
            codes.loc[labelled] = encoder.fit_transform(raw[labelled])
            self.img_labels[column] = codes.to_numpy()

        # Dense int64 label matrix: avoids a slow DataFrame row lookup per
        # sample and does not depend on which other columns the frame has.
        self._label_matrix = self.img_labels[self.attr_columns].to_numpy(dtype='int64')

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        image_id = str(self.img_labels.iloc[idx, 0]).zfill(6)
        img_name = os.path.join(self.img_dir, f"{image_id}.jpg")
        # Close the file handle deterministically; convert() returns an
        # independent, fully loaded image.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # CrossEntropyLoss needs int64 targets; make that explicit instead of
        # relying on the platform's default integer width.
        attributes = torch.tensor(self._label_matrix[idx], dtype=torch.long)
        return image, attributes

# Define MultiOutputResNet Model
class MultiOutputResNet18(nn.Module):
    """ImageNet-pretrained ResNet-18 trunk with one linear head per attribute.

    Args:
        num_classes_per_attribute: Iterable of ints; entry ``k`` is the number
            of output logits produced by head ``k``.
    """

    def __init__(self, num_classes_per_attribute):
        super().__init__()

        # Pretrained trunk; its classifier is swapped for a pass-through so the
        # trunk emits the pooled feature vector instead of ImageNet logits.
        backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.resnet = backbone

        # One independent classifier per attribute, all fed by the same features.
        heads = [nn.Linear(feature_dim, n_classes) for n_classes in num_classes_per_attribute]
        self.attribute_heads = nn.ModuleList(heads)

    def forward(self, x):
        """Return a list with one logits tensor per attribute head."""
        features = self.resnet(x)
        return [head(features) for head in self.attribute_heads]

# Training Function
def _masked_attribute_loss(outputs, attributes, criterion):
    """Mean loss over the attributes that have at least one labelled sample.

    Targets equal to -1 mark missing labels and are excluded. Returns ``None``
    when no attribute in the batch has any label.
    """
    losses = []
    for head_idx, logits in enumerate(outputs):
        targets = attributes[:, head_idx]
        labelled = targets != -1
        if labelled.any():
            losses.append(criterion(logits[labelled], targets[labelled]))
    if not losses:
        return None
    return sum(losses) / len(losses)


def _validation_loss(model, loader, criterion, device):
    """Average masked loss over ``loader`` in eval mode, or ``None`` if no batch had labels."""
    model.eval()
    total, counted = 0.0, 0
    with torch.no_grad():
        for images, attributes in loader:
            images, attributes = images.to(device), attributes.to(device)
            loss = _masked_attribute_loss(model(images), attributes, criterion)
            if loss is not None:
                total += loss.item()
                counted += 1
    return total / counted if counted else None


def train_model(Category, data_category_pd):
    """Train one multi-head ResNet-18 for a single category and save artefacts.

    A checkpoint is written after every epoch to ``model_path_template`` and
    the fitted label encoders are written to ``encoders_path_template``.

    Args:
        Category: Category name; used only to format the output file names
            and log messages.
        data_category_pd: Training rows of that category. The number of
            attributes is read from the third column of the first row, and the
            frame must contain ``id`` and ``attr_1 .. attr_<n>`` columns.

    Returns:
        None.
    """
    num_attribute = int(data_category_pd.iloc[0, 2])
    feature_list = [f'attr_{i+1}' for i in range(num_attribute)]
    data_category_pd = data_category_pd.reindex(columns=['id'] + feature_list)

    # Define transform
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Standard input size for ResNet-18
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dataset = ImageAttributeDataset(data_category_pd, folder_train, num_attribute, transform=transform)

    # Save the encoders before training starts: every checkpoint written below
    # is only usable together with them, so an interrupted run must not lose them.
    encoders_path = encoders_path_template.format(Category)
    with open(encoders_path, 'wb') as f:
        pickle.dump(dataset.encoders, f)
    print(f"Saved encoders for category: {Category}")

    # Split dataset into 90% train and 10% validation
    train_size = int(0.9 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Model and optimizer
    num_classes_per_attribute = [len(encoder.classes_) for encoder in dataset.encoders]
    model = MultiOutputResNet18(num_classes_per_attribute)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training Loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        stepped_batches = 0

        for images, attributes in train_loader:
            images, attributes = images.to(device), attributes.to(device)
            optimizer.zero_grad()
            outputs = model(images)

            batch_loss = _masked_attribute_loss(outputs, attributes, criterion)
            if batch_loss is None:
                # No labelled attribute in this batch: there is nothing to
                # back-propagate (previously this crashed on `0 .backward()`).
                continue

            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()
            stepped_batches += 1

        # Guard against an empty loader / fully unlabelled epoch.
        print(f"Epoch {epoch+1}, Loss: {running_loss / max(stepped_batches, 1):.4f}")

        # The 10% hold-out split is evaluated so checkpoints can be compared.
        val_loss = _validation_loss(model, val_loader, criterion, device)
        if val_loss is not None:
            print(f"Epoch {epoch+1}, Validation Loss: {val_loss:.4f}")

        # Save model for the epoch (file names use 1-based epoch numbers)
        model_path = model_path_template.format(Category, epoch+1)
        torch.save(model.state_dict(), model_path)
        print(f"Saved model for Category {Category}, Epoch {epoch+1} at {model_path}")

# Training for each category
category_list = category_attributes['Category']
for Category in category_list:
    # Boolean indexing instead of a string-built query: a category name that
    # contains quotes or backslashes would break the query expression.
    data_category_pd = train_attribute[train_attribute['Category'] == Category]
    if data_category_pd.empty:
        # train_model reads the first row, so an empty category would crash it.
        print(f"Skipping Category {Category}: no training rows")
        continue
    train_model(Category, data_category_pd)

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
import pickle
import numpy as np

# Paths
# NOTE: folder_train and train_attribute are not defined in this cell; anything
# below that needs them relies on the training cell having been run first.
main_folder_loc = '/kaggle/input/visual-taxonomy'
main_folder_save_loc = '/kaggle/working'
folder_test = f"{main_folder_loc}/test_images"

# Artefact naming; the "{}" placeholders are filled in later with str.format.
model_variant = 'resnet18'
model_path_template = f"{main_folder_save_loc}/model_{model_variant}_{{}}_epoch{{}}.pth"  # Model path template for each epoch
encoders_path_template = f"{main_folder_save_loc}/encoders_{model_variant}_{{}}.pkl"  # Encoder path template

# Tabular inputs needed at inference time
test_attribute = pd.read_csv(f"{main_folder_loc}/test.csv")
category_attributes = pd.read_parquet(f"{main_folder_loc}/category_attributes.parquet")

# Specify the epoch to use for inference
epoch = 24  # Set this to the epoch you want to use

# Define the inference dataset class
class InferenceAttributeDataset(Dataset):
    """Unlabelled dataset yielding ``(product_id, image)`` pairs.

    Args:
        data_category_pd: DataFrame whose first column holds the product id.
        img_dir: Directory containing ``<id zero-padded to 6 digits>.jpg``.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, data_category_pd, img_dir, transform=None):
        self.transform = transform
        self.img_dir = img_dir
        self.img_labels = data_category_pd

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        # The id lives in the first column; file names are the id padded to 6 digits.
        product_id = self.img_labels.iloc[idx, 0]
        file_name = f"{str(product_id).zfill(6)}.jpg"
        image = Image.open(os.path.join(self.img_dir, file_name)).convert('RGB')
        image = self.transform(image) if self.transform else image
        return product_id, image

# Define MultiOutputResNet Model (ResNet-18)
class MultiOutputResNet18(nn.Module):
    """ResNet-18 backbone with one linear classification head per attribute.

    Args:
        num_classes_per_attribute: Iterable of ints; entry ``k`` is the number
            of output logits produced by head ``k``.
        pretrained: When True (default, the original behaviour) the backbone
            starts from the ImageNet weights. Pass False when a checkpoint is
            loaded right after construction, so no weight fetch is attempted
            for weights that are overwritten anyway.
    """

    def __init__(self, num_classes_per_attribute, pretrained=True):
        super().__init__()

        # Load ResNet-18, optionally with the ImageNet weights
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        self.resnet = models.resnet18(weights=weights)

        # Remove the final fully connected layer and replace it with custom heads
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()
        self.attribute_heads = nn.ModuleList(
            [nn.Linear(num_ftrs, num_classes) for num_classes in num_classes_per_attribute]
        )

    def forward(self, x):
        """Return a list with one logits tensor per attribute head."""
        features = self.resnet(x)
        return [head(features) for head in self.attribute_heads]

# Prediction function
def predict_batch(model, images, encoders, num_attribute):
    """Predict decoded attribute labels for a batch of images.

    Args:
        model: Callable returning a sequence of logits tensors, one per
            attribute, each indexed as ``[sample, class]``.
        images: Batch tensor; its first dimension is the batch size.
        encoders: Sequence of fitted encoders; ``encoders[j].inverse_transform``
            maps class indices of attribute ``j`` back to labels.
        num_attribute: Number of leading attributes to decode.

    Returns:
        A list with one entry per image; each entry is the list of decoded
        labels for attributes ``0 .. num_attribute - 1``.
    """
    with torch.no_grad():
        outputs = model(images)  # Get logits for each attribute

    batch_size = images.size(0)
    if batch_size == 0 or num_attribute == 0:
        return [[] for _ in range(batch_size)]

    # Decode one whole attribute column at a time: a single argmax and a single
    # inverse_transform per attribute instead of one device sync and one
    # encoder call per (image, attribute) pair.
    decoded_columns = []
    for attr_idx in range(num_attribute):
        class_ids = outputs[attr_idx].argmax(dim=1).tolist()
        decoded_columns.append(encoders[attr_idx].inverse_transform(class_ids))

    # Transpose attribute-major columns into one label list per image.
    return [list(row) for row in zip(*decoded_columns)]

# Inference Pipeline
category_list = category_attributes['Category']
results_list = []

# Device and preprocessing are the same for every category, so build them once.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

for Category in category_list:
    print(f"Inference for Category: {Category}")

    # Load test data for the category (boolean indexing instead of a
    # string-built query, which breaks on names containing quotes).
    test_data_category = test_attribute[test_attribute['Category'] == Category]
    if test_data_category.empty:
        # An empty result frame has no 'id' column and cannot take the
        # positional inserts below, so skip categories without test rows.
        print(f"No test rows for Category: {Category}, skipping")
        continue

    # Load the encoders saved by training. pickle executes arbitrary code on
    # load, so this must only ever read files produced by this notebook.
    encoders_path = encoders_path_template.format(Category)
    with open(encoders_path, 'rb') as f:
        encoders = pickle.load(f)

    # One encoder was saved per attribute, so the encoders alone determine the
    # head layout; the training table is not needed at inference time.
    num_attribute = len(encoders)
    num_classes_per_attribute = [len(encoder.classes_) for encoder in encoders]

    # Initialize the model with correct num_classes_per_attribute and load the
    # checkpoint of the selected epoch. map_location lets a checkpoint saved
    # on GPU be loaded on a CPU-only machine.
    model_path = model_path_template.format(Category, epoch)
    model = MultiOutputResNet18(num_classes_per_attribute)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()

    # Initialize Inference Dataset and DataLoader
    inference_dataset = InferenceAttributeDataset(test_data_category, folder_test, transform=transform)
    inference_loader = DataLoader(inference_dataset, batch_size=32, shuffle=False, num_workers=4)

    results = []
    for product_ids, images in inference_loader:
        images = images.to(device)
        batch_predictions = predict_batch(model, images, encoders, num_attribute)

        for product_id, predicted in zip(product_ids, batch_predictions):
            result = {'id': product_id.item()}
            result.update({f'attr_{j+1}': label for j, label in enumerate(predicted)})
            results.append(result)

    # Convert results to DataFrame and handle missing attributes
    results_df = pd.DataFrame(results)
    results_df.insert(1, 'Category', Category)
    results_df.insert(2, 'len', num_attribute)

    # Define full set of attribute columns and reindex to ensure consistent columns
    full_attr = ['id', 'Category', 'len'] + [f'attr_{i+1}' for i in range(10)]
    results_df = results_df.reindex(columns=full_attr, fill_value='nu')  # Fill missing attributes with 'nu'

    # Append to results list
    results_list.append(results_df)



In [None]:
# Stack the per-category frames into one table and write it next to the
# model artefacts; the file name records the model variant and epoch used.
output_csv_path = f"{main_folder_save_loc}/1_inference_results_{model_variant}_epoch{epoch}.csv"
final_results = pd.concat(results_list, ignore_index=True)
final_results.to_csv(output_csv_path, index=False)
print("Inference complete and merged results saved.")