In [1]:

import torchvision.models as models
import torch.nn as nn
import torch
import os
import glob
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score




# Select the compute device once: use CUDA when it is available, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Modified ResNet model: ResNet-50 backbone with a custom multi-label head

class ResNetMultiLabel(nn.Module):
    """ResNet-50 whose final fully connected layer is replaced by a multi-label head.

    The head is Linear -> ReLU -> Dropout -> Linear -> Sigmoid, so ``forward``
    returns one independent value in [0, 1] per label rather than logits.

    Args:
        num_labels: Number of output labels (default 312, the original size).
        hidden_dim: Width of the hidden layer inside the head (default 512).
        dropout: Dropout probability applied after the hidden layer (default 0.3).
        pretrained: Forwarded unchanged to ``models.resnet50`` (default True).
    """

    def __init__(self, num_labels=312, hidden_dim=512, dropout=0.3, pretrained=True):
        super().__init__()
        # NOTE(review): recent torchvision releases prefer the `weights=` argument
        # over `pretrained=` — confirm against the installed version before switching.
        self.resnet = models.resnet50(pretrained=pretrained)
        # Read the backbone's feature width before the original `fc` is replaced.
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_labels),
            nn.Sigmoid(),  # Multi-label classification: one probability per label
        )

    def forward(self, x):
        """Run ``x`` through the backbone and head; returns per-label sigmoid outputs."""
        return self.resnet(x)

In [3]:
# Build the ResNet-based network and place it on the selected device in one step.
model = ResNetMultiLabel().to(device)
# (height, width) that images are resized to before being fed to the network.
image_size = (224, 224)

In [4]:
# Print the module hierarchy to check the architecture, including the replaced head.
print(model)

ResNetMultiLabel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
  

In [20]:
# Dataset: images found on disk paired with rows of the label matrix

from torchvision import transforms

class ImageDataset(Dataset):
    """Dataset that pairs ``.jpg`` images with rows of a multi-label matrix.

    Images are collected recursively under ``image_dir`` and sorted by path.
    Labels are read row by row from ``label_file``. Item ``i`` is the i-th
    sorted image together with the i-th label row.
    NOTE(review): the pairing is purely positional — confirm the label file
    was written in the same sorted-path order.

    Args:
        image_dir: Directory searched recursively for ``*.jpg`` files.
        label_file: Text file with one whitespace-separated row of integers per image.
        image_size: Target size handed to ``transforms.Resize`` (default (224, 224)).

    Raises:
        AssertionError: If the number of images and label rows differ.
    """

    def __init__(self, image_dir, label_file, image_size=(224, 224)):
        self.image_paths = self.get_image_paths(image_dir)
        self.labels = self.load_labels(label_file)
        self.transform = transforms.Compose([
            transforms.Resize(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for ResNet
        ])
        assert len(self.image_paths) == len(self.labels), "Mismatch between images and labels."

    def get_image_paths(self, image_dir):
        """Return every ``*.jpg`` path under ``image_dir`` (recursive), sorted."""
        pattern = os.path.join(image_dir, "**", "*.jpg")
        return sorted(glob.glob(pattern, recursive=True))

    def load_labels(self, label_file):
        """Parse ``label_file`` into a float32 tensor with one row per non-blank line.

        Blank or whitespace-only lines are skipped; otherwise a stray empty
        line would yield an empty row and make the tensor construction fail
        on a ragged list.
        """
        with open(label_file, 'r') as file:
            rows = [
                [int(value) for value in line.split()]
                for line in file
                if line.strip()
            ]
        return torch.tensor(rows, dtype=torch.float32)

    def __len__(self):
        """Number of images (equal to the number of label rows)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_row)`` for position ``idx``."""
        img_path = self.image_paths[idx]
        # Open inside a context manager so the underlying file is closed
        # deterministically; convert() yields an independent, fully loaded image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transform(image)
        label = self.labels[idx]
        return image, label

In [21]:
def train_model(model, train_loader, criterion, optimizer, epochs, checkpoint_dir="../models"):
    """Train ``model``, checkpoint it after every epoch, and print loss/accuracy.

    Args:
        model: Network to optimise; moved to the module-level ``device``.
        train_loader: Sized iterable of ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.
        checkpoint_dir: Directory for the model/optimizer state dicts
            (default ``"../models"``, the original location). Created if missing.

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    # Fail fast with a clear message instead of an opaque np.vstack error later.
    if len(train_loader) == 0:
        raise ValueError("train_loader is empty; nothing to train on.")

    # Make sure the checkpoint directory exists *before* spending an epoch of
    # compute, otherwise torch.save would fail only after the first epoch.
    os.makedirs(checkpoint_dir, exist_ok=True)
    PATH = os.path.join(checkpoint_dir, "multi-label-attribute.pth")
    PATHOPT = os.path.join(checkpoint_dir, "multi-label-attribute_optimizer.pth")

    model = model.to(device)
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        all_labels = []
        all_preds = []
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()

            # Binarise the outputs at 0.5 to get hard label predictions.
            preds = (outputs > 0.5).cpu().numpy()
            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())

        all_preds = np.vstack(all_preds)
        all_labels = np.vstack(all_labels)
        # For 2-D multi-label indicator arrays scikit-learn's accuracy_score is
        # *subset* accuracy: a sample counts only when every label matches, so
        # values at or near 0% are expected when there are many labels.
        epoch_accuracy = accuracy_score(all_labels, all_preds, normalize=True)

        # Overwrite the checkpoint each epoch so the latest weights survive an interruption.
        torch.save(model.state_dict(), PATH)
        torch.save(optimizer.state_dict(), PATHOPT)

        # Print epoch summary
        print(f'Epoch [{epoch + 1}/{epochs}] Complete - '
              f'Average Loss: {epoch_loss / len(train_loader):.4f}, '
              f'Epoch Accuracy: {epoch_accuracy * 100:.2f}%\n')


In [8]:
# Minimal training function (prints the mean loss only; no accuracy or checkpointing)
def train_model1(model, train_loader, criterion, optimizer, epochs):
    """Bare-bones training loop that prints the mean batch loss of each epoch.

    The model is moved to the module-level ``device`` and put in training
    mode; every batch is moved to the same device before the forward pass.
    Nothing is returned and no checkpoint is written.
    """
    model = model.to(device)
    model.train()

    def _optimise_batch(inputs, targets):
        """Perform one optimisation step and return that batch's loss as a float."""
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item()

    for epoch in range(epochs):
        epoch_loss = 0
        for inputs, targets in train_loader:
            epoch_loss += _optimise_batch(inputs.to(device), targets.to(device))
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

In [23]:
from sklearn.metrics import precision_recall_fscore_support
# Evaluation function
def evaluate_model(model, test_loader, threshold=0.5):
    """Evaluate ``model`` on ``test_loader`` and print micro-averaged metrics.

    Args:
        model: Network returning per-label scores; moved to the module-level
            ``device`` (as the training function does) and set to eval mode.
        test_loader: Iterable of ``(images, labels)`` batches.
        threshold: Scores strictly above this value become a positive
            prediction (default 0.5, the original cut-off).

    Returns:
        ``(yhat, ytrue)``: stacked NumPy arrays of binary predictions and of
        the ground-truth labels, one row per sample.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    # Keep model and inputs on the same device even when the model was not
    # trained in this session (e.g. freshly constructed or loaded on the CPU).
    model = model.to(device)
    model.eval()
    yhat_list, ytrue_list = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            yhat = (outputs > threshold).float()  # Convert probabilities to binary
            yhat_list.append(yhat.cpu().numpy())
            # Labels are only needed on the host, so skip the device round-trip.
            ytrue_list.append(labels.cpu().numpy())
    if not yhat_list:
        raise ValueError("test_loader yielded no batches; nothing to evaluate.")
    yhat = np.vstack(yhat_list)
    ytrue = np.vstack(ytrue_list)
    precision, recall, f1, _ = precision_recall_fscore_support(ytrue, yhat, average='micro')
    # On multi-label indicator arrays accuracy_score is subset (exact-match)
    # accuracy, so it is expected to be far lower than the micro-averaged scores.
    accuracy = accuracy_score(ytrue, yhat)
    print(f"Accuracy: {accuracy:.3f}, Precision: {precision:.3f}, Recall: {recall:.3f}, F1-Score: {f1:.3f}")
    return yhat, ytrue

In [25]:

# Hyperparameters
learning_rate = 1e-3
epochs = 5
batch_size = 32
n_labels = 312

# Data locations: an image directory and a label-matrix text file per split
test_label_file = "../data/attribute_matrix_test.txt"
test_image_dir = "../data/images/test"
train_label_file = "../data/attribute_matrix_train.txt"
train_image_dir = "../data/images/train"

In [26]:


# Build one dataset per split, then wrap each in a DataLoader.
train_dataset = ImageDataset(
    image_dir=train_image_dir,
    label_file=train_label_file,
    image_size=image_size,
)
test_dataset = ImageDataset(
    image_dir=test_image_dir,
    label_file=test_label_file,
    image_size=image_size,
)
# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [27]:
# Inspect the label row of the first training sample (sanity check of the parsed matrix).
train_dataset.labels[0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 

In [28]:


# Initialize model, loss, and optimizer
# Place the model on the target device *before* constructing the optimizer, so
# the optimizer is built over the parameters in their final location.
model = ResNetMultiLabel().to(device)
# BCELoss expects probabilities in [0, 1]; the model's head ends in nn.Sigmoid.
criterion = nn.BCELoss()  # Binary cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
print(model)

In [29]:




# Fit the model on the training split.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
)


# Score the trained model on the test split. Despite the *_list names, both
# results are stacked arrays with one row per test sample.
yhat_list, ytrue_list = evaluate_model(model=model, test_loader=test_loader)


Epoch [1/5] Complete - Average Loss: 0.2701, Epoch Accuracy: 0.00%

Epoch [2/5] Complete - Average Loss: 0.2562, Epoch Accuracy: 0.00%

Epoch [3/5] Complete - Average Loss: 0.2526, Epoch Accuracy: 0.00%

Epoch [4/5] Complete - Average Loss: 0.2518, Epoch Accuracy: 0.00%

Epoch [5/5] Complete - Average Loss: 0.2510, Epoch Accuracy: 0.00%

Accuracy: 0.000, Precision: 0.633, Recall: 0.124, F1-Score: 0.207


In [39]:
# Print predictions for the first few test samples.
# Show at most 5 rows, and fewer when the test set is smaller, so a short
# test set no longer raises IndexError.
for i in range(min(5, len(yhat_list))):
    print(f"Image {i + 1}:")
    print(f"Predicted: {yhat_list[i].astype(int)}")
    print(f"Actual:    {ytrue_list[i].astype(int)}\n")

Image 1:
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 