In [1]:
import torch
from torchvision import models, transforms
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
import os
import cv2
import keras
import warnings
import time
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import torch.optim as optim
import random

## Load dog breed and dog classification

In [2]:
def loading_training_data(data_dir):
    data = []
    labels_list = []
    
    for label in labels:
        # path = os.path.join(data_dir, label)
        path = data_dir+"/"+label
        class_num = labels.index(label)
        for img in os.listdir(path):
            # print(img)
            # print(path)
            # img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            img_arr = cv2.imread(os.path.join(path, img))
            
            # print(os.path.join(path,img))
            if img_arr is None:
                # print(f"Warning: Unable to load image {img}")
                print(os.path.join(path,img))
                continue  # Skip the image if it can't be loaded
            resized_arr = cv2.resize(img_arr, (img_size, img_size))
            data.append(resized_arr)
            labels_list.append(class_num)
            
    return np.array(data), np.array(labels_list)


In [3]:
def loading_training_data_subset(data_dir, subset_size=None):
    data = []
    labels_list = []
    
    for label in labels:
        # path = os.path.join(data_dir, label)  # Assuming label directories are in data_dir
        path = data_dir+"/"+label
        class_num = labels.index(label)
        
        # Get list of images in the directory
        all_images = os.listdir(path)
        
        # If subset_size is provided, select a random subset of images
        if subset_size is not None and len(all_images) > subset_size:
            all_images = random.sample(all_images, subset_size)
        
        for img in all_images:
            img_arr = cv2.imread(os.path.join(path, img))
            
            if img_arr is None:
                print(f"Warning: Unable to load image {img}")
                continue  # Skip the image if it can't be loaded
            
            resized_arr = cv2.resize(img_arr, (img_size, img_size))
            data.append(resized_arr)
            labels_list.append(class_num)
    
    return np.array(data), np.array(labels_list)

In [4]:
def transform_input_resnet(data):
    # Convert teh data array to a PIL Image
    
    # Example: Load multiple images (let's say 3 images)
    # image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]  # List of image file paths

    # Define the transformations
    transform = transforms.Compose([
        transforms.Resize(256),           # Resize to 256x256 (larger than 224x224 to allow for cropping)
        transforms.CenterCrop(224),       # Crop to 224x224
        transforms.ToTensor(),            # Convert to tensor (will scale pixel values to [0, 1])
        transforms.Normalize(             # Normalize with ImageNet mean and std
            mean=[0.485, 0.456, 0.406], 
            std=[0.229, 0.224, 0.225]
        ),
    ])
    
    # Prepare the list of images
    input_tensors = []
    for image in data:
        image = Image.fromarray(image)
        image_tensor = transform(image)  # Apply transformations
        input_tensors.append(image_tensor)
    
    # Stack the images into a single tensor (batch of images)
    input_batch = torch.stack(input_tensors)  # Shape will be [batch_size, 3, 224, 224]
    return input_batch


In [5]:
# Labels for image categories
labels = ['Beagle', 'Boxer', 'Bulldog', 'Dachshund', 'German_Shepherd', 'Golden_Retriever', 'Labrador_Retriever', 'Poodle', 'Rottweiler', 'Yorkshire_Terrier']
img_size = 32

In [6]:
dog_breed_train_data, dog_breed_train_label = loading_training_data('./DogBreedImageDataset/dataset')

In [7]:
dog_breed_train_data.shape

(967, 32, 32, 3)

In [8]:
test_data = dog_breed_train_data[0:5]

In [9]:
test_data.shape

(5, 32, 32, 3)

In [10]:
type(test_data)

numpy.ndarray

In [11]:
input_tensor = transform_input_resnet(test_data)

In [12]:
input_tensor.shape

torch.Size([5, 3, 224, 224])

In [13]:
labels = ['angry', 'happy', 'relaxed', 'sad']
# Load data for training
dog_emotion_train_data, dog_emotion_train_label = loading_training_data_subset('./DogEmotionPrediction/images/', 250) #100 images per class

In [14]:
dog_emotion_train_data.shape

(1000, 32, 32, 3)

## Combine the two together

In [15]:
dog_breed_train_data.shape

(967, 32, 32, 3)

In [16]:
dog_emotion_train_data.shape

(1000, 32, 32, 3)

### Modify label to have two label, first label is emotion, second is dog breed

In [17]:
dog_breed_train_label = dog_breed_train_label.reshape(-1, 1)

# Create a column of zeros (same number of rows as arr_reshaped)
emotion_col = np.full((dog_breed_train_label.shape[0], 1), 10)

# Add the column of zeros to the right using np.hstack
dog_breed_train_label1 = np.hstack((emotion_col, dog_breed_train_label))

In [18]:
dog_emotion_train_label = dog_emotion_train_label.reshape(-1, 1)

# Create a column of zeros (same number of rows as arr_reshaped)
breed_col = np.full((dog_emotion_train_label.shape[0], 1), 10)

# Add the column of zeros to the right using np.hstack
dog_emotion_train_label1 = np.hstack((dog_emotion_train_label, breed_col))

In [19]:
dog_breed_train_label1.shape

(967, 2)

In [20]:
dog_breed_train_label1

array([[10,  0],
       [10,  0],
       [10,  0],
       ...,
       [10,  9],
       [10,  9],
       [10,  9]])

In [21]:
dog_emotion_train_label1.shape

(1000, 2)

In [22]:
dog_emotion_train_label1

array([[ 0, 10],
       [ 0, 10],
       [ 0, 10],
       ...,
       [ 3, 10],
       [ 3, 10],
       [ 3, 10]])

In [23]:
combined_dataset = np.concatenate((dog_breed_train_data, dog_emotion_train_data), axis=0)

In [24]:
combined_label = np.concatenate((dog_breed_train_label1, dog_emotion_train_label1), axis = 0)

In [25]:
combined_label

array([[10,  0],
       [10,  0],
       [10,  0],
       ...,
       [ 3, 10],
       [ 3, 10],
       [ 3, 10]])

### Train test split

In [26]:
from sklearn.model_selection import train_test_split

In [27]:
X_train, X_test, y_train, y_test = train_test_split(combined_dataset, combined_label, test_size=0.01, random_state=42)

In [28]:
X_train.shape

(1947, 32, 32, 3)

In [29]:
type(X_train)

numpy.ndarray

### Take part of data only

In [30]:
train_size = 300

In [31]:
random_indices = np.random.choice(X_train.shape[0], train_size, replace=False)

# Use these indices to extract 50 random samples
X_train1 = X_train[random_indices]
y_train1 = y_train[random_indices]

In [32]:
X_train1.shape

(300, 32, 32, 3)

In [33]:
np.unique(y_train)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [34]:
y_train1.shape

(300, 2)

## Define model

In [35]:
import torch
import torch.nn as nn
from torchvision import models

class MultiTaskResNet50(nn.Module):
    def __init__(self, num_classes_task1, num_classes_task2):
        super(MultiTaskResNet50, self).__init__()
        
        # Load the pre-trained ResNet-50 model
        self.resnet = models.resnet50()
        
        # Remove the final fully connected layer (classifier) for both tasks
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])  # Exclude final FC layer
        
        # Task 1: A new fully connected layer for the first task
        self.fc_task1 = nn.Linear(2048, num_classes_task1)
        
        # Task 2: A new fully connected layer for the second task
        self.fc_task2 = nn.Linear(2048, num_classes_task2)
        
    def forward(self, x):
        # Pass through the ResNet backbone (everything before the final fully connected layer)
        features = self.resnet(x)  # Output size: [batch_size, 2048, 1, 1]
        
        # Flatten the output to [batch_size, 2048]
        features = features.view(features.size(0), -1)
        
        # Task 1: Pass through the task-specific classifier
        task1_output = self.fc_task1(features)
        
        # Task 2: Pass through the task-specific classifier
        task2_output = self.fc_task2(features)
        
        # Get predicted class by taking argmax (index of the maximum logit)
        task1_pred = torch.argmax(task1_output,1 )  # Predicted class for task 1
        task2_pred = torch.argmax(task2_output,1)  # Predicted class for task 2
        
        return task1_pred, task2_pred
        # return task1_output, task2_output


In [36]:
# Example usage:
num_classes_task1 = 5  # 5 emotion classes
num_classes_task2 = 11 # 11 dog classifcation classes

# Instantiate the model
model = MultiTaskResNet50(num_classes_task1, num_classes_task2)

# Example input tensor with shape [batch_size, 3, 224, 224]
input_tensor = np.random.rand(10, 3, 224, 224)  # Batch size of 2

# Forward pass
task1_output, task2_output = model(transform_input_resnet(X_train[:10]))

print("Task 1 Output:", task1_output.shape)  # Should be [batch_size, num_classes_task1]
print("Task 2 Output:", task2_output.shape)  # Should be [batch_size, num_classes_task2]


Task 1 Output: torch.Size([10])
Task 2 Output: torch.Size([10])


In [37]:
X_train1.shape

(300, 32, 32, 3)

In [38]:
task1_output

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [39]:
task2_output

tensor([ 9, 10,  9,  6, 10, 10, 10, 10,  9, 10])

## Define code for actual training

In [40]:
X_train1.shape

(300, 32, 32, 3)

In [41]:
y_train1.shape

(300, 2)

In [42]:
X_train1.shape[0]

300

### Test model first

In [43]:
test_input = transform_input_resnet(X_train1[0:4])

In [44]:
pred = model(test_input)

In [45]:
pred

(tensor([2, 2, 2, 2]), tensor([ 9,  6, 10, 10]))

First tensor is emotion classification, second tensor is dog breed classification

In [46]:
y_train1[0:4]

array([[ 3, 10],
       [10,  6],
       [10,  1],
       [10,  0]])

In [47]:
y_train1[0:4, 0]

array([ 3, 10, 10, 10])

The syntax above get all the first element, which is emotion classification

In [49]:

# Set up optimizer (Adam in this case)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Set up loss function (CrossEntropyLoss)
criterion = nn.CrossEntropyLoss()

optimizer.zero_grad()  # Clear gradients from previous step

# Forward pass: compute predicted logits
emotion_pred, bred_pred = model(transform_input_resnet(X_train1[0:2]))

print(emotion_pred)
print(bred_pred)
print(y_train1[0:2])
print(y_train1[0:2, 0])
# Compute loss
loss = criterion(emotion_pred.float(), torch.tensor(y_train1[0:2, 0]).float())

# Backward pass: compute gradients
loss.requires_grad = True
loss.backward()

# Update model parameters
optimizer.step()

tensor([2, 2])
tensor([ 9, 10])
[[ 3 10]
 [10  6]]
[ 3 10]


emotion_pred compared with y_train1[:, 0] while bred_pred compared with y_train1[:, 1]

## Actual training happen below

In [50]:
batch_size = 4

In [51]:
start_time = time.time()

In [52]:
# Set up loss function (CrossEntropyLoss)
criterion = nn.CrossEntropyLoss()

emotion_loss_fn = nn.CrossEntropyLoss()  # For classification task
breed_loss_fn = nn.CrossEntropyLoss()  # For regression task

optimizer = optim.Adam(model.parameters(), lr=0.001)
for i in range(0, X_train1.shape[0], batch_size):
    running_emotion_loss = 0.0
    running_breed_loss = 0.0
    running_total_loss = 0.0
    
    batch_input = X_train1[i:i+batch_size]
    batch_label = y_train1[i:i+batch_size]
    
    optimizer.zero_grad()
    batch_input = transform_input_resnet(batch_input)
    emotion_pred, breed_pred = model(batch_input)

    emotion_loss = criterion(emotion_pred.float(), torch.tensor(batch_label[:, 0]).float())
    breed_loss = criterion(breed_pred.float(), torch.tensor(batch_label[:, 1]).float())

    emotion_loss.requires_grad = True
    breed_loss.requires_grad = True

    # Combine the losses
    total_loss = emotion_loss + breed_loss

    # Backpropagate the loss and update weights
    total_loss.backward()
    optimizer.step()

    # Accumulate the losses for reporting
    running_emotion_loss += emotion_loss.item()
    running_breed_loss += breed_loss.item()
    running_total_loss += total_loss.item()
    # Print the average losses for this epoch
    avg_emotion_loss = running_emotion_loss/train_size
    avg_breed_loss = running_breed_loss/train_size
    avg_total_loss = running_total_loss / train_size

    print(f"Epoch [{i+1}/{X_train1.shape[0]}], "
          f"Emotion loss: {avg_emotion_loss:.4f}, "
          f"Breed Loss: {avg_breed_loss:.4f}, "
          f"Total Loss: {avg_total_loss:.4f}")

    
    
    

Epoch [1/300], Emotion loss: 0.1525, Breed Loss: 0.1626, Total Loss: 0.3151
Epoch [5/300], Emotion loss: 0.1386, Breed Loss: 0.1540, Total Loss: 0.2926
Epoch [9/300], Emotion loss: 0.1386, Breed Loss: 0.1104, Total Loss: 0.2491
Epoch [13/300], Emotion loss: 0.1017, Breed Loss: 0.1574, Total Loss: 0.2590
Epoch [17/300], Emotion loss: 0.1433, Breed Loss: 0.1372, Total Loss: 0.2804
Epoch [21/300], Emotion loss: 0.1017, Breed Loss: 0.1585, Total Loss: 0.2601
Epoch [25/300], Emotion loss: 0.0739, Breed Loss: 0.1750, Total Loss: 0.2489
Epoch [29/300], Emotion loss: 0.1525, Breed Loss: 0.1120, Total Loss: 0.2645
Epoch [33/300], Emotion loss: 0.0693, Breed Loss: 0.1912, Total Loss: 0.2605
Epoch [37/300], Emotion loss: 0.1155, Breed Loss: 0.1674, Total Loss: 0.2829
Epoch [41/300], Emotion loss: 0.1525, Breed Loss: 0.1506, Total Loss: 0.3030
Epoch [45/300], Emotion loss: 0.1479, Breed Loss: 0.1556, Total Loss: 0.3035
Epoch [49/300], Emotion loss: 0.0693, Breed Loss: 0.1410, Total Loss: 0.2103
Ep

In [53]:
end_time = time.time()

In [54]:
duration = end_time - start_time
print("Time to train is " + str(duration) + " seconds")

Time to train is 77.34147357940674 seconds


In [55]:
X_test.shape

(20, 32, 32, 3)

In [56]:
y_test.shape

(20, 2)

In [57]:
from sklearn.metrics import accuracy_score

In [58]:
emotion_pred, breed_pred = model(transform_input_resnet(X_test))

In [59]:
emotion_pred

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [60]:
breed_pred

tensor([ 6, 10, 10, 10,  6,  9, 10, 10,  2,  9, 10,  9,  9, 10,  9, 10, 10,  9,
         9, 10])

In [61]:
accuracy_score(y_test[:, 0], emotion_pred)

0.05

In [62]:
accuracy_score(y_test[:, 1], breed_pred)

0.2

The model performs poorly

## Model below doesn't incorporate breed_loss

In [63]:
start_time = time.time()

In [64]:
batch_size = 4

In [65]:
emotion_loss_fn = nn.CrossEntropyLoss()  # For classification task
breed_loss_fn = nn.CrossEntropyLoss()  # For regression task

optimizer = optim.Adam(model.parameters(), lr=0.01)
for i in range(0, X_train1.shape[0], batch_size):
    running_emotion_loss = 0.0
    running_breed_loss = 0.0
    running_total_loss = 0.0
    
    batch_input = X_train1[i:i+batch_size]
    batch_label = y_train1[i:i+batch_size]
    
    optimizer.zero_grad()
    batch_input = transform_input_resnet(batch_input)
    emotion_pred, breed_pred = model(batch_input)

    emotion_loss = criterion(emotion_pred.float(), torch.tensor(batch_label[:, 0]).float())
    breed_loss = criterion(breed_pred.float(), torch.tensor(batch_label[:, 1]).float())

    emotion_loss.requires_grad = True
    breed_loss.requires_grad = True

    # Combine the losses
    # total_loss = emotion_loss + breed_loss
    total_loss = emotion_loss

    # Backpropagate the loss and update weights
    total_loss.backward()
    optimizer.step()

    # Accumulate the losses for reporting
    running_emotion_loss += emotion_loss.item()
    running_breed_loss += breed_loss.item()
    running_total_loss += total_loss.item()
    # Print the average losses for this epoch
    avg_emotion_loss = running_emotion_loss/train_size
    avg_breed_loss = running_breed_loss/train_size
    avg_total_loss = running_total_loss / train_size

    print(f"Epoch [{i+1}/{X_train1.shape[0]}], "
          f"Emotion loss: {avg_emotion_loss:.4f}, "
          f"Breed Loss: {avg_breed_loss:.4f}, "
          f"Total Loss: {avg_total_loss:.4f}")

Epoch [1/300], Emotion loss: 0.1525, Breed Loss: 0.1626, Total Loss: 0.1525
Epoch [5/300], Emotion loss: 0.1386, Breed Loss: 0.1540, Total Loss: 0.1386
Epoch [9/300], Emotion loss: 0.1386, Breed Loss: 0.1104, Total Loss: 0.1386
Epoch [13/300], Emotion loss: 0.1017, Breed Loss: 0.1574, Total Loss: 0.1017
Epoch [17/300], Emotion loss: 0.1433, Breed Loss: 0.1372, Total Loss: 0.1433
Epoch [21/300], Emotion loss: 0.1017, Breed Loss: 0.1585, Total Loss: 0.1017
Epoch [25/300], Emotion loss: 0.0739, Breed Loss: 0.1750, Total Loss: 0.0739
Epoch [29/300], Emotion loss: 0.1525, Breed Loss: 0.1120, Total Loss: 0.1525
Epoch [33/300], Emotion loss: 0.0693, Breed Loss: 0.1912, Total Loss: 0.0693
Epoch [37/300], Emotion loss: 0.1155, Breed Loss: 0.1674, Total Loss: 0.1155
Epoch [41/300], Emotion loss: 0.1525, Breed Loss: 0.1506, Total Loss: 0.1525
Epoch [45/300], Emotion loss: 0.1479, Breed Loss: 0.1556, Total Loss: 0.1479
Epoch [49/300], Emotion loss: 0.0693, Breed Loss: 0.1410, Total Loss: 0.0693
Ep

In [66]:
end_time = time.time()

In [67]:
duration = end_time - start_time
print("Time to train is " + str(duration) + " seconds")

Time to train is 74.80636024475098 seconds


In [68]:
emotion_pred, breed_pred = model(transform_input_resnet(X_test))

In [69]:
emotion_pred

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [70]:
breed_pred

tensor([ 6, 10, 10, 10,  6,  9, 10, 10,  2,  9, 10,  9,  9, 10,  9, 10, 10,  9,
         9, 10])

In [71]:
accuracy_score(y_test[:, 0], emotion_pred)

0.05

In [72]:
accuracy_score(y_test[:, 1], breed_pred)

0.2

The model performs poorly as well