In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os
import cv2
import keras
import warnings
import time
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import random

In [122]:
model = models.resnet50()

In [123]:
model.bn1

BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

In [124]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Load dog breed

In [5]:
def loading_training_data(data_dir):
    data = []
    labels_list = []
    
    for label in labels:
        # path = os.path.join(data_dir, label)
        path = data_dir+"/"+label
        class_num = labels.index(label)
        for img in os.listdir(path):
            # print(img)
            # print(path)
            img_arr = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            
            # print(os.path.join(path,img))
            if img_arr is None:
                # print(f"Warning: Unable to load image {img}")
                print(os.path.join(path,img))
                continue  # Skip the image if it can't be loaded
            resized_arr = cv2.resize(img_arr, (img_size, img_size))
            data.append(resized_arr)
            labels_list.append(class_num)
            
    return np.array(data), np.array(labels_list)


In [6]:
# Labels for image categories
labels = ['Beagle', 'Boxer', 'Bulldog', 'Dachshund', 'German_Shepherd', 'Golden_Retriever', 'Labrador_Retriever', 'Poodle', 'Rottweiler', 'Yorkshire_Terrier']
img_size = 32

In [7]:
dog_breed_train_data, dog_breed_train_label = loading_training_data('./DogBreedImageDataset/dataset')

In [8]:
np.unique(dog_breed_train_label)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
dog_breed_train_label.shape

(967,)

In [10]:
dog_breed_train_label = dog_breed_train_label.reshape(-1, 1)

# Create a column of zeros (same number of rows as arr_reshaped)
emotion_col = np.full((dog_breed_train_label.shape[0], 1), 10)

# Add the column of zeros to the right using np.hstack
dog_breed_train_label1 = np.hstack((emotion_col, dog_breed_train_label))

In [11]:
dog_breed_train_label1[0:3]

array([[10,  0],
       [10,  0],
       [10,  0]])

## Load pet emotion

In [12]:
labels = ['Angry', 'happy', 'Other', 'Sad']
# Load data for training, testing, and validation
pet_facial_train_data, pet_facial_train_label = loading_training_data('./PetFacialExpressionDataset/Master_Folder/train/')
pet_facial_test_data, pet_facial_test_label = loading_training_data('./PetFacialExpressionDataset/Master_Folder/train/')
pet_facial_val_data, pet_facial_val_label = loading_training_data('./PetFacialExpressionDataset/Master_Folder/train/')

In [13]:
pet_facial_train_label.shape

(1000,)

In [14]:
pet_facial_train_label = pet_facial_train_label.reshape(-1, 1)

# Create a column of zeros (same number of rows as arr_reshaped)
breed_col = np.full((pet_facial_train_label.shape[0], 1), 10)

# Add the column of zeros to the right using np.hstack
pet_facial_train_label1 = np.hstack(( pet_facial_train_label, breed_col))

In [15]:
pet_facial_train_label1

array([[ 0, 10],
       [ 0, 10],
       [ 0, 10],
       ...,
       [ 3, 10],
       [ 3, 10],
       [ 3, 10]])

## Combine the two together

In [16]:
dog_breed_train_data.shape

(967, 32, 32)

In [17]:
pet_facial_train_data.shape

(1000, 32, 32)

In [18]:
combined_dataset = np.concatenate((dog_breed_train_data, pet_facial_train_data), axis=0)

In [19]:
combined_dataset.shape

(1967, 32, 32)

In [20]:
dog_breed_train_label1.shape

(967, 2)

In [21]:
pet_facial_train_label1.shape

(1000, 2)

In [22]:
combined_label = np.concatenate((dog_breed_train_label1, pet_facial_train_label1), axis = 0)

In [23]:
combined_label.shape[0]

1967

first is emoition, second is dog breed

In [118]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
import os

# Step 1: Create a custom dataset to load images
class CustomImageDataset(Dataset):
    def __init__(self, img, label):
        self.img = img
        
        # print(input_tensor.shape)  # Output: torch.Size([20, 3, 32, 32])
        self.label = label
        
    def __len__(self):
        return self.img.shape[0]

    def __getitem__(self, idx):
        img = self.img[idx]
    
        label = self.label[idx]
        return img, label[0], label[1]

# Step 2: Define the Model with two heads (classification and regression)
class MultiTaskResNet(nn.Module):
    def __init__(self, n_emotion, n_breed):
        super(MultiTaskResNet, self).__init__()
        # Use a pre-trained ResNet model
        self.model = models.resnet50(pretrained=True)
        
        # Loop through all layers and set weight and bias to float
        for param in self.model.parameters():
            param.data = param.data.float()  # Set the parameter data type to float32
        
        # Alternatively, you can access the specific layers' weight and bias
        # Example: Set the first convolutional layer weights and bias to float
        self.model.conv1.weight.data = self.model.conv1.weight.data.float()
        if self.model.conv1.bias is not None:
            self.model.conv1.bias.data = self.model.conv1.bias.data.float()

        
        # Check the data type of the weight and bias to confirm
        print(self.model.conv1.weight.dtype)  # Should print torch.float32
        if self.model.conv1.bias is not None:
            print(self.model.conv1.bias.dtype)  # Should print torch.float32
            
        # Classify emotion
        self.model.fc = nn.Sequential(
            nn.Linear(self.model.fc.in_features, 512),
            nn.ReLU(),
            nn.Linear(512, n_emotion)  # 10 classes for emotion classification
        )
        
        # Add a breed classification head 
        self.breed_head = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, n_breed),  # 11 depends on number of class
            nn.Softmax(dim=1)  # Softmax to output probabilities for each class
        )

    def forward(self, x):
        x1 = x.long()
        features = self.model.conv1(x1)  # Initial layers
        features = self.model.bn1(features)
        features = self.model.relu(features)
        features = self.model.maxpool(features)

        features = self.model.layer1(features)  # Pass through ResNet layers
        features = self.model.layer2(features)
        features = self.model.layer3(features)
        features = self.model.layer4(features)


        # view is used to reshape a tensor
        # Classification output
        emotion_output = self.model.fc(features.view(features.size(0), -1))

        # Regression output
        breed_output = self.breed_head(features.view(features.size(0), -1))

        return emotion_output, breed_output



In [119]:
subset_train = combined_dataset[:20] 
subset_label = combined_label[:20]

In [120]:
combined_label

array([[10,  0],
       [10,  0],
       [10,  0],
       ...,
       [ 3, 10],
       [ 3, 10],
       [ 3, 10]])

In [121]:
# Step 3: Define the Loss Functions
emotion_loss_fn = nn.CrossEntropyLoss()  # For classification task
breed_loss_fn = nn.CrossEntropyLoss()  # For regression task

# Step 4: Define the optimizer
model = MultiTaskResNet(5,11)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create dataset and dataloader
dataset = CustomImageDataset(input_tensor.long(), subset_label)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Step 6: Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_emotion_loss = 0.0
    running_breed_loss = 0.0
    running_total_loss = 0.0

    for input, emotion_target, breed_target in dataloader:

        input = input.long()
        optimizer.zero_grad()  # Zero the gradients

        # Forward pass
        emotion_output, breed_output = model(input)

        # Compute the classification and regression losses
        emotion_loss = emotion_loss_fn(emotion_output, emotion_target)
        breed_loss = breed_loss_fn(breed_output, breed_target)

        # Combine the losses (can use weighted sum if needed)
        total_loss = emotion_loss + breed_loss

        # Backpropagate the loss and update weights
        total_loss.backward()
        optimizer.step()

        # Accumulate the losses for reporting
        running_emotion_loss += emotion_loss.item()
        running_breed_loss += breed_loss.item()
        running_total_loss += total_loss.item()
    # Print the average losses for this epoch
    avg_emotion_loss = running_emotion_loss/len(dataloader)
    avg_breed_loss = running_breed_loss/len(dataloader)
    avg_total_loss = running_total_loss / len(dataloader)

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Emotion loss: {avg_emotion_loss:.4f}, "
          f"Breed Loss: {avg_breed_loss:.4f}, "
          f"Total Loss: {avg_total_loss:.4f}")

# Step 7: Save the trained model (optional)
torch.save(model.state_dict(), 'multi_task_resnet.pth')


torch.float32


RuntimeError: expected scalar type Long but found Float

In [None]:
# Convert to (20, 32, 32, 3) by repeating grayscale data along the last dimension (3 channels)
subset_rgb = np.stack([subset_train] * 3, axis=-1)  # Shape will be (20, 32, 32, 3)
# print(subset_rgb)
# Convert to (20, 3, 32, 32) for PyTorch compatibility
subset_rgb = np.transpose(subset_rgb, (0, 3, 1, 2))  # Rearranging to (batch_size, channels, height, width)
# print(subset_rgb)
# Convert to torch tensor
input_tensor = torch.tensor(subset_rgb, dtype=torch.float32)

print(input_tensor.shape)  # Output: torch.Size([20, 3, 32, 32])

In [93]:
subset_train.shape

(20, 32, 32)

In [96]:

subset_rgb = np.stack([subset_train] * 3, axis=-1)  # Shape will be (20, 32, 32, 3)
# Convert to (20, 3, 32, 32) for PyTorch compatibility
subset_rgb = np.transpose(subset_rgb, (0, 3, 1, 2))  # Rearranging to (batch_size, channels, height, width)
# Convert to torch tensor
input_tensor = torch.tensor(subset_rgb, dtype=torch.int64)

In [104]:
input_tensor.long().shape

torch.Size([20, 3, 32, 32])