In [19]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

import ast
import re

import json
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

<matplotlib.pyplot._IonContext at 0x2121fc0e460>

In [20]:
mbra_df = pd.read_csv('mbra_dataset.csv')

n = 3833
img_name = mbra_df.iloc[n, 0]
class_name = mbra_df.loc[n, 'class']
common_name = mbra_df.loc[n, 'common_name']

print('Image name: {}'.format(img_name))
print(class_name, common_name)

# Hyper parameters
num_epochs = 8
num_classes = 4
batch_size = 256
learning_rate = 0.002

root_path = 'C:/Users/KurtJi/OneDrive - University of Illinois - Urbana/Desktop/Personal Projects/data/inaturalist/'

Image name: train_mini/02989_Animalia_Chordata_Amphibia_Anura_Hylidae_Hyla_gratiosa/c07e2e0a-9eb3-4af5-b4dd-783493d7182e.jpg
Amphibia Barking Tree Frog


In [21]:

### **Dataset Class**
class MbraDataset(Dataset):
    """Dataset class for the MBRA dataset."""

    def __init__(self, df, root_dir, transform=None):
        """
        Args:
            df (DataFrame): DataFrame with image_path and class_id.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.df = df
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.df.iloc[idx, 0])
        image = io.imread(img_name)
        class_id = int(self.df.iloc[idx, -1])  # selecting the last column as class_id
        sample = {'image': image, 'label': class_id}

        if self.transform:
            sample = self.transform(sample)

        return sample

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = os.path.join(self.root_dir, self.df.iloc[idx, 0])
        image = io.imread(img_path) # Load the image using skimage.io
        label = self.df.iloc[idx, 1]

        sample = {'image': image, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample

# Convert class names to unique integer identifiers
label_encoder = LabelEncoder()
mbra_df['class_id'] = label_encoder.fit_transform(mbra_df['class'])

# Check the transformation
mbra_df[['class', 'class_id']].head()

# Create a new DataFrame with only necessary columns
mbra_df_class = mbra_df[['image_path', 'class_id']].copy()

# Test the dataset class
mbra_dataset = MbraDataset(df=mbra_df_class, root_dir=root_path)

# Get the first sample to test the dataset class
first_sample = mbra_dataset[0] if len(mbra_dataset) > 0 else None
first_sample

{'image': array([[[247, 200, 156],
         [144, 100,  55],
         [ 99,  54,  15],
         ...,
         [ 44,  18,   5],
         [ 76,  50,  37],
         [ 86,  60,  47]],
 
        [[251, 208, 166],
         [202, 160, 112],
         [163, 119,  70],
         ...,
         [ 66,  47,  30],
         [ 84,  65,  48],
         [ 78,  59,  42]],
 
        [[240, 201, 168],
         [175, 137,  90],
         [221, 179, 119],
         ...,
         [ 83,  70,  53],
         [ 66,  53,  36],
         [ 49,  36,  19]],
 
        ...,
 
        [[255, 239, 213],
         [149, 121,  97],
         [ 51,  25,   2],
         ...,
         [255, 244, 251],
         [185, 156, 150],
         [122,  91,  73]],
 
        [[208, 179, 163],
         [ 86,  59,  42],
         [ 52,  27,   7],
         ...,
         [174, 158, 169],
         [255, 247, 249],
         [171, 152, 146]],
 
        [[159, 130, 114],
         [ 77,  50,  33],
         [ 51,  26,   6],
         ...,
         [ 75,  70,

In [22]:
device = torch.cuda.current_device()
torch.cuda.get_device_name(device)
### **Model Structure**
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader, Dataset
# Creating a CNN class
# image size is (3,256, 256)
class SimpleCNN(nn.Module):
	#  Determine what layers and their order in CNN object 
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.fc1 = nn.Linear(1600, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
    
    # Progresses data across layers    
    def forward(self, x):
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.max_pool1(out)
        
        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.max_pool2(out)
                
        out = out.reshape(out.size(0), -1)
        
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out
model = SimpleCNN(num_classes).to(device)






### **Add Transformer**
from sklearn import preprocessing
import torch
class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        h, w = image.shape[:2]
        # if isinstance(self.output_size, int):
        #     if h > w:
        #         new_h, new_w = self.output_size * h / w, self.output_size
        #     else:
        #         new_h, new_w = self.output_size, self.output_size * w / h
        # else:
        new_h, new_w = self.output_size, self.output_size

        new_h, new_w = int(new_h), int(new_w)

        img = transform.resize(image, (new_h, new_w))

        # h and w are swapped for label because for images,
        # x and y axes are axis 1 and 0 respectively
        # label = label * [new_w / w, new_h / h]

        return {'image': img, 'label': label}


class RandomCrop(object):
    """Crop randomly the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h,
                      left: left + new_w]

        # label = label - [left, top]

        return {'image': image, 'label': label}


class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        return {'image': torch.from_numpy(image).float(),  # convert the image to a float Tensor
                'label': label}  # keep the label as an integer
    
mbra_data_class = MbraDataset(df = mbra_df_class,
                                           root_dir=root_path,
                                           transform=transforms.Compose([
                                               Rescale(256),
                                               ToTensor()
                                           ]))


train_size = int(0.8 * len(mbra_data_class))
test_size = len(mbra_data_class) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(mbra_data_class, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=256,
                        shuffle=True, num_workers=0)

test_loader = DataLoader(test_dataset, batch_size=256//2,
                        shuffle=False, num_workers=0)

len(train_loader)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adamax(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
print(total_step)
for epoch in range(num_epochs):
    for batch in train_loader:  # replace 'data_loader' with your actual DataLoader
        images = batch['image']
        labels = batch['label']
        print(type(images), type(labels))
        images = images.to(device)
        print(images)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

347
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([[[[0.0566, 0.0640, 0.0664,  ..., 0.3324, 0.2555, 0.2578],
          [0.0501, 0.0623, 0.0643,  ..., 0.2690, 0.2389, 0.2264],
          [0.0520, 0.0629, 0.0613,  ..., 0.2794, 0.2524, 0.2775],
          ...,
          [0.1254, 0.1253, 0.1256,  ..., 0.2586, 0.2208, 0.1852],
          [0.1206, 0.1194, 0.1244,  ..., 0.2410, 0.3343, 0.3436],
          [0.1214, 0.1175, 0.1180,  ..., 0.2064, 0.2569, 0.3446]],

         [[0.0949, 0.0993, 0.1017,  ..., 0.3856, 0.2989, 0.2923],
          [0.0893, 0.0976, 0.0995,  ..., 0.3213, 0.2872, 0.2692],
          [0.0912, 0.0966, 0.0929,  ..., 0.3347, 0.3019, 0.3246],
          ...,
          [0.1332, 0.1332, 0.1334,  ..., 0.2541, 0.2160, 0.1799],
          [0.1284, 0.1273, 0.1322,  ..., 0.2275, 0.3153, 0.3177],
          [0.1293, 0.1254, 0.1259,  ..., 0.1976, 0.2382, 0.3206]],

         [[0.1302, 0.1346, 0.1370,  ..., 0.2994, 0.2333, 0.2385],
          [0.1263, 0.1358, 0.1351,  ..., 0.2446, 0.2217, 0

RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x238144 and 1600x128)