### Importing necessary libraries

In [49]:
import math
import os

import matplotlib
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from torchvision.io import read_image
from torch.utils.data import Dataset

### Selecting the compute device (GPU if available, otherwise CPU)

In [None]:
# Report whether CUDA is usable, then pick the matching torch device
# ("cuda" when available, "cpu" otherwise).
print(torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

### Creating the dataset and loading in the monkeys, defining transforms
- Composition of transforms (convert to grayscale, then convert the pixel dtype to float32)

In [59]:
# Preprocessing pipeline applied to every loaded image, in order:
#   1. Grayscale            - convert the image to grayscale
#   2. ConvertImageDtype    - convert the tensor to float32
composed = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.ConvertImageDtype(torch.float32),
    ]
)

class MonkeyDataset(Dataset):
    """Map-style dataset serving every file found below ``img_dir``.

    Each sample is an ``(image, label)`` pair: ``image`` is the tensor
    returned by ``read_image`` (passed through ``transform`` when one is
    set) and ``label`` is the first two characters of the file name
    (e.g. ``"n0"``).

    Args:
        annotations_file: Path of the label table, loaded with
            ``pd.read_csv`` into ``self.img_labels``.
        img_dir: Root directory that is walked recursively for image files.
        transform: Optional callable applied to each loaded image. With
            ``None`` (the default) the image is returned unchanged.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        self.annotations_file = annotations_file
        self.img_dir = img_dir
        self.img_labels = pd.read_csv(annotations_file)
        self.transform = transform
        # Image count *declared* by the annotations table (4th column, train
        # images). Kept as an attribute for reference only; it is not used as
        # the dataset length because it can disagree with the files on disk.
        self.number_samples = sum(self.img_labels.iloc[:, 3])

        # Collect (full path, file name) for every file below img_dir.
        # os.walk yields entries in arbitrary order, so sort the result to
        # keep the index -> sample mapping reproducible between runs.
        self.monkeys = sorted(
            (os.path.join(dirname, filename), filename)
            for dirname, _, filenames in os.walk(self.img_dir)
            for filename in filenames
        )

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``.

        Raises:
            IndexError: If ``index`` is outside the collected file list.
        """
        monkey_path, monkey_filename = self.monkeys[index]

        # Read monkey image from its full path
        monkey = read_image(monkey_path)

        # Get monkey label from filename
        label = monkey_filename[:2]  # Works since n0 to n9 all 2 characters

        # Apply the transform only when one was supplied; otherwise hand back
        # the raw image instead of failing on an unbound variable.
        if self.transform is not None:
            monkey = self.transform(monkey)

        return monkey, label

    def __len__(self):
        """Return the number of files actually collected from ``img_dir``."""
        # Must match the indexable range of __getitem__, so count the files
        # that were found rather than trusting the annotations table.
        return len(self.monkeys)


# NOTE: the bare string below is a leftover scratch snippet; as an expression
# statement it has no effect when the cell runs.
'''
for dirname, _, filenames in os.walk('./kaggle/input'):
'''
# train_data = MonkeyDataset()

# Build a dataset over the training image folder and print the sample at
# index 69 as a quick smoke test.
test_data = MonkeyDataset(
    annotations_file="./kaggle/input/10-monkey-species/monkey_labels.txt",
    img_dir="./kaggle/input/10-monkey-species/training/training",
    transform=composed,
)
print(test_data[69])

(tensor([[[0.0588, 0.0588, 0.0549,  ..., 0.4314, 0.4235, 0.4157],
         [0.0588, 0.0549, 0.0549,  ..., 0.4314, 0.4235, 0.4196],
         [0.0549, 0.0549, 0.0510,  ..., 0.3882, 0.3804, 0.3804],
         ...,
         [0.2431, 0.2353, 0.2314,  ..., 0.7529, 0.7490, 0.7490],
         [0.2118, 0.2118, 0.2078,  ..., 0.7451, 0.7451, 0.7451],
         [0.1961, 0.2039, 0.2039,  ..., 0.7451, 0.7412, 0.7412]]]), 'n0')


In [47]:
# Load the label table and display its fourth column (the per-class
# training-image counts shown in the cell output).
e = pd.read_csv("./kaggle/input/10-monkey-species/monkey_labels.txt")
e.iloc[:, 3]

0    131
1    139
2    137
3    152
4    131
5    141
6    132
7    142
8    133
9    132
Name:  Train Images , dtype: int64