In [2]:
import torch
import torchvision
import random
import os

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Location of a `.pth` file. Presumably where the trained model weights are
# saved/loaded; its use is not visible in this section (TODO confirm).
PATH = './emnist.pth'

## Download/load data

In [3]:
# Preprocessing attached to the torchvision dataset: convert each image to a
# tensor, then normalise it with mean 0.5 and std 0.5.
# NOTE(review): later cells read `data_set.data` directly; presumably that
# bypasses this transform, so the arrays used downstream are not normalised —
# confirm.
transform = T.Compose(
    [
        T.ToTensor(),
        # One-element tuples: `(0.5)` is just the float 0.5, not a tuple.
        T.Normalize((0.5,), (0.5,)),
    ]
)

# EMNIST "byclass" training split, downloaded into ./data when missing.
data_set = torchvision.datasets.EMNIST(
    root='./data',
    split="byclass",
    train=True,
    download=True,
    transform=transform,
)

In [4]:
import string
# Remove the upper-case letter classes (labels 10..35 are treated as upper case).
uppers = np.arange(10, 36)

pics = np.array(data_set.data)
targets = np.array(data_set.targets)

targets_indx = np.arange(len(targets))

# Positions (into `pics` / `targets`) of every sample whose label is NOT an
# upper-case class. A single vectorised mask replaces the per-sample Python
# loop; the result is still a plain list of indices in ascending order.
new_targets = targets_indx[~np.isin(targets, uppers)].tolist()

In [5]:
# Keep only the pictures and labels at the retained (non-upper-case) positions.
data_pics, data_target = pics[new_targets], targets[new_targets]

In [146]:
# The classes are unevenly populated: find the class with the fewest samples
# and use that count as the per-class sample size.
labels_present, label_counts = np.unique(data_target, return_counts=True)
frequency_dict = {label: int(count) for label, count in zip(labels_present, label_counts)}

# First label (in sorted order) that attains the smallest count.
min_key, min_value = min(frequency_dict.items(), key=lambda pair: pair[1])

print("Minimum key:", min_key)
print("Minimum amount:", min_value)

# Name -> label mapping of the dataset with the upper-case letters dropped.
upper_case = set(string.ascii_uppercase)
class_dict = {name: idx for name, idx in data_set.class_to_idx.items() if name not in upper_case}

# Class name(s) whose label equals the least populated label.
value = {name for name, idx in class_dict.items() if idx == min_key}
print("min value key:",value)


Minimum key: 45
Minimum amount: 1896
min value key: {'j'}


In [7]:
# Get an even number of pictures for every class: for each label keep the
# positions of its first `n` samples, where `n` is the size of the smallest class.
n = min_value
u = pd.unique(data_target)  # labels in order of first appearance

# One vectorised scan per class instead of a Python loop over every sample for
# every class; each value is still a list of ascending indices into `data_target`.
class_indexs = {x: np.flatnonzero(data_target == x)[:n].tolist() for x in u}
        

In [8]:
# Flatten the per-class index lists into parallel lists of pictures and labels.
even_pics = []
even_targets = []

# Iterate over the stored index lists rather than `range(n)`, so this cell does
# not depend on the current value of the global `n`.
for key, indices in class_indexs.items():
    for idx in indices:
        even_pics.append(data_pics[idx])
        even_targets.append(key)

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the balanced samples for testing; the fixed random_state
# keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    np.asarray(even_pics),
    np.asarray(even_targets),
    test_size=0.2,
    random_state=1,
)

In [None]:
# Preview one randomly chosen training sample together with its label.
# `randint` excludes the upper bound, so the index is always valid for
# `X_train` / `y_train` (the deprecated `random_integers` included it).
# A dedicated name is used so the per-class sample count `n` is not overwritten.
sample_idx = np.random.randint(0, len(y_train))
# Transposed for display — presumably the stored images are transposed; confirm.
plt.imshow(np.transpose(X_train[sample_idx]), cmap='gray')
plt.show()
print(y_train[sample_idx])

# Print the class name that corresponds to the sample's label.
for i in data_set.class_to_idx.keys():
    if data_set.class_to_idx[i] == y_train[sample_idx]:
        print(i)

In [161]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    """Map-style dataset over two parallel, indexable collections.

    Args:
        x_data: indexable collection of samples; its length is the dataset length.
        y_data: indexable collection of targets, indexed in parallel with `x_data`.
    """

    def __init__(self, x_data, y_data):
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        """Return the number of entries in `x_data`."""
        return len(self.x_data)

    def class_idx(self, Dataset):
        """Return `Dataset.class_to_idx` without the upper-case letter entries.

        Args:
            Dataset: object exposing a `class_to_idx` mapping. Inside this
                method the parameter name shadows the imported `Dataset` class.

        Returns:
            dict of the remaining name -> index pairs, in their original order.
        """
        upper_case = list(string.ascii_uppercase)
        class_dict = {}
        for name, index in Dataset.class_to_idx.items():
            if name in upper_case:
                continue
            class_dict[name] = index
        return class_dict

    def __getitem__(self, idx):
        """Return `(x, y)` for position `idx`, both as float tensors."""
        sample = torch.tensor(self.x_data[idx], dtype=torch.float)
        # NOTE(review): the target is also converted to float — confirm this is
        # what the loss function used downstream expects.
        target = torch.tensor(self.y_data[idx], dtype=torch.float)
        return sample, target

# CustomDataset takes (x_data, y_data): pictures first, labels second.
# The arguments were previously passed in the opposite order, which made each
# item come back as (label, picture).
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)



In [163]:
# NOTE(review): `class_idx` is referenced here without being called, so this
# cell displays the bound method object rather than the class mapping. The
# method takes a dataset argument, e.g. `train_dataset.class_idx(data_set)`.
train_dataset.class_idx

<bound method CustomDataset.class_idx of <__main__.CustomDataset object at 0x15b825990>>

In [None]:
# Batch size shared by the training and test loaders.
batch_size = 5
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)

# The test loader keeps a fixed order (no shuffling).
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False
)

# `class_idx` is a method that needs the source dataset: it must be called.
# Assigning the bound method itself made `classes.keys()` raise AttributeError.
classes = train_loader.dataset.class_idx(data_set)
number_of_classes = len(classes.keys())
# NOTE(review): the kept classes retain their original label values (e.g. 45
# for 'j'), so labels are not a contiguous 0..number_of_classes-1 range —
# confirm the model/loss downstream accounts for that.

def get_class_name(class_index):
    """Look up the class name mapped to `class_index` in the global `classes`.

    Args:
        class_index: label value to search for.

    Returns:
        The first name in `classes` whose index equals `class_index`.
    """
    for label_name, label_index in classes.items():
        if label_index != class_index:
            continue
        return label_name