In [1]:
import numpy as np
import pandas as pd
# Importing all ML dependencies
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

#import cv2
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torchvision

# Plots
import matplotlib.pyplot as plt

# Utils
from tqdm import tqdm, trange
import os

ModuleNotFoundError: No module named 'torch'

## Creating the DataSet class

Importing `Dataset` and `DataLoader` from `torch.utils.data`

In [5]:
from torch.utils.data import Dataset, DataLoader

ModuleNotFoundError: No module named 'torch'

In [None]:
class DataSet(Dataset):
    """Toy dataset whose samples are the integers 0 through 9999."""

    def __init__(self):
        # Samples are kept in a 1-D NumPy array of consecutive integers.
        self.data = np.arange(10000)

    def __len__(self):
        """Return the number of stored samples."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return the sample (or samples) found at ``index``."""
        return self.data[index]

In [None]:
# Instantiate the toy dataset and inspect its size and contents.
dataset = DataSet()
print(len(dataset)) # The length of the dataset
print(dataset.data) # The underlying array of samples

### Training 

Importing the FashionMNIST dataset from torchvision datasets

In [None]:
from torchvision.datasets import FashionMNIST

Creating the train and test data for our model

In [None]:
# Both splits live in (and are downloaded into) the same root directory.
fashion_root = os.path.join('..', 'data', 'tmp', 'FashionMNIST')
train_data = FashionMNIST(root=fashion_root, train=True, download=True)
test_data = FashionMNIST(root=fashion_root, train=False, download=True)

Changing the pixel values from 8-bit integers (0–255) to floating-point numbers in the range 0–1, so that the network receives normalized inputs and trains more stably.

In [None]:
import pandas as pd

# Scale the raw pixel values by 1/255; the division also turns the integer
# tensors into floating-point tensors.
X_train = train_data.data / 255
X_test = test_data.data / 255

# Labels are used exactly as provided by the dataset.
y_train = train_data.targets
y_test = test_data.targets


Reshaping each 28×28 image into a flat vector of 784 features, so that every sample becomes one row of a 2-D tensor

In [None]:
# Flatten every image into a single feature vector: the sample axis (dim 0)
# is kept and -1 lets reshape infer the product of the remaining dimensions
# (height * width). Unlike shape[1]**2 this does not assume square images and
# is a no-op when the cell is re-run on already flattened data.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)



Displaying the tensors' final shapes

In [None]:
# Show the shapes of the feature and label tensors of both splits.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Join the train and test splits along the sample axis into single arrays;
# a new split is drawn from the combined data further down.
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)
X.shape, y.shape

In [None]:
def display_ex_img():
    """Show the first 16 x 16 samples of ``X`` as one image mosaic.

    Reads the module-level ``X`` and treats every row as a 28 x 28 image.

    Returns:
        The object returned by ``plt.imshow``.
    """
    side = 16
    tiles = X[:side * side]
    plt.figure(figsize=(10, 10))
    # Each of the `side` strips holds `side` images stacked vertically.
    strips = tiles.reshape(side, 28 * side, 28)
    # Joining the strips along axis 1 places them next to each other.
    mosaic = np.concatenate(strips, axis=1)
    return plt.imshow(mosaic)


display_ex_img()

In [None]:
class DigitsDataset(Dataset):
    """Dataset of feature rows ``X`` paired with integer class labels ``y``.

    Attributes:
        X: features stored as a ``torch.float32`` tensor.
        y: labels stored as a ``torch.long`` tensor.
        num_features: size of the second dimension of ``X``.
        num_classes: number of distinct values found in ``y``.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        # Metadata used later to size the model's input and output layers.
        self.num_features = X.shape[1]
        self.num_classes = np.unique(y).size

    def __len__(self):
        """Return the number of samples."""
        return self.X.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X[index]
        label = self.y[index]
        return features, label

In [None]:
# Wrap the combined arrays in a Dataset and build a shuffling loader over
# all of it, serving mini-batches of 64 samples.
dataset = DigitsDataset(X, y)
dataloader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=64,
)

In [None]:
from torch.utils.data import random_split

# 80% of the samples go to training; whatever remains becomes the test split,
# so the two lengths always add up to the dataset size.
n_total = len(dataset)
train_len = int(.8 * n_total)
test_len = n_total - train_len

train_dataset, test_dataset = random_split(dataset, lengths=[train_len, test_len])

In [None]:
import torch.nn.functional as F

class Model(nn.Module):
    """Fully connected classifier with two hidden layers (256 and 128 units).

    Args:
        input_dim: number of input features per sample.
        output_dim: number of classes, i.e. size of the output layer.
    """

    def __init__(self, input_dim, output_dim):
        super(Model, self).__init__()
        # Hidden layers
        self.fc = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        # Output layer
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch.

        Args:
            x: float tensor of shape ``(batch, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` holding unnormalized
            logits. No softmax is applied here: ``nn.CrossEntropyLoss``
            performs log-softmax internally, so feeding it probabilities
            would apply softmax twice and flatten the gradients. The argmax
            of the logits equals the argmax of the probabilities; call
            ``F.softmax(logits, dim=1)`` when probabilities are needed.
        """
        x = F.leaky_relu(self.fc(x), negative_slope=0.2)
        x = F.leaky_relu(self.fc2(x), negative_slope=0.2)
        return self.fc3(x)

In [None]:
# The layer sizes come from the dataset: one input per feature and one
# output per class.
input_shape, output_shape = dataset.num_features, dataset.num_classes
model = Model(input_dim=input_shape, output_dim=output_shape)

In [None]:
# List the shape of every parameter tensor of the model (weights and biases).
for param in model.parameters():
    print(param.shape)

In [None]:
# Adam optimizer over all model parameters, with a fixed learning rate.
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Alternative optimizers, currently disabled:
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.Adadelta(model.parameters(), learning_rate, rho=0.3, eps=1e-03, weight_decay=0)

# Multi-class classification loss. NOTE: nn.CrossEntropyLoss expects raw
# (unnormalized) logits and integer class indices as targets.
loss_function = nn.CrossEntropyLoss()

In [None]:
# Mini-batch size used while training.
batch_size = 64
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The test loader must wrap the held-out split (not the training split) and
# serves it as a single batch containing every test sample.
testloader = DataLoader(test_dataset, batch_size=len(test_dataset))

In [None]:
epochs = 100
accs = []    # mean training accuracy of each epoch
losses = []  # mean training loss of each epoch
model.train()
for epoch in (t := trange(epochs)):

    # Running sums over the batches of the current epoch.
    loss_epoch = 0.
    acc = 0.
    num_batches = 0
    for X_batch, y_batch in trainloader:
        num_batches += 1
        y_pred = model(X_batch)

        loss = loss_function(y_pred, y_batch)
        loss_epoch += loss.item()

        # accuracy_score takes (y_true, y_pred); the predicted class is the
        # index of the largest output score.
        acc += accuracy_score(y_batch, torch.argmax(y_pred, dim=1))

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Turn the running sums into per-batch averages.
    acc /= num_batches
    loss_epoch /= num_batches
    losses.append(loss_epoch)
    accs.append(acc)
    # Label both metrics instead of printing a tuple under "Loss".
    t.set_description(f"Loss: {loss_epoch:.2f} | Acc: {acc:.2f}")

In [None]:
# Training curves: mean loss (left) and mean accuracy (right) per epoch.
fig, axs = plt.subplots(1, 2, figsize = (20, 5))

axs[0].plot(losses)
axs[0].set(title="Training loss", xlabel="Epoch", ylabel="Mean loss")

axs[1].plot(accs)
axs[1].set(title="Training accuracy", xlabel="Epoch", ylabel="Mean accuracy");

In [None]:
model.eval()

# `test_dataset` is a Subset: `.dataset` is the FULL underlying dataset and
# `.indices` lists the samples that belong to the held-out split. Indexing
# with them keeps training samples out of the evaluation.
held_out = test_dataset.indices
X_test = test_dataset.dataset.X[held_out]
y_test = test_dataset.dataset.y[held_out]

# No gradients are needed for inference.
with torch.no_grad():
    y_pred = model(X_test)

# accuracy_score takes (y_true, y_pred).
print(accuracy_score(y_test, torch.argmax(y_pred, dim=1)))