In [37]:
import torch.nn.functional as F
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def norm_np_img(data):
    return data.astype(float) / 255.

In [3]:
def load_data_from_csv(file_name, y_column, batch_size=32, valid_size=0.25):
    import pandas as pd
    
    df = pd.read_csv(file_name).astype(float)
    df = df.sample(frac=1)  # shuffle dataframe
    train_size, valid_size = int(df.shape[0]*(1-valid_size)), int(df.shape[0]*(valid_size))
    print(f"train size = {train_size}, validation size = {valid_size}")
    
    y_train_np_data, y_valid_np_data = df[y_column].head(train_size).values, df[y_column].tail(valid_size).values
    x_train_np_data, x_valid_np_data = (df.drop(columns=[y_column]).head(train_size).values, 
                                        df.drop(columns=[y_column]).tail(valid_size).values)
    
    x_train_np_data, x_valid_np_data = norm_np_img(x_train_np_data), norm_np_img(x_valid_np_data)
    print(f"X train shape = {x_train_np_data.shape}, Y train shape = {y_train_np_data.shape}")
    
    y_train_tensor, y_valid_tensor = torch.from_numpy(y_train_np_data), torch.from_numpy(y_valid_np_data)
    x_train_tensor, x_valid_tensor = torch.from_numpy(x_train_np_data), torch.from_numpy(x_valid_np_data)
    
    tensor_ds_train = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)  # create TRAIN tensor dataset
    tensor_ds_valid = torch.utils.data.TensorDataset(x_valid_tensor, y_valid_tensor)  # create VALID tensor dataset
    
    tensor_dl_train = torch.utils.data.DataLoader(tensor_ds_train, batch_size=batch_size, shuffle=True)
    tensor_dl_valid = torch.utils.data.DataLoader(tensor_ds_valid, batch_size=batch_size*2)
    
    return (tensor_dl_train, tensor_dl_valid)

In [71]:
class CudaLoader:
    # Iterable obj
    
    def __init__(self, DataLoader):
        self.dl = DataLoader
        self.dev = torch.device(0) if torch.cuda.is_available() else torch.device('cpu')
        print('Device is', self.dev)
    
    def __len__(self):
        return len(self.dl)
    
    def __iter__(self):
        iter_dl = iter(self.dl)
        for xb, yb in iter_dl:
            yield self.preprocess(xb, yb)
    
    def preprocess(self, xb, yb):
        return xb.view(-1, 1, 28, 28).to(self.dev), yb.view(-1, 1).to(self.dev)

In [8]:
train_dl, valid_dl = load_data_from_csv('mnist_dataset\\train.csv', y_column='label', batch_size=64)

train size = 31500, validation size = 10500
X train shape = (31500, 784), Y train shape = (31500,)


In [72]:
train_cdl, valid_cdl = CudaLoader(train_dl), CudaLoader(valid_dl)

Device is cuda:0
Device is cuda:0


In [56]:
class CNN_Net(torch.nn.Module):
    
    def __init__(self, output_size):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        
        self.fc1 = nn.Linear(18432, 32)  # bad value - 18432
        self.out = nn.Linear(32, output_size)
    
    def forward(self, x):
        c1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        c2 = F.max_pool2d(F.relu(self.conv2(c1)), (2, 2))
        c3 = F.max_pool2d(F.relu(self.conv3(c2)), (2, 2))
        
        c3 = c3.view(-1, 18432)
        f1 = F.relu(self.fc1(c3))
        out = self.out(f1)
        return out

In [78]:
model = CNN_Net(output_size=1).double().cuda()
optim = torch.optim.Adam(model.parameters(), lr=2e-3)

In [67]:
loss_func = F.cross_entropy

In [69]:
def train(model, epochs, optimizer, loss_func, train_loader, valid_loader=None):
    for i in range(epochs):
        for xb, yb in train_loader:
            optimizer.zero_grad()
            
            preds = model(xb)
            loss = loss_func(preds, yb)
            
            loss.backward()
            optimizer.step()
            print(loss)

In [74]:
train(model, epochs=1, optimizer=optim, loss_func=loss_func, train_loader=train_cdl)

ValueError: Expected input batch_size (1) to match target batch_size (64).

In [80]:
for xb, yb in train_cdl:
    print(yb.shape)
    break

torch.Size([64, 1])
