In [1]:
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler,Normalizer,MinMaxScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from torch.utils.data import DataLoader,Dataset
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn

In [3]:
class dataset(Dataset):
    def __init__(self,transform = None):
        self.data = make_classification(n_samples=90000,n_features=5,n_classes=3,n_clusters_per_class=1,random_state=42)
        self.features,self.classes =self.data
        self.transform = transform
        
    def __getitem__(self,index):
        sample = self.features[index].reshape(-1,1),self.classes[index]
        if self.transform:
            return self.transform(sample)
        else:
            return sample

    def __len__(self):
        return len(self.features)

In [4]:
class transform(MinMaxScaler):
    """implement a custom Normalization using Min Max Scaler """
    
    def __init__(self,ToTensor = False):
        """ToTensor >>>> Convert output samples to Tensors"""
        
        self.ToTensor = ToTensor
        super(transform,self).__init__()
        
    def __call__(self,sample):
        x,y = sample
        x = super().fit_transform(x)
        if self.ToTensor:
            x = torch.from_numpy(x)
            y = torch.tensor(y)
            y = y.type(torch.int64) #cross entropy requires target to be in int64 format (long)
        return x,y

In [16]:
scaler = transform(ToTensor=True)
sample = dataset(scaler)
sample[0:1]

(tensor([[0.0000],
         [0.2873],
         [0.2400],
         [0.1653],
         [1.0000]], dtype=torch.float64),
 tensor([2]))

In [6]:
data = dataset(transform = transform(ToTensor=True))

In [7]:
"""70% percent of our data will be used for training, 15% for testing and the last 15% for validation"""


print("len of our training data ::: ",(70/100)*len(data))
print("len of our testing data ::: ",(15/100)*len(data))
print("len of validation data ::: ",(15/100)*len(data))

len of our training data :::  62999.99999999999
len of our testing data :::  13500.0
len of validation data :::  13500.0


In [8]:
indeces = np.arange(90000)
np.random.shuffle(indeces)

In [9]:
training_dataloader = DataLoader(data,batch_size = 64,sampler = torch.utils.data.SubsetRandomSampler(indeces[0:62999]),
                                 shuffle = False)

testing_dataloader = DataLoader(data,batch_size = 64,sampler = torch.utils.data.SubsetRandomSampler(indeces[62999:76499]),
                               shuffle = False)

validation_dataloader = DataLoader(data,batch_size=64,sampler = torch.utils.data.SubsetRandomSampler(indeces[76499::]),
                                  shuffle = False)

In [10]:
x,y = next(iter(training_dataloader))
x.shape

torch.Size([64, 5, 1])

In [11]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier,self).__init__()
        self.sequential = nn.Sequential(nn.Linear(5*1,200),nn.ReLU(inplace = True),nn.Linear(200,50),nn.ReLU(inplace = True),
                     nn.Linear(50,3))
        
    def forward(self,x):
        x = x.view(-1,5*1).type(torch.float)
        x = self.sequential(x)
        return x

In [12]:
model = Classifier()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr = 0.01)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('available device :::: {}'.format(device))

available device :::: cpu


In [13]:
def training_loop(model,dataloader,optimizer,loss_fn):
    model.train()
    for batch,(x,y) in enumerate(dataloader):
        x,y = x.to(device),y.to(device)
        y_pred  = model(x)
        loss = loss_fn(y_pred,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print("Training Completed")

def testing_and_validation_loop(model,dataloader,loss_fn,mode):
    model.eval()
    test_val_loss,accuracy = 0,0
    with torch.no_grad():
        for x,y in dataloader:
            x,y = x.to(device),y.to(device)
            y_pred = model(x)
            loss = loss_fn(y_pred,y)
            test_val_loss+=loss
            acc = (y_pred.argmax(1)==y).sum().item()
            accuracy+= acc
        accuracy/= len(dataloader.dataset)
        test_val_loss/=len(dataloader)
        print('{} Accuracy ::: {}   >>>>>   Loss  ::: {}'.format(mode,accuracy,test_val_loss))

In [14]:
num_epochs = 50
for epochs in range(num_epochs):
    if epochs%5==0:
        print(f"Model Training ...........................................{epochs}/50")
        training_loop(model,training_dataloader,optimizer,loss_fn)
        print('Model Validation..........................................')
        testing_and_validation_loop(model,validation_dataloader,loss_fn,mode = 'validation')
        print('Model Testing')
        testing_and_validation_loop(model,testing_dataloader,loss_fn,mode = 'testing')
print('\t\t\t\t\t\tEpochs Completed..............................................\n')

Model Training ...........................................0/50
Training Completed
Model Validation..........................................
validation Accuracy ::: 0.11352222222222222   >>>>>   Loss  ::: 0.5645785331726074
Model Testing
testing Accuracy ::: 0.11425555555555555   >>>>>   Loss  ::: 0.5540823936462402
Model Training ...........................................5/50
Training Completed
Model Validation..........................................
validation Accuracy ::: 0.12223333333333333   >>>>>   Loss  ::: 0.44622355699539185
Model Testing
testing Accuracy ::: 0.12268888888888889   >>>>>   Loss  ::: 0.4358471930027008
Model Training ...........................................10/50
Training Completed
Model Validation..........................................
validation Accuracy ::: 0.12656666666666666   >>>>>   Loss  ::: 0.38925808668136597
Model Testing
testing Accuracy ::: 0.12675555555555557   >>>>>   Loss  ::: 0.3804127871990204
Model Training ............................

In [2]:
1+2

3