<a href="https://colab.research.google.com/github/pushyag1/DeepLearningClass/blob/master/DATA690_Fall_2020_HW_2_PushyamiReddyGinnavaram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Question 1:
The input range of data can have a large impact on a neural network. This applies to inputs _and_ outputs, like for regression problems. Try applying  Scikit-learn's `StandardScaler` to the targets $\boldsymbol{y}$ of the toy regression problem at the start of this chapter, and train a new neural network on it. Does changing the scale of the outputs help or hurt the model's predictions?

In [None]:
from tqdm.autonotebook import tqdm
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import time

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import * 

from google.colab import drive
drive.mount('/content/gdrive/')

import sys
sys.path.append('/content/gdrive/My Drive/MPDL Fall 2020/')

from mpdl import *

In [None]:
from sklearn.preprocessing import StandardScaler

#importing StandardScaler

In [None]:
def train_simple_network(model, loss_func, training_loader, epochs=20, device="cpu"):
    """Train ``model`` in place with plain SGD (learning rate 0.001).

    Args:
        model: the PyTorch module whose parameters are optimized.
        loss_func: callable taking ``(y_hat, labels)`` and returning a scalar loss tensor.
        training_loader: iterable yielding ``(inputs, labels)`` batches.
        epochs: number of full passes over ``training_loader``.
        device: compute device the model and every batch are moved to.

    Returns:
        None. The model is updated in place.
    """
    #Create the optimizer: SGD is Stochastic Gradient Descent over the parameters $\Theta$
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

    #Place the model on the correct compute resource (CPU or GPU)
    model.to(device)
    #Iterate through all the data (batches) multiple times (epochs)
    for epoch in tqdm(range(epochs), desc="Epoch"):
        model.train() #Put our model in training mode

        for inputs, labels in tqdm(training_loader, desc="Batch", leave=False):
            #Move the batch of data to the device we are using
            inputs = moveTo(inputs, device)
            labels = moveTo(labels, device)

            #PyTorch accumulates gradients across backward() calls, so clear them first.
            #Otherwise they would still hold information from the previous iteration
            optimizer.zero_grad()

            y_hat = model(inputs) #this just computed $f_\theta(\boldsymbol{x_i})$

            # Compute loss.
            loss = loss_func(y_hat, labels)

            loss.backward() # $\nabla_\Theta$ just got computed by this one call!

            #Update all the parameters
            optimizer.step() # $\Theta_{k+1} = \Theta_k − \eta \cdot \nabla_\Theta \ell(\hat{y}, y)$

In [None]:
scalar = StandardScaler()

#initializing StandardScaler

In [None]:
#Create a 1-dimensional input
X = np.linspace(0, 20, num=200)
#create an output
y = X + np.sin(X)*2 + np.random.normal(size=X.shape)
sns.scatterplot(x=X, y=y)

In [None]:
y1=y.reshape(-1,1)

#reshaping the target variables y 

In [None]:
yscalar = scalar.fit_transform(y1)

#fitting the scaler to the target values and then scaling them

In [None]:
class Simple1DRegressionDataset(Dataset):
    """Dataset that serves (input, target) pairs of a 1-D regression problem as float32 tensors."""

    def __init__(self, X, yscalar):
        super().__init__()
        # Keep both arrays as column vectors so that row i holds sample i.
        self.X = X.reshape(-1, 1)
        self.yscalar = yscalar.reshape(-1, 1)

    def __len__(self):
        # One sample per row of the input column vector.
        n_samples, _ = self.X.shape
        return n_samples

    def __getitem__(self, index):
        features = torch.tensor(self.X[index, :], dtype=torch.float32)
        target = torch.tensor(self.yscalar[index], dtype=torch.float32)
        return features, target
    
training_loader = DataLoader(Simple1DRegressionDataset(X, yscalar), shuffle=True)

In [None]:
in_features = 1
out_features = 1
model = nn.Linear(in_features, out_features)
loss_func = nn.MSELoss()

#Use the GPU when one is available, otherwise fall back to the CPU so the
#notebook also runs on CPU-only runtimes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
train_simple_network(model, loss_func, training_loader, device=device)

In [None]:
with torch.no_grad():
    Y_pred = model(torch.tensor(X.reshape(-1,1), device=device, dtype=torch.float32)).cpu().numpy()

In [None]:
yscalar_reshape = yscalar.reshape(-1,)

In [None]:
sns.scatterplot(x=X, y=yscalar_reshape, color='blue', label='Data') #The data
sns.lineplot(x=X, y=Y_pred.ravel(), color='red', label='Linear Model') #What our model learned

In [None]:

model = nn.Sequential(
    nn.Linear(1,  30),#hidden layer
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),#activation
    nn.Linear(30,  30),
    nn.Tanh(),#activation
    nn.Linear(30,  30),
    nn.Tanh(),#activation
    nn.Linear(30,  30),
    nn.Tanh(),#activation
    nn.Linear(30, 1),#output layer
)


In [None]:
loss_func = nn.MSELoss()

#Use the GPU when one is available, otherwise fall back to the CPU so the
#cell also runs on CPU-only runtimes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_simple_network(model, loss_func, training_loader, device=device)

#training the model with multiple hidden layers and activation layers


In [None]:
with torch.no_grad():
    Y_pred = model(torch.tensor(X.reshape(-1,1), device=device, dtype=torch.float32)).cpu().numpy()

In [None]:
sns.scatterplot(x=X, y=yscalar_reshape, color='blue', label='Data') #The data
#Y_pred now comes from the multi-layer Tanh network, so the legend must not call it a linear model
sns.lineplot(x=X, y=Y_pred.ravel(), color='red', label='Neural Network') #What our model learned

#### Does changing the scale of the outputs help or hurt the model's predictions?

Yes, changing the scale of the outputs hurt the model's predictions in this experiment. The scale of the target variable $\boldsymbol{y}$ determines the size of the error, and therefore the size of the gradients and of the resulting weight updates, so changing it can make the model and the training process unstable. Adding hidden layers and activation functions gives the model a little non-linearity, but the resulting network is still not well trained.

# Question 2:
 The AUC metric does not follow the standard pattern in scikit-learn, as it requires `y_pred` to be a vector of shape $(N)$ instead of a matrix of shape $(N, 2)$. Write a wrapper function for AUC that will make it compatible with our `train_simple_network` function.

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
#from sklearn.metrics import roc_auc_score
from sklearn import metrics

#importing required sklearn metrics

In [None]:
from sklearn.datasets import make_moons

#importing make moons dataset

In [None]:
def train_simple_network(model, loss_func, train_loader, val_loader=None, score_funcs=None, 
                         epochs=50, device="cpu", checkpoint_file=None):
    """Train ``model`` with SGD and collect per-epoch results.

    Args:
        model: the PyTorch module to train (updated in place).
        loss_func: callable taking ``(y_hat, labels)`` and returning a scalar loss tensor.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: optional iterable of validation batches, evaluated after every epoch.
        score_funcs: optional dict mapping a score name to a ``score(y_true, y_pred)``
            callable. ``None`` (the default) means no extra scores are tracked.
        epochs: number of training epochs to run.
        device: compute device the model is moved to.
        checkpoint_file: optional path. When given, the model state, optimizer state,
            last epoch index and results are saved there once training has finished.

    Returns:
        pandas.DataFrame with one row per epoch and the columns "epoch", "total time",
        "train loss", plus "val loss" and "train <name>" / "val <name>" as applicable.
    """
    #The default is None, but the code below iterates over the dict, so normalize it here
    if score_funcs is None:
        score_funcs = {}

    to_track = ["epoch", "total time", "train loss"]
    if val_loader is not None:
        to_track.append("val loss")
    for eval_score in score_funcs:
        to_track.append("train " + eval_score )
        if val_loader is not None:
            to_track.append("val " + eval_score )
        
    total_train_time = 0 #How long have we spent in the training loop? 
    #Initialize every tracked item with an empty list
    results = {item: [] for item in to_track}
        
    #SGD is Stochastic Gradient Descent.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    #Place the model on the correct compute resource (CPU or GPU)
    model.to(device)
    #Keep `epoch` defined for the checkpoint below even if the loop never runs (epochs == 0)
    epoch = -1
    for epoch in tqdm(range(epochs), desc="Epoch"):
        model = model.train()#Put our model in training mode
        
        #Only the time spent on training batches is accumulated
        total_train_time += run_epoch(model, optimizer, train_loader, loss_func, device, results, score_funcs, prefix="train", desc="Training")

        results["total time"].append( total_train_time )
        results["epoch"].append( epoch )
        
        if val_loader is not None:
            model = model.eval()
            #No gradients are needed while evaluating
            with torch.no_grad():
                run_epoch(model, optimizer, val_loader, loss_func, device, results, score_funcs, prefix="val", desc="Testing")
                    
    if checkpoint_file is not None:
        #'epoch' is the 0-based index of the last completed epoch
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'results' : results
            }, checkpoint_file)

    return pd.DataFrame.from_dict(results)

In [None]:

def AUC_function(x, y):
  """Return the area under the ROC curve for binary labels.

  Args:
    x: true binary labels, shape (N,). The positive class is the label 1.
    y: predictions. Either a vector of shape (N,) holding a score for the
      positive class, or a matrix of shape (N, 2) holding one score per
      class (logits or probabilities), as a two-output network produces.

  Returns:
    The AUC as a float.
  """
  scores = np.asarray(y)
  if scores.ndim == 2:
    if scores.shape[1] == 2:
      #AUC only depends on how the samples are ranked. The difference between the two
      #class scores ranks samples the same way as the positive-class probability does,
      #both for logits (softmax is monotone in the difference) and for probabilities.
      scores = scores[:, 1] - scores[:, 0]
    elif scores.shape[1] == 1:
      scores = scores.ravel()
  fpr, tpr, _ = metrics.roc_curve(x, scores, pos_label=1)
  return metrics.auc(fpr, tpr)

#AUC_function computes the AUC (Area Under the ROC Curve). It takes two arguments, x (the true labels) and y (the predictions), because roc_curve needs both of them to compute the fpr and tpr values from which the area is calculated.
#fpr = false positive rate, tpr = true positive rate

#reference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.auc.html

In [None]:
def run_epoch(model, optimizer, data_loader, loss_func, device, results, score_funcs, prefix="", desc=None):
    """Run one pass over ``data_loader`` and append the loss and scores to ``results``.

    Args:
        model: the PyTorch module. Parameters are updated only when ``model.training`` is True.
        optimizer: optimizer used for the updates (unused when the model is in eval mode).
        data_loader: iterable of ``(inputs, labels)`` batches.
        loss_func: callable taking ``(y_hat, labels)`` and returning a scalar loss tensor.
        device: compute device every batch is moved to.
        results: dict of lists. ``prefix + " loss"`` and ``prefix + " " + name`` for every
            score name must already be present.
        score_funcs: dict mapping a score name to a ``score(y_true, y_pred)`` callable, or None.
            The score named "AUC" receives a continuous score for two-class outputs
            instead of the predicted labels.
        prefix: key prefix, e.g. "train" or "val".
        desc: label shown on the progress bar.

    Returns:
        The wall-clock time in seconds spent iterating over the batches.
    """
    if score_funcs is None:
        score_funcs = {}
    running_loss = []
    y_true = []
    y_pred = []
    start = time.time()
    for inputs, labels in tqdm(data_loader, desc=desc, leave=False):
        #Move the batch to the device we are using. 
        inputs = moveTo(inputs, device)
        labels = moveTo(labels, device)

        y_hat = model(inputs) #this just computed f_Θ(x(i))
        # Compute loss.
        loss = loss_func(y_hat, labels)

        if model.training:
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        #Now we are just grabbing some information we would like to have
        running_loss.append(loss.item())

        if len(score_funcs) > 0 and isinstance(labels, torch.Tensor):
            #moving labels & predictions back to CPU for computing / storing predictions
            labels = labels.detach().cpu().numpy()
            y_hat = y_hat.detach().cpu().numpy()
            #add to predictions so far
            y_true.extend(labels.tolist())
            y_pred.extend(y_hat.tolist())
    #end of epoch
    end = time.time()
    
    y_score = np.asarray(y_pred) #raw model outputs, one row per sample
    is_classification = y_score.ndim == 2 and y_score.shape[1] > 1
    if is_classification: #convert the per-class outputs to predicted labels
        y_pred = np.argmax(y_score, axis=1)
    else: #otherwise we assume we are working on a regression problem
        y_pred = y_score

    #AUC measures how well the samples are ranked, so it needs a continuous score and not
    #the hard labels. For two classes the difference of the class outputs ranks the samples
    #exactly like the positive-class probability does.
    if is_classification and y_score.shape[1] == 2:
        auc_input = y_score[:, 1] - y_score[:, 0]
    else:
        auc_input = y_pred

    results[prefix + " loss"].append( np.mean(running_loss) )
    for name, score_func in score_funcs.items():
        metric_input = auc_input if name == "AUC" else y_pred
        try:
            score = score_func(y_true, metric_input)
        except Exception:
            #A score that cannot be computed must not abort training; record it as NaN
            score = float("NaN")
        results[prefix + " " + name].append(score)
    return end-start #time spent on epoch



In [None]:
loss_func = nn.CrossEntropyLoss()
#train_simple_network(model, loss_func, training_loader, epochs=50)

In [None]:
X_train, y_train = make_moons(n_samples=8000, noise=0.4)
X_test, y_test = make_moons(n_samples=200, noise=0.4)
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
training_loader = DataLoader(train_dataset, shuffle=True)
testing_loader = DataLoader(test_dataset)

In [None]:
model = nn.Sequential(
    nn.Linear(2,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30, 2),
)


In [None]:
results_pd = train_simple_network(model, loss_func, training_loader, epochs=10, 
                                  val_loader=testing_loader, 
                                  checkpoint_file='model.pt', 
                                  score_funcs={'Acc':accuracy_score,'F1': f1_score,'AUC': AUC_function}) 

#score_funcs for accuracy, f1 and AUC 

In [None]:

results_pd

# Question 3: 
Write a new function `resume_simple_network`, which loads a `checkpoint_file` from disk, restores both the `optimizer` and `model` state, and continues training to a specified total number of epochs. So if the model was saved after 20 epochs, and you specify 30 epochs, it should only perform 10 more epochs of training.

Here `resume_simple_network` is applied to the model trained above in Question 2. That model was trained for 10 epochs; here we request a total of 20 epochs, so only the remaining epochs are trained, and their number is printed.

In [None]:
def resume_simple_network(epoch, checkpointfile, device='cpu'):
  """Load a checkpoint and continue training up to ``epoch`` total epochs.

  Relies on the notebook globals ``model``, ``loss_func``, ``training_loader`` and
  ``testing_loader``. The new checkpoint is written to 'new_model_file.pt'.

  Args:
    epoch: total number of epochs the model should have been trained for.
    checkpointfile: path of a checkpoint written by ``train_simple_network``.
    device: device the checkpoint is mapped to and training continues on.

  Returns:
    The results DataFrame of the additional epochs, or None when the checkpoint
    already covers ``epoch`` epochs.
  """
  checkpoint = torch.load(checkpointfile, map_location=device)
  model.load_state_dict(checkpoint['model_state_dict'])
  #NOTE(review): train_simple_network creates its own optimizer, so the state restored
  #here does not reach the resumed training; it only verifies that the state loads.
  optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  #checkpoint['epoch'] is the 0-based index of the last completed epoch
  remaining = max(epoch - checkpoint['epoch'] - 1, 0)
  print("Remaining Epochs are", remaining)
  if remaining == 0:
    return None
  #NOTE(review): the epoch numbers in the returned results and in the new checkpoint
  #restart at 0, i.e. they count only the resumed epochs.
  return train_simple_network(model, loss_func, train_loader=training_loader, epochs=remaining,
                              val_loader=testing_loader, checkpoint_file='new_model_file.pt',
                              score_funcs={'Acc':accuracy_score,'F1': f1_score}, device=device)

resume_simple_network(20,'model.pt')

#resume_simple_network must be given all of its parameters, and every object it passes on to train_simple_network is defined in the cells above.
#new_model_file.pt stores the checkpoint of the new execution.
#The number of remaining epochs is printed.

In [None]:
#reference: https://pytorch.org/tutorials/beginner/saving_loading_models.html
 #           https://towardsdatascience.com/checkpointing-deep-learning-models-in-keras-a652570b8de6

# Question 4
The "deep" part of deep learning refers to the number of layers in a neural network. Try adding more layers (up to 20) to the models we used for the `make_moons` classification problem. How do more layers impact the performance?

In [None]:
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05)
sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y)

In [None]:
classification_dataset = torch.utils.data.TensorDataset(torch.tensor(X, dtype=torch.float32),
                                                        torch.tensor(y, dtype=torch.long))
training_loader = DataLoader(classification_dataset)

In [None]:
loss_func = nn.CrossEntropyLoss()

In [None]:
def visualize2DSoftmax(X, y, model, title=None, grid_size=20):
    """Plot the model's predicted probability of class 0 over the 2-D input plane.

    The bounding box of ``X`` (padded by 0.5 on every side) is sampled on a
    ``grid_size`` x ``grid_size`` grid, the softmax of the model output is drawn
    as filled contours, and the data points are drawn on top.

    Args:
        X: array of shape (N, 2) with the data points.
        y: labels of the data points, used for the hue and style of the scatter plot.
        model: callable mapping a float32 tensor of shape (M, 2) to per-class logits.
        title: optional title of the plot.
        grid_size: number of grid points per axis (default 20).
    """
    x_min = np.min(X[:,0])-0.5
    x_max = np.max(X[:,0])+0.5
    y_min = np.min(X[:,1])-0.5
    y_max = np.max(X[:,1])+0.5
    xv, yv = np.meshgrid(np.linspace(x_min, x_max, num=grid_size), np.linspace(y_min, y_max, num=grid_size), indexing='ij')
    xy_v = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
    #Evaluate the grid on the device the model lives on, so GPU-trained models work too
    first_param = next(iter(model.parameters()), None) if isinstance(model, nn.Module) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        logits = model(torch.tensor(xy_v, dtype=torch.float32, device=device))
        y_hat = F.softmax(logits, dim=1).cpu().numpy()

    #Column 0 of the softmax output is the probability of class 0
    cs = plt.contourf(xv, yv, y_hat[:,0].reshape(grid_size,grid_size), levels=np.linspace(0,1,num=20), cmap=plt.cm.RdYlBu)
    sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=cs.ax)
    if title is not None:
        cs.ax.set_title(title)

visualize2DSoftmax(X, y, model)

In [None]:
#baseline model: two hidden layers of 30 neurons each
model = nn.Sequential(
    nn.Linear(2,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30, 2),
)

In [None]:
results_1=train_simple_network(model, loss_func, training_loader, val_loader=None, score_funcs={'Acc':accuracy_score,'F1': f1_score}, 
                         epochs=500, device="cpu", checkpoint_file=None)

In [None]:
def visualize2DSoftmax(X, y, model, title=None, grid_size=20):
    """Plot the model's predicted probability of class 0 over the 2-D input plane.

    The bounding box of ``X`` (padded by 0.5 on every side) is sampled on a
    ``grid_size`` x ``grid_size`` grid, the softmax of the model output is drawn
    as filled contours, and the data points are drawn on top.

    Args:
        X: array of shape (N, 2) with the data points.
        y: labels of the data points, used for the hue and style of the scatter plot.
        model: callable mapping a float32 tensor of shape (M, 2) to per-class logits.
        title: optional title of the plot.
        grid_size: number of grid points per axis (default 20).
    """
    x_min = np.min(X[:,0])-0.5
    x_max = np.max(X[:,0])+0.5
    y_min = np.min(X[:,1])-0.5
    y_max = np.max(X[:,1])+0.5
    xv, yv = np.meshgrid(np.linspace(x_min, x_max, num=grid_size), np.linspace(y_min, y_max, num=grid_size), indexing='ij')
    xy_v = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
    #Evaluate the grid on the device the model lives on, so GPU-trained models work too
    first_param = next(iter(model.parameters()), None) if isinstance(model, nn.Module) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        logits = model(torch.tensor(xy_v, dtype=torch.float32, device=device))
        y_hat = F.softmax(logits, dim=1).cpu().numpy()

    #Column 0 of the softmax output is the probability of class 0
    cs = plt.contourf(xv, yv, y_hat[:,0].reshape(grid_size,grid_size), levels=np.linspace(0,1,num=20), cmap=plt.cm.RdYlBu)
    sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=cs.ax)
    if title is not None:
        cs.ax.set_title(title)

visualize2DSoftmax(X, y, model)

In [None]:
sns.lineplot(x="epoch", y='train Acc', data=results_1, label='Train')
#sns.lineplot(x='epoch', y='val Acc', data=results, label='Validation')

In [None]:
sns.lineplot(x='total time', y='train F1', data=results_1, label='Train')

In [None]:
sns.lineplot(x='total time', y='train loss', data=results_1, label='Train')

In [None]:
#same neurons and adding more hidden layers

model = nn.Sequential(
    nn.Linear(2,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30,  30),
    nn.Tanh(),
    nn.Linear(30, 2),
)

In [None]:
results=train_simple_network(model, loss_func, training_loader, val_loader=None, score_funcs={'Acc':accuracy_score,'F1': f1_score}, 
                         epochs=500, device="cpu", checkpoint_file=None)

In [None]:
def visualize2DSoftmax(X, y, model, title=None, grid_size=20):
    """Plot the model's predicted probability of class 0 over the 2-D input plane.

    The bounding box of ``X`` (padded by 0.5 on every side) is sampled on a
    ``grid_size`` x ``grid_size`` grid, the softmax of the model output is drawn
    as filled contours, and the data points are drawn on top.

    Args:
        X: array of shape (N, 2) with the data points.
        y: labels of the data points, used for the hue and style of the scatter plot.
        model: callable mapping a float32 tensor of shape (M, 2) to per-class logits.
        title: optional title of the plot.
        grid_size: number of grid points per axis (default 20).
    """
    x_min = np.min(X[:,0])-0.5
    x_max = np.max(X[:,0])+0.5
    y_min = np.min(X[:,1])-0.5
    y_max = np.max(X[:,1])+0.5
    xv, yv = np.meshgrid(np.linspace(x_min, x_max, num=grid_size), np.linspace(y_min, y_max, num=grid_size), indexing='ij')
    xy_v = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
    #Evaluate the grid on the device the model lives on, so GPU-trained models work too
    first_param = next(iter(model.parameters()), None) if isinstance(model, nn.Module) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        logits = model(torch.tensor(xy_v, dtype=torch.float32, device=device))
        y_hat = F.softmax(logits, dim=1).cpu().numpy()

    #Column 0 of the softmax output is the probability of class 0
    cs = plt.contourf(xv, yv, y_hat[:,0].reshape(grid_size,grid_size), levels=np.linspace(0,1,num=20), cmap=plt.cm.RdYlBu)
    sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=cs.ax)
    if title is not None:
        cs.ax.set_title(title)

visualize2DSoftmax(X, y, model)

In [None]:
sns.lineplot(x="epoch", y='train Acc', data=results, label='Train')
#sns.lineplot(x='epoch', y='val Acc', data=results, label='Validation')

In [None]:
sns.lineplot(x='total time', y='train F1', data=results, label='Train')

In [None]:
sns.lineplot(x='total time', y='train loss', data=results, label='Train')

Initially, before adding layers, the visualization shows that a few blue points fall in the other (orange) region and vice versa, and a few fall in the blank region, which we can regard as the error region. After adding layers, we observe that the model is trained and that the activation functions help align the regions with the points; we also observe that the loss is reduced. Adding layers is helpful, but adding more layers than required overtrains the model and gives incorrect results.

# Question 5:
Try changing the number of neurons used in the hidden layers of the `make_moons` classification problem. How does it impact performance?

In [None]:
#changing the number of neurons
model = nn.Sequential(
    nn.Linear(2,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40,  40),
    nn.Tanh(),
    nn.Linear(40, 2),
)

In [None]:
result = train_simple_network(model, loss_func, training_loader, val_loader=None, score_funcs={'Acc':accuracy_score,'F1': f1_score}, 
                         epochs=500, device="cpu", checkpoint_file=None)

In [None]:
def visualize2DSoftmax(X, y, model, title=None, grid_size=20):
    """Plot the model's predicted probability of class 0 over the 2-D input plane.

    The bounding box of ``X`` (padded by 0.5 on every side) is sampled on a
    ``grid_size`` x ``grid_size`` grid, the softmax of the model output is drawn
    as filled contours, and the data points are drawn on top.

    Args:
        X: array of shape (N, 2) with the data points.
        y: labels of the data points, used for the hue and style of the scatter plot.
        model: callable mapping a float32 tensor of shape (M, 2) to per-class logits.
        title: optional title of the plot.
        grid_size: number of grid points per axis (default 20).
    """
    x_min = np.min(X[:,0])-0.5
    x_max = np.max(X[:,0])+0.5
    y_min = np.min(X[:,1])-0.5
    y_max = np.max(X[:,1])+0.5
    xv, yv = np.meshgrid(np.linspace(x_min, x_max, num=grid_size), np.linspace(y_min, y_max, num=grid_size), indexing='ij')
    xy_v = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
    #Evaluate the grid on the device the model lives on, so GPU-trained models work too
    first_param = next(iter(model.parameters()), None) if isinstance(model, nn.Module) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        logits = model(torch.tensor(xy_v, dtype=torch.float32, device=device))
        y_hat = F.softmax(logits, dim=1).cpu().numpy()

    #Column 0 of the softmax output is the probability of class 0
    cs = plt.contourf(xv, yv, y_hat[:,0].reshape(grid_size,grid_size), levels=np.linspace(0,1,num=20), cmap=plt.cm.RdYlBu)
    sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=cs.ax)
    if title is not None:
        cs.ax.set_title(title)

visualize2DSoftmax(X, y, model)

In [None]:
 sns.lineplot(x="epoch", y='train Acc', data=result, label='Train')

In [None]:
sns.lineplot(x='total time', y='train F1', data=result, label='Train')

In [None]:
sns.lineplot(x='total time', y='train loss', data=result, label='Train')

In [None]:
#increasing the count of the neurons
model = nn.Sequential(
    nn.Linear(2,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50, 2),
)

In [None]:
result1=train_simple_network(model, loss_func, training_loader, val_loader=None, score_funcs={'Acc':accuracy_score,'F1': f1_score}, 
                         epochs=500, device="cpu", checkpoint_file=None)

In [None]:
def visualize2DSoftmax(X, y, model, title=None, grid_size=20):
    """Plot the model's predicted probability of class 0 over the 2-D input plane.

    The bounding box of ``X`` (padded by 0.5 on every side) is sampled on a
    ``grid_size`` x ``grid_size`` grid, the softmax of the model output is drawn
    as filled contours, and the data points are drawn on top.

    Args:
        X: array of shape (N, 2) with the data points.
        y: labels of the data points, used for the hue and style of the scatter plot.
        model: callable mapping a float32 tensor of shape (M, 2) to per-class logits.
        title: optional title of the plot.
        grid_size: number of grid points per axis (default 20).
    """
    x_min = np.min(X[:,0])-0.5
    x_max = np.max(X[:,0])+0.5
    y_min = np.min(X[:,1])-0.5
    y_max = np.max(X[:,1])+0.5
    xv, yv = np.meshgrid(np.linspace(x_min, x_max, num=grid_size), np.linspace(y_min, y_max, num=grid_size), indexing='ij')
    xy_v = np.hstack((xv.reshape(-1,1), yv.reshape(-1,1)))
    #Evaluate the grid on the device the model lives on, so GPU-trained models work too
    first_param = next(iter(model.parameters()), None) if isinstance(model, nn.Module) else None
    device = first_param.device if first_param is not None else torch.device("cpu")
    with torch.no_grad():
        logits = model(torch.tensor(xy_v, dtype=torch.float32, device=device))
        y_hat = F.softmax(logits, dim=1).cpu().numpy()

    #Column 0 of the softmax output is the probability of class 0
    cs = plt.contourf(xv, yv, y_hat[:,0].reshape(grid_size,grid_size), levels=np.linspace(0,1,num=20), cmap=plt.cm.RdYlBu)
    sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=cs.ax)
    if title is not None:
        cs.ax.set_title(title)

visualize2DSoftmax(X, y, model)

In [None]:
sns.lineplot(x='epoch', y='train Acc', data=result1, label='Train')


In [None]:
sns.lineplot(x='total time', y='train F1', data=result1, label='Train')


In [None]:
sns.lineplot(x='total time', y='train loss', data=result1, label='Train')

By changing the number of neurons, I observe that the error region, i.e., the blank space between the two moons, has been reduced. More neurons help to reduce the blank error regions of the model further. The tilt of the region is also clearly visible.

# Question 6:
Use scikit-learn to load the breast cancer wisconsin dataset, and convert it into a `TensorDataset` and then split it into 80% for training and 20% for testing. Try to build your own classification neural network for this data. 

In [None]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

#loading the breast cancer data

In [None]:
X=data.data
y=data.target

#initializing features and Labels

In [None]:
X.shape

In [None]:
y.shape

In [None]:
data_torch = TensorDataset(torch.tensor(X,dtype=torch.float32),torch.tensor(y, dtype=torch.float32))

#converting the data to TensorDataset

In [None]:
data_torch[:][0]

#feature values

In [None]:
data_torch[:][1]

#target values

In [None]:
from sklearn.model_selection import train_test_split

#import train_test_split from sklearn

In [None]:
X_train,X_test,y_train, y_test=train_test_split(data_torch[:][0],data_torch[:][1], test_size=0.2, random_state=3)

# spliting the data into 80% for training and 20% for testing and giving random_state 3

In [None]:
#X_train, X_test, y_train and y_test come from splitting tensors, so they are presumably
#tensors already. torch.as_tensor accepts tensors as well as arrays and converts the dtype
#without the copy-construct warning that torch.tensor() emits when it is given a tensor.
#The labels are converted to long because CrossEntropyLoss expects class indices.
train_dataset = TensorDataset(torch.as_tensor(X_train, dtype=torch.float32), torch.as_tensor(y_train, dtype=torch.long))
test_dataset = TensorDataset(torch.as_tensor(X_test, dtype=torch.float32), torch.as_tensor(y_test, dtype=torch.long))
training_loader = DataLoader(train_dataset, shuffle=True)
testing_loader = DataLoader(test_dataset)

In [None]:
model = nn.Sequential(
    nn.Linear(30,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50,  50),
    nn.Tanh(),
    nn.Linear(50, 2),
)

In [None]:
results_pd = train_simple_network(model, loss_func, training_loader, epochs=5, 
                                  val_loader=testing_loader, 
                                  checkpoint_file='model.pt', 
                                  score_funcs={'Acc':accuracy_score,'F1': f1_score})

In [None]:
results_pd

In [None]:
sns.lineplot(x='total time', y='train loss', data=results_pd, label='Train')
sns.lineplot(x='total time', y='val loss', data=results_pd, label='Validation')


Other References:

1) https://towardsdatascience.com/understanding-auc-roc-curve-68b2303cc9c5

2)https://towardsdatascience.com/checkpointing-deep-learning-models-in-keras-a652570b8de6

3)https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html

4)https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
