## the update is only in the final class (BIOF050_CNN_Final)

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import random

In [2]:
class BIOF050_CNN:
    
    
    '''
        
        Inside this Net class, we can define what we want our convolutional neural network to look like!
        We will define the convolutional layers AND the linear layers here 
        
        Inputs: 
        
        the number of channels our images have (for black and white images,
        this will be equal to 1, for colored images, this will usually be equal to 3)
        
        the hidden dimension of our linear layers (same as MLP - how many neurons do we want?)

        the size of our 2D convolution kernel (input 3 = 3x3 kernel)

        the number of unique labels in our dataset
        
        Remember, if something has weights that must be optimized (like a linear layer or a convolution layer), 
        it must go in the constructor (__init__)
        
        Otherwise, it can go in either forward or constructor (__init___)
        
    '''
    
    class CNet(nn.Module):
        
        def __init__(self,n_channels,hidden_dimension,kernel_size,n_classes,pool):

            super(BIOF050_CNN.CNet, self).__init__()
            
        
            ### first convolution layer - You need to know # of channels/inputs and number of kernels (outputs)
            self.convolution1 = nn.Conv2d(n_channels,28,kernel_size=kernel_size)
            
            ###2-D max pooling
            self.pool = nn.MaxPool2d(pool, pool)
            
            ### second convolution layer - first term is number of kernels from last conv layer,
            ### second is number of outputs
            self.convolution2 = nn.Conv2d(n_kernels,56,kernel_size=kernel_size)
            
            ### 2-D maxpooling
            self.pool2 = nn.MaxPool2d(pool, pool)
            
            
            '''
            MLP layer - the 1400 represents the output of the final maxpooling (MaxPool2d) 
            after being flattened - you will need to get this value for each dataset you use
            '''
            
            self.layer1 = nn.Linear(1400,10)
            
            '''
            this is the activation function - we will input our data into this function
            and ReLU will be applied
            
            '''
            self.relu = nn.ReLU()

            
        
            

            
    
        '''
        Now, we have to define the forward method, which takes a data point, or, in most cases, a batch, and
        feeds it through all the layers of our neural network until assigning it a layer
        
        nn.Convolution and nn.Linear take one array/tensor as an input, so we will input our data right into each layer, and then input the
        outputs of each layer into the next layer
        
        After each layer, we will apply nn.ReLU to transform our data into a nonlinear space
        
        Finally, after the data has been passed through the output layer, we will convert it into a probaboility
        distribution using the softmax function. 
        
        This probabilty dsistribution will be used to assign a label to our
        data points and to figure out just how well our neural network did, as we learned earlier today
        
        ''' 
        
        def forward(self, batch):
            
            
            ### first convolution
            batch = self.convolution1(batch)
            
            ## activation
            batch = self.relu(batch)
            
            ### pooling
            batch = self.pool(batch)
            
            ### second convolution
            batch = self.convolution2(batch)
            
            ## activation
            batch = self.relu(batch)
            
            batch = self.pool(batch)
            
            ## flatten out each image to be in 1D using this view function
            batch = batch.view(batch.size(0), -1)
            
            ### MLP layer
            batch = self.layer1(batch)
            
            ### probability dist.
            return nn.functional.softmax(batch)
              

## Using our CNN
Now, we will add in our train_test + batchify method from last time to use our CNN on a dataset! 

## The data
we can use torchvision's dataset feature to download large datasets, such as MNIST (handwritten digits)

In [3]:
class BIOF050_CNN_Final:
    
    
    def __init__(self,data,labels):
        self.data = data
        self.labels = labels
    
    '''
        
        Inside this CNet class, we can define what we want our convolutional neural network to look like!
        We will define the convolutional layers AND the linear layers here 
        
        Inputs:
        
        
    '''
    
    class CNet(nn.Module):
        
        def __init__(self,n_channels,hidden_dimension,kernel_size,n_classes,pool):

            super(BIOF050_CNN_Final.CNet, self).__init__()

            ### first convolution layer - You need to know # of channels/inputs and number of kernels (outputs)
            self.convolution1 = nn.Conv2d(n_channels,28,kernel_size=kernel_size)
            
            ###2-D max pooling
            self.pool = nn.MaxPool2d(pool, pool)
            
            ### second convolution layer - first term is number of kernels from last conv layer,
            ### second is number of outputs
            
            self.convolution2 = nn.Conv2d(28,56,kernel_size=kernel_size)
            
            ### 2-D maxpooling
            self.pool2 = nn.MaxPool2d(pool, pool)
            
            
            '''
            MLP layer - the 3584 represents the output of the final maxpooling (MaxPool2d) 
            after being flattened - you will need to get this value for each dataset you use
            '''
            
            self.layer1 = nn.Linear(find_MLP_dim(28,kernel_size,56),10)
            self.relu = nn.ReLU()
            

            
    
        '''
        Now, we have to define the forward method, which takes a data point, or, in most cases, a batch, and
        feeds it through all the layers of our neural network until assigning it a layer
        
        nn.Convolution and nn.Linear take one array/tensor as an input, so we will input our data right into each layer, and then input the
        outputs of each layer into the next layer
        
        After each layer, we will apply nn.ReLU to transform our data into a nonlinear space
        
        Finally, after the data has been passed through the output layer, we will convert it into a probaboility
        distribution using the softmax function. 
        
        This probabilty dsistribution will be used to assign a label to our
        data points and to figure out just how well our neural network did, as we learned earlier today
        
        ''' 
        
        def forward(self, batch):
            
            ### first convolution
            batch = self.convolution1(batch)
            
            ## activation
            batch = self.relu(batch)
            
            ## we could have batch norm if we wanted to 
            ## we could add in dropouts
            
            ### pooling
            batch = self.pool(batch)
            
            ### second convolution
            batch = self.convolution2(batch)
            
            ## activation
            batch = self.relu(batch)
            
            batch = self.pool2(batch)
            
            ## flatten out each image to be in 1D using this view function
            batch = batch.view(batch.size(0), -1)
           
            ### MLP layer
            batch = self.layer1(batch)
            
            ### probability dist.
            return nn.functional.softmax(batch)
              
       

    
           
        
    def train_test(self,test_size,n_epochs,n_channels,hidden_dimensions,batch_size,kernel_size,pool,lr):
            
        ### splitting the data into a training/testing set
        train_data,test_data,train_labels,test_labels = train_test_split(self.data,self.labels, test_size=test_size)
        
        ## creating the batches using the batchify function
        train_batches,train_label_batches = batchify(train_data,train_labels,batch_size=batch_size)
        
        '''
        Here is where we define our neural network model - the Net class is inside BIOF050, so we have to call
        it accordingly 
        
        We use the length of our first data point to set the length of our input data (they are all the same)
        
        The number of class is equal to the number of unique values (the set) of our training labels
        '''
        neural_network = BIOF050_CNN_Final.CNet(n_channels,hidden_dimensions,kernel_size,len(set(train_labels)),pool)
        
        
        '''
        Here, we use the torch.optim package to create our stochastic gradient descent function
        
        neural_network.parameters() reads internal information from our NN 
        (don't worry about that - SGD just requires it)
        
        lr is the learning rate
        '''
        optimizer = optim.SGD(neural_network.parameters(), lr=lr)
        
        
        '''
        Here, we use the nn package to create our cross entropy loss function
        '''
        loss_function = nn.CrossEntropyLoss()
        
                
        '''
        The train function tells the neural network that it is about to be trained and that it 
        will have to calculate the needed information for optimization 
        
        This function should always be called before training
        '''
        neural_network.train()
        
        
        ''' This loop moves through the data once for each epoch'''
        for i in range(n_epochs):
            
            ### track the number we get correct
            correct = 0
            
            ''' This loop moves through each batch and feeds into the neural network'''
            for ii in range(len(train_batches)):
                
                ''' 
                Clears previous gradients from the optimizer - the optimizer,
                in this case, does not need to know what happened last time
                '''
                optimizer.zero_grad()
                
                
                batch = train_batches[ii]
                labels = train_label_batches[ii]

                
                ''' 
                Puts our batch into the neural network after converting it to a tensor
                
                Pytorch wants numeric data to be floats, so we will convert to a float as well 
                using np.float32
                
                Predictions: For each data point in our batch, we would get something that looks like:
                tensor([0.3,0.7]) where each number corresponds to the probability of a class
                '''
                predictions = neural_network(torch.tensor(np.asarray(batch).astype(np.float32)))
                
                
                ''' 
                We put our probabilities into the loss function to calculate the error for this batch
                
                '''
                loss = loss_function(predictions,torch.LongTensor(labels))
                
                '''
                loss.backward calculates the partial derivatives that we need to optimize
                '''
                loss.backward()
                
                
                '''
                optimizer step calculates the weight updates so the neural network can update the weights 
                '''
                optimizer.step()
                
                
                '''
                We extract just the data from our predictions, not other stuff Pytorch includes in that object
                
                We can then use the argmax function to figure out which index corresponds to the highest probability.
                If it is the 0th index, and the label is zero, we add one to correct. 
                If it is the 1st index, and the label is one, we add one to correct.
                
                '''
                for n,pred in enumerate(predictions.data):
                    if labels[n] == torch.argmax(pred):
                        correct += 1
                        
                        
            print("Accuracy for Epoch # " + str(i) + ": " + str(correct/len(train_data)))

        print()
        

                    
        '''
        The eval function tells the neural network that it is about to be tested on blind test data
        and shouldn't change any of its internal parameters
        
        This function should always be called before eval
        '''
        neural_network.eval()
        
        test_correct = 0
        
        ''' input our test data into the neural network'''
        predictions = neural_network(torch.tensor(np.asarray(test_data).astype(np.float32)))
        
        ''' checks how many we got right - very simple!'''
        for n,pred in enumerate(predictions.data):
            if test_labels[n] == torch.argmax(pred):
                    test_correct += 1
                    
        print("Accuracy on test set: " + str(test_correct/len(test_data)))
           
        return neural_network
        
   
   


''' Utility Function - function to turn the data into batches'''

def batchify(data,labels,batch_size=16):
    
    batches= []
    label_batches = []


    for n in range(0,len(data),batch_size):
        if n+batch_size < len(data):
            batches.append(data[n:n+batch_size])
            label_batches.append(labels[n:n+batch_size])

    if len(data)%batch_size > 0:
        batches.append(data[len(data)-(len(data)%batch_size):len(data)])
        label_batches.append(labels[len(data)-(len(data)%batch_size):len(data)])
        
    return batches,label_batches


def find_MLP_dim(image_size,kernel_size,n_kernels):
    
    after_conv1_pool = round((image_size-(kernel_size-1))/2)
    after_conv2_pool = int(np.floor((after_conv1_pool- (kernel_size-1))/2))
    after_conv2_pool *= after_conv2_pool

    return after_conv2_pool*n_kernels

    

In [4]:
data = torchvision.datasets.MNIST(
    root = './data/MNIST',
    download = True)


### get the data and labels from the data object
labels = data.targets
data = data.data


newdata = []
### flatten the data, scale it from 0-1, reshape it back into 2-d form (in this case, 28x28 - varies with dataset)
for image in data:
   image = np.ravel(image).astype(np.float64)
   image *= 1/image.max()
   newdata.append(image.reshape(1,28,28))
 


In [None]:
testclass = BIOF050_CNN_Final(newdata,labels)
model = testclass.train_test(test_size=0.2,n_epochs=3,n_channels=1,hidden_dimensions=100,batch_size=16,
                             kernel_size=5,pool=2,lr=0.01)

  return nn.functional.softmax(batch)
