# Introduction
This tutorial walks you through a PyTorch implementation of a convolutional neural network (CNN).
First, let's discuss the packages imported in this notebook:

1. pandas - Python library for data manipulation and analysis
2. numpy - Python library for numerical computing and math functions
3. tqdm - Python library for showing a progress bar
4. torch - the PyTorch framework for building machine learning models
5. cv2 - OpenCV's Python bindings, used here for loading images

# Dataset 
For this tutorial we will use the chest-xray-pneumonia dataset from kaggle.

We load the data into NumPy arrays using the three classes defined in the following three cells; each class exposes a `makedata` method.
Running these cells builds the training, validation and test datasets.
We are aware that there is a more optimized way to do this, but this notebook is written for beginners, hence the longer implementation.

In [None]:
import pandas as pd
import numpy as np 
import os 
from tqdm import tqdm
import cv2
class DataLoader():
    """Builds the training split of the chest X-ray dataset and saves it to disk.

    Every file whose name contains "jpeg" in the NORMAL and PNEUMONIA folders is
    read in grayscale, resized to Im_size x Im_size and paired with a one-hot
    label ([1, 0] for NORMAL, [0, 1] for PNEUMONIA).
    """
    Im_size = 50
    NORMAL = '../input/chest-xray-pneumonia/chest_xray/train/NORMAL'
    PNEUMONIA = '../input/chest-xray-pneumonia/chest_xray/train/PNEUMONIA'
    LABELS = {NORMAL:0, PNEUMONIA:1}
    training_data = []
    normalcount = 0
    viralcount = 0

    def __init__(self):
        # Give every instance its own list and counters; the class-level list
        # above would otherwise be shared (and keep growing) across instances.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0

    @staticmethod
    def _pack(samples):
        """Pack [image, one_hot] pairs into an (N, 2) object array for np.save."""
        # The image and the label have different shapes, so the array is filled
        # cell by cell instead of letting NumPy guess a common shape.
        packed = np.empty((len(samples), 2), dtype=object)
        for row, (image, one_hot) in enumerate(samples):
            packed[row, 0] = image
            packed[row, 1] = one_hot
        return packed

    def makedata(self):
        """Read, resize and label every image, then shuffle and save the result.

        Writes "training_data.npy" to the working directory and prints the
        number of images collected per class. Files that cannot be read or
        resized are skipped and reported instead of being silently ignored.
        """
        # Start from a clean slate so calling makedata() twice does not
        # duplicate samples or double the counters.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0
        skipped = 0

        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpeg" not in f:
                    continue
                path = os.path.join(label, f)
                try:
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        # cv2.imread returns None when the file cannot be decoded
                        skipped += 1
                        print('Skipping unreadable image:', path)
                        continue
                    img = cv2.resize(img, (self.Im_size, self.Im_size))
                except Exception as e:
                    # Stay best-effort, but tell the reader what was dropped
                    skipped += 1
                    print('Skipping', path, '-', e)
                    continue

                # np.eye(2)[k] is the one-hot vector for class k
                self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])

                if label == self.NORMAL:
                    self.normalcount += 1
                elif label == self.PNEUMONIA:
                    self.viralcount += 1

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self._pack(self.training_data))
        # Report this instance's counts (not those of a global variable)
        print('Normal',self.normalcount)
        print('Pneumonia:',self.viralcount)
        if skipped:
            print('Skipped files:', skipped)
        
# Set REBUILD_DATA to False to skip re-reading the images (for example when
# training_data.npy has already been written by an earlier run).
REBUILD_DATA = True
if REBUILD_DATA:
    # The loader is bound to the module-level name `dataloader`; the
    # validation and test cells later rebind this same name.
    dataloader = DataLoader()
    dataloader.makedata()

In [None]:
class ValdationData():
    """Builds the validation split of the chest X-ray dataset and saves it to disk.

    Every file whose name contains "jpeg" in the NORMAL and PNEUMONIA folders is
    read in grayscale, resized to Im_size x Im_size and paired with a one-hot
    label ([1, 0] for NORMAL, [0, 1] for PNEUMONIA).
    """
    Im_size = 50
    NORMAL = '../input/chest-xray-pneumonia/chest_xray/val/NORMAL'
    PNEUMONIA = '../input/chest-xray-pneumonia/chest_xray/val/PNEUMONIA'
    LABELS = {NORMAL:0, PNEUMONIA:1}
    training_data = []
    normalcount = 0
    viralcount = 0

    def __init__(self):
        # Give every instance its own list and counters; the class-level list
        # above would otherwise be shared (and keep growing) across instances.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0

    @staticmethod
    def _pack(samples):
        """Pack [image, one_hot] pairs into an (N, 2) object array for np.save."""
        # The image and the label have different shapes, so the array is filled
        # cell by cell instead of letting NumPy guess a common shape.
        packed = np.empty((len(samples), 2), dtype=object)
        for row, (image, one_hot) in enumerate(samples):
            packed[row, 0] = image
            packed[row, 1] = one_hot
        return packed

    def makedata(self):
        """Read, resize and label every image, then shuffle and save the result.

        Writes "val_data.npy" to the working directory and prints the number of
        images collected per class. Files that cannot be read or resized are
        skipped and reported instead of being silently ignored.
        """
        # Start from a clean slate so calling makedata() twice does not
        # duplicate samples or double the counters.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0
        skipped = 0

        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpeg" not in f:
                    continue
                path = os.path.join(label, f)
                try:
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        # cv2.imread returns None when the file cannot be decoded
                        skipped += 1
                        print('Skipping unreadable image:', path)
                        continue
                    img = cv2.resize(img, (self.Im_size, self.Im_size))
                except Exception as e:
                    # Stay best-effort, but tell the reader what was dropped
                    skipped += 1
                    print('Skipping', path, '-', e)
                    continue

                # np.eye(2)[k] is the one-hot vector for class k
                self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])

                if label == self.NORMAL:
                    self.normalcount += 1
                elif label == self.PNEUMONIA:
                    self.viralcount += 1

        np.random.shuffle(self.training_data)
        np.save("val_data.npy", self._pack(self.training_data))
        # Report this instance's counts (not those of a global variable)
        print('Normal',self.normalcount)
        print('Pneumonia:',self.viralcount)
        if skipped:
            print('Skipped files:', skipped)
# Set REBUILD_DATA to False to skip re-reading the validation images (for
# example when val_data.npy has already been written by an earlier run).
REBUILD_DATA = True
if REBUILD_DATA:
    # Rebinds the module-level name `dataloader` to the validation loader.
    dataloader = ValdationData()
    dataloader.makedata()

In [None]:
class TestData():
    """Builds the test split of the chest X-ray dataset and saves it to disk.

    Every file whose name contains "jpeg" in the NORMAL and PNEUMONIA folders is
    read in grayscale, resized to Im_size x Im_size and paired with a one-hot
    label ([1, 0] for NORMAL, [0, 1] for PNEUMONIA).
    """
    Im_size = 50
    NORMAL = '../input/chest-xray-pneumonia/chest_xray/test/NORMAL'
    PNEUMONIA = '../input/chest-xray-pneumonia/chest_xray/test/PNEUMONIA'
    LABELS = {NORMAL:0, PNEUMONIA:1}
    training_data = []
    normalcount = 0
    viralcount = 0

    def __init__(self):
        # Give every instance its own list and counters; the class-level list
        # above would otherwise be shared (and keep growing) across instances.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0

    @staticmethod
    def _pack(samples):
        """Pack [image, one_hot] pairs into an (N, 2) object array for np.save."""
        # The image and the label have different shapes, so the array is filled
        # cell by cell instead of letting NumPy guess a common shape.
        packed = np.empty((len(samples), 2), dtype=object)
        for row, (image, one_hot) in enumerate(samples):
            packed[row, 0] = image
            packed[row, 1] = one_hot
        return packed

    def makedata(self):
        """Read, resize and label every image, then shuffle and save the result.

        Writes "test_data.npy" to the working directory and prints the number of
        images collected per class. Files that cannot be read or resized are
        skipped and reported instead of being silently ignored.
        """
        # Start from a clean slate so calling makedata() twice does not
        # duplicate samples or double the counters.
        self.training_data = []
        self.normalcount = 0
        self.viralcount = 0
        skipped = 0

        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpeg" not in f:
                    continue
                path = os.path.join(label, f)
                try:
                    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    if img is None:
                        # cv2.imread returns None when the file cannot be decoded
                        skipped += 1
                        print('Skipping unreadable image:', path)
                        continue
                    img = cv2.resize(img, (self.Im_size, self.Im_size))
                except Exception as e:
                    # Stay best-effort, but tell the reader what was dropped
                    skipped += 1
                    print('Skipping', path, '-', e)
                    continue

                # np.eye(2)[k] is the one-hot vector for class k
                self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])

                if label == self.NORMAL:
                    self.normalcount += 1
                elif label == self.PNEUMONIA:
                    self.viralcount += 1

        np.random.shuffle(self.training_data)
        np.save("test_data.npy", self._pack(self.training_data))
        # Report this instance's counts (not those of a global variable)
        print('Normal',self.normalcount)
        print('Pneumonia:',self.viralcount)
        if skipped:
            print('Skipped files:', skipped)
# Set REBUILD_DATA to False to skip re-reading the test images (for example
# when test_data.npy has already been written by an earlier run).
REBUILD_DATA = True
if REBUILD_DATA:
    # Rebinds the module-level name `dataloader` to the test loader.
    dataloader = TestData()
    dataloader.makedata()

# Training on GPU
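The line `device = torch.device("cuda" if torch.cuda.is_available() else "cpu")` selects the GPU when one is available and falls back to the CPU otherwise. Moving the model and the data to this device with `.to(device)` lets training run on the GPU, which is much faster.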

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load the shuffled [image, one-hot label] pairs saved as training_data.npy.
# allow_pickle=True is presumably needed because the file stores Python
# objects rather than one plain numeric array; only load files you created.
trainingdata = np.load("training_data.npy", allow_pickle=True)

In [None]:
# Sanity check: shape of the image (element 0) of the second sample.
trainingdata[1][0].shape

In [None]:
# Gather the images into one NumPy array before handing them to PyTorch:
# building a tensor straight from a Python list of ndarrays is very slow.
X = torch.tensor(np.array([sample[0] for sample in trainingdata]), dtype=torch.float32).view(-1, 50, 50)
X = X/255.  # scale pixel values from [0, 255] to [0, 1]
# One-hot labels, one row per image
y = torch.tensor(np.array([sample[1] for sample in trainingdata]), dtype=torch.float32)

**Transferring X and y to the GPU**

In [None]:
# Move the training images and labels onto the selected device in one step.
X, y = X.to(device), y.to(device)

In [None]:
# Load the shuffled [image, one-hot label] pairs saved as val_data.npy.
# allow_pickle=True is presumably needed because the file stores Python
# objects rather than one plain numeric array; only load files you created.
valdata = np.load("val_data.npy", allow_pickle=True)

In [None]:
# Gather the images into one NumPy array before handing them to PyTorch:
# building a tensor straight from a Python list of ndarrays is very slow.
Xval = torch.tensor(np.array([sample[0] for sample in valdata]), dtype=torch.float32).view(-1, 50, 50)
Xval = Xval/255.  # scale pixel values from [0, 255] to [0, 1]
# One-hot labels, one row per image
yval = torch.tensor(np.array([sample[1] for sample in valdata]), dtype=torch.float32)

In [None]:
# Move the validation images and labels onto the selected device in one step.
Xval, yval = Xval.to(device), yval.to(device)

In [None]:
# Load the shuffled [image, one-hot label] pairs saved as test_data.npy.
# allow_pickle=True is presumably needed because the file stores Python
# objects rather than one plain numeric array; only load files you created.
testdata = np.load("test_data.npy", allow_pickle=True)

In [None]:
# Gather the images into one NumPy array before handing them to PyTorch:
# building a tensor straight from a Python list of ndarrays is very slow.
Xtest = torch.tensor(np.array([sample[0] for sample in testdata]), dtype=torch.float32).view(-1, 50, 50)
Xtest = Xtest/255.  # scale pixel values from [0, 255] to [0, 1]
# One-hot labels, one row per image
ytest = torch.tensor(np.array([sample[1] for sample in testdata]), dtype=torch.float32)

In [None]:
# Move the test images and labels onto the selected device in one step.
Xtest, ytest = Xtest.to(device), ytest.to(device)

**We converted the images to grayscale; let's visualize one of them**

In [None]:
import matplotlib.pyplot as plt
# Draw the second training image on an explicit axes object.
fig, ax = plt.subplots()
ax.imshow(trainingdata[1][0], cmap='gray')
plt.show()

# Building the Network


In [None]:
class Net(nn.Module):
    """Small CNN: three conv + max-pool stages, then two fully connected layers.

    Expects batches shaped (N, 1, 50, 50) and returns an (N, 2) tensor of
    class probabilities (softmax over the two outputs).
    """

    def __init__(self):
        super().__init__()  # initialise nn.Module's own bookkeeping
        # 1 input channel (grayscale), then 32 -> 64 -> 128 feature maps
        self.conv1 = nn.Conv2d(1, 32, 3)    # 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, 3)   # 3x3 kernel
        self.conv3 = nn.Conv2d(64, 128, 5)  # 5x5 kernel

        # Push one random 50x50 image through the conv stack so convs() can
        # record how many features the first linear layer has to accept.
        self._to_linear = None
        probe = torch.randn(50, 50).view(-1, 1, 50, 50)
        self.convs(probe)

        self.fc1 = nn.Linear(self._to_linear, 128)  # takes the flattened conv features
        self.fc2 = nn.Linear(128, 2)  # 128 in, 2 out: one output per class

    def convs(self, x):
        """Apply conv -> ReLU -> 2x2 max-pool for each of the three conv layers."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2, stride=2)

        # First call only: remember the per-sample feature count
        if self._to_linear is None:
            self._to_linear = x[0].numel()
        return x

    def forward(self, x):
        """Return softmax class probabilities for a batch of images."""
        features = self.convs(x).view(-1, self._to_linear)  # flatten each sample
        hidden = F.relu(self.fc1(features))
        scores = self.fc2(hidden)  # output layer, no activation before softmax
        return F.softmax(scores, dim=1)


# Build the network, move its parameters to the selected device, then show its layers.
net = Net()
net = net.to(device)
print(net)

# Defining the Optimizer and Loss function

In [None]:
import torch.optim as optim

# Mean squared error between the network's softmax output and the one-hot labels.
loss_function = nn.MSELoss()
# SGD with momentum over all network parameters (lr = learning rate).
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

# Training loop 
Feel free to play with the number of Epochs to see what result you get in the end

In [None]:
BATCHSIZE = 32
EPOCHS = 20

for epoch in range(EPOCHS):
    # Walk through the training set in consecutive slices of BATCHSIZE samples.
    for start in tqdm(range(0, len(X), BATCHSIZE)):
        stop = start + BATCHSIZE
        batchx = X[start:stop].view(-1, 1, 50, 50)  # add the channel dimension
        batchy = y[start:stop]

        # Clear the gradients left over from the previous step
        # (optimizer.zero_grad() is the alternative when using several optimizers).
        net.zero_grad()
        outputs = net(batchx)
        loss = loss_function(outputs, batchy)
        loss.backward()
        optimizer.step()

    # Every fifth epoch, report the loss of that epoch's last batch.
    if epoch % 5 == 0:
        print("The Loss is: ", loss)

In [None]:
# Display the loss tensor from the last batch of the final epoch.
loss

# Training accuracy

In [None]:
correct = 0
total = 0
# torch.no_grad() turns off gradient tracking: we only want predictions here,
# so there is no need to record operations for backpropagation.
with torch.no_grad():
    for idx in tqdm(range(len(X))):
        realclass = torch.argmax(y[idx])
        # The network expects a batch, so feed a batch of one and take its only row
        predictedclass = torch.argmax(net(X[idx].view(-1, 1, 50, 50))[0])
        correct += int(predictedclass == realclass)
        total += 1
print("Accuracy: ", round(correct/total, 3))

# Validation accuracy 

In [None]:
correct = 0
total = 0
# No gradients are needed while measuring accuracy on the validation set.
with torch.no_grad():
    for idx in tqdm(range(len(Xval))):
        realclass = torch.argmax(yval[idx])
        # The network expects a batch, so feed a batch of one and take its only row
        predictedclass = torch.argmax(net(Xval[idx].view(-1, 1, 50, 50))[0])
        correct += int(predictedclass == realclass)
        total += 1
print("Accuracy: ", round(correct/total, 3))

# Test accuracy

In [None]:
correct = 0
total = 0
# No gradients are needed while measuring accuracy on the test set.
with torch.no_grad():
    for idx in tqdm(range(len(Xtest))):
        realclass = torch.argmax(ytest[idx])
        # The network expects a batch, so feed a batch of one and take its only row
        predictedclass = torch.argmax(net(Xtest[idx].view(-1, 1, 50, 50))[0])
        correct += int(predictedclass == realclass)
        total += 1
print("Accuracy: ", round(correct/total, 3))

# Thank you 
By the end of this notebook, I hope you understand how to implement a CNN using the popular PyTorch library.
Feel free to play with it: build your own model with more layers and see what different accuracies you get.