# Moving to the GPU

In [33]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim



REBUILD_DATA = False # set to true to one once, then back to false unless you want to change something in your training data.

class DogsVSCats():
    IMG_SIZE = 50
    CATS = "PetImages/Cat"
    DOGS = "PetImages/Dog"
    TESTING = "PetImages/Testing"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []

    catcount = 0
    dogcount = 0

    def make_training_data(self):
        # iterating over all the directories
        for label in self.LABELS:
            print(label)
            # iterate over all the jpg images in the directories
            for f in tqdm(os.listdir(label)):
                if "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot 
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.CATS:
                            self.catcount += 1
                        elif label == self.DOGS:
                            self.dogcount += 1

                    except Exception as e:
                        # some of the images are no good
                        # some might be corrupt or empty
                        # normally would print(str(e)) but we already know what the error is
                        pass
                        #print(label, f, str(e))

        # after we run the for loop, shuffle data
        # shuffles in-place so we dont have to reassign data
        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Cats:',dogsvcats.catcount)
        print('Dogs:',dogsvcats.dogcount)

if REBUILD_DATA:
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()


training_data = np.load("training_data.npy", allow_pickle=True) # dunno whats up with allow_pickle, but need it to load data in this case
print(len(training_data))

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

24946


In [34]:

class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 32, 5) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, 5) # input is 32, bc the first layer output 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(64, 128, 5)
        #self.fc1 = nn.Linear(????, 512)

        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)

        # in the initialization of calling the layers, 
        # we have to do a quick forward pass to see what the input number is
        self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
        self.fc2 = nn.Linear(512, 2) # 512 in, 2 out bc we're doing 2 classes (dog vs cat).

    def convs(self, x):
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        #print(x[0].shape)

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x
    
    def forward(self, x):
        x = self.convs(x)
        # run forward pass again to determine number
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        #return x
        
        # probably need some sort of activation layer, but thats not really required
        # just a bunch of vectors being multiplied by each other
        # probably should just have one
        
        # Neural Networks are really just trial and error
        return F.softmax(x, dim=1)

net = Net()

In [35]:


X = torch.Tensor([i[0] for i in training_data]).view(-1, 50, 50)
X = X/255.0 # scale imagery from o,255 to be between 0,1

y = torch.Tensor([i[1] for i in training_data])

# separate out some training data

VAL_PCT = 0.1 # test against 10% of out data
val_size = int(len(X)*VAL_PCT)
print(val_size) 

# slicing data into training and testing data

# train up to the negative val_size
train_X = X[:-val_size]
train_y = y[:-val_size]

# train on what is past the negative val_size
test_X = X[-val_size:]
test_y = y[-val_size:]

print(len(train_X))
print(len(test_X))

# quickest and easiest thing to change if hitting mem errors is the batch size
# dont want to go down to one
# go down to 8, and then start tweaking the model

BATCH_SIZE = 100
EPOCHS = 1

2494
22452
2494


In [36]:
def train(net):
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
            #print(i, i+BATCH_SIZE)
            batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
            batch_y = train_y[i:i+BATCH_SIZE]

            net.zero_grad() # function to zero the gradients depends (see below)
            outputs = net(batch_X) # now we can calculate loss
            
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step() # now we can train the model
            
        print(f"Epoch: {epoch}. Loss: {loss}")

In [37]:
        
def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i])
            net_out = net(test_X[i].view(-1, 1, 50, 50))[0]  # returns a list, 
            predicted_class = torch.argmax(net_out)

            if predicted_class == real_class:
                correct += 1
            total += 1

    print("Accuracy: ", round(correct/total, 3))

In [38]:
train(net)

100%|██████████| 225/225 [00:18<00:00, 12.12it/s]

Epoch: 0. Loss: 0.25035518407821655





In [39]:
test(net)

100%|██████████| 2494/2494 [00:02<00:00, 1030.86it/s]

Accuracy:  0.488





In [40]:
torch.cuda.is_available()

True

In [41]:
device = torch.device("cuda:0")
device

device(type='cuda', index=0)

## CPU vs GPU

- train on GPU, but can test on CPU
- CPU can make alot of quieries a minute
- for training, we are trying to train as many batches per minute

In [42]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Runnning on GPU...")
else:
    device = torch.device("cpu")
    print("Runnning on CPU...")

Runnning on GPU...


## Multiple GPUs

- can easily assign layers to different GPUs
- another common application is having encoder and decoder networks

In [43]:
torch.cuda.device_count()

1

In [44]:
net.to(device) # put our entire network on GPU

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)

## Using Pytorch to put data on the GPU

- going to be constantly moving things back and forth between the cpu and gpu
>
- we could convert out entire dataset and put it on the gpu
>
- realistically, we would convert data and move to gpu per batch
>
- tensors on  the GPU can only interact with tensors on the CPU 

In [45]:
net = Net().to(device) # immediately make the network run on the GPU

## Convert Model to use GPU

- reference for loading data [link](https://www.youtube.com/watch?v=6gk7giKER6s&t=966s)
>
- converting train()

In [68]:
EPOCHS = 10

def train(net):
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    loss_function = nn.MSELoss()
    BATCH_SIZE = 100
    EPOCHS = 3
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
            #print(f"{i}:{i+BATCH_SIZE}")
            batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
            batch_y = train_y[i:i+BATCH_SIZE]

            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            net.zero_grad()

            optimizer.zero_grad()   # zero the gradient buffers
            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()    # Does the update

        print(f"Epoch: {epoch}. Loss: {loss}")

In [71]:
train(net)

100%|██████████| 225/225 [00:01<00:00, 136.46it/s]


Epoch: 0. Loss: 0.07434610277414322


100%|██████████| 225/225 [00:01<00:00, 140.79it/s]


Epoch: 1. Loss: 0.0679905042052269


100%|██████████| 225/225 [00:01<00:00, 141.17it/s]

Epoch: 2. Loss: 0.03475644811987877





**^ 4.5s vs 18s**

**Also about 150 iterations/s versus 11 iterations/s**
>

- converting test()

In [66]:
def test(net):
    correct = 0
    total = 0
    for i in tqdm(range(0, len(test_X), BATCH_SIZE)):

        batch_X = test_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50).to(device)
        batch_y = test_y[i:i+BATCH_SIZE].to(device)
        batch_out = net(batch_X)

        out_maxes = [torch.argmax(i) for i in batch_out]
        target_maxes = [torch.argmax(i) for i in batch_y]
        for i,j in zip(out_maxes, target_maxes):
            if i == j:
                correct += 1
            total += 1
    print("Accuracy: ", round(correct/total, 3))

In [72]:
test(net)

100%|██████████| 25/25 [00:00<00:00, 176.64it/s]

Accuracy:  0.761





**^ 70+% accurate, and test data is out of sample. not to bad**

- as long as loss is going down, the model is learning
- the real kicker is in-sample accuracy vs out-of-sample data
- 25k samples is not large enough, our model could brute force this
>
- pytorch on the GPU is just easy