# PicTex Text Detection Model with Zach

Finalized on **August 21st, 2020**


### 1. Create labels

In [37]:
from collections import OrderedDict
import numpy as np
import torch
import os

data_dir = "./final/"

classes = os.listdir(data_dir)
num_classes = len(classes)

classes_encode, classes_decode = {}, {}
for i, name in enumerate(classes):
    classes_encode[name] = i
    classes_decode[i] = name

encode_dict, decode_dict = OrderedDict(classes_encode), OrderedDict(classes_encode)

print(f"There are {num_classes} classes")
print(classes_encode)
print(classes_decode)

There are 73 classes
{'(': 0, ')': 1, '+': 2, '-': 3, '0': 4, '1': 5, '2': 6, '3': 7, '4': 8, '5': 9, '6': 10, '7': 11, '8': 12, '9': 13, '=': 14, 'a': 15, 'alpha': 16, 'ast': 17, 'b': 18, 'beta': 19, 'c': 20, 'comma': 21, 'd': 22, 'delta': 23, 'e': 24, 'emptyset': 25, 'f': 26, 'forall': 27, 'full_stop': 28, 'g': 29, 'greater': 30, 'h': 31, 'implies': 32, 'in': 33, 'infty': 34, 'int': 35, 'j': 36, 'k': 37, 'l': 38, 'lambda': 39, 'land': 40, 'leq': 41, 'lesser': 42, 'm': 43, 'mu': 44, 'n': 45, 'nabla': 46, 'Naturals': 47, 'neq': 48, 'o': 49, 'p': 50, 'perp': 51, 'pi': 52, 'q': 53, 'r': 54, 'Reals': 55, 's': 56, 'setminus': 57, 'sigma': 58, 'sim': 59, 'sum': 60, 'supset': 61, 't': 62, 'theta': 63, 'u': 64, 'v': 65, 'varepsilon': 66, 'w': 67, 'x': 68, 'y': 69, 'z': 70, '[': 71, ']': 72}
{0: '(', 1: ')', 2: '+', 3: '-', 4: '0', 5: '1', 6: '2', 7: '3', 8: '4', 9: '5', 10: '6', 11: '7', 12: '8', 13: '9', 14: '=', 15: 'a', 16: 'alpha', 17: 'ast', 18: 'b', 19: 'beta', 20: 'c', 21: 'comma', 22:

### 2. Create the `Dataset` and `Dataloader` objects

In [49]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.utils.data import random_split
import numpy as np
import torch
from PIL import Image
import random
import cv2

# grayscale images have one channel
num_output_channels = 1
normal = (0.5,)

# choose batch size
batch_size = 64

transform = transforms.Compose(
    [transforms.Grayscale(num_output_channels),
     transforms.Resize((32, 32)), 
     transforms.ToTensor(),
     transforms.Normalize(normal, normal)])

class PicTexDataset(Dataset):
    def __init__(self, root_dir, encode_dict, num_classes, transform=None, train=True):
        """
        Args:
            root_dir (string): Directory containing images sorted by class folders
            encode_dict (Ordered dict): Dictionary with class names zipped from 0-(num_classes-1)
            num_classes (int): Number of classes (SHOULD EQUAL LEN of ENCODE_DICT)
            transform (torchvision.transforms): Transforms to be applied to images 
        """
        self.root_dir = root_dir
        self.encode_dict = encode_dict
        self.num_classes = num_classes
        self.transform = transform
        self.is_train = train
        
        """
        Loading images:
            all_paths (string list): Path of every image
            all_paths_class (string list): Bijection with all_paths. Class name for each path
            all_images (string * string list): List of these objects... (class name, path of image)
        """
        all_paths, all_paths_class = [], []
        for name in encode_dict.keys():
            list_classes = os.listdir(root_dir + name)
            all_paths += list(map(lambda s : root_dir + name + "/" + s, list_classes))
            all_paths_class += [name] * len(list_classes)
        
        self.all_images = list(zip(all_paths_class, all_paths))

    def __len__(self):
        return len(self.all_images)

    def __getitem__(self, idx):
        img_class, img_name = self.all_images[idx]
        try:
            image = Image.open(img_name)
            if self.is_train:
                image = transforms.Grayscale(1)(image)
                image = self.random_shift(image)
                image = self.random_enlarge(image)
                image = self.random_noise(image)
            if self.transform:
                image = self.transform(image)
        except OSError:
            print(img_name, img_class)
        
        """
        label:
            If we wanted a tensor with a 1 in the spot of the class
            just uncomment the old label definition
            but NLLoss and CrossEntropyLoss just want the index
        """
        #label = torch.zeros(self.num_classes)
        #label[self.encode_dict[img_class]] = 1 
        label = self.encode_dict[img_class]
        return image, label
    
    def random_shift(self, img):
        if random.random() < 0.5:
            return img
        img = np.array(img)
        h, w = img.shape
        img_out = np.zeros((h, w), dtype=np.double)

        dx = random.uniform(-w*0.15, w*0.15)
        dy = random.uniform(-h*0.15, h*0.15)
        dx, dy = int(dx), int(dy)

        if dx >= 0 and dy >= 0:
            img_out[dy:, dx:] = img[:h-dy, :w-dx]
        elif dx >= 0 and dy < 0:
            img_out[:h+dy, dx:] = img[-dy:, :w-dx]
        elif dx < 0 and dy >= 0:
            img_out[dy:, :w+dx] = img[:h-dy, -dx:]
        elif dx < 0 and dy < 0:
            img_out[:h+dy, :w+dx] = img[-dy:, -dx:]

        img_out = Image.fromarray(img_out) 
        return img_out
    
    def random_enlarge(self, img):
        if random.random() < 0.5:
            return img
        img = np.array(img)
        k = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        img = cv2.dilate(img, k, iterations=1)
        img = Image.fromarray(img)
        return img
    
    def random_noise(self, img):
        if random.random() < 0.25:
            return img
        img = np.array(img)
        img_copy = img.copy()
        gaussian_noise = img.copy()
        cv2.randn(gaussian_noise, 0, 45)
        img = gaussian_noise + img_copy
        img = Image.fromarray(img)
        return img
            
    
def load_split_train_test(datadir, valid_size = .2):
    dataset = PicTexDataset(data_dir, encode_dict, num_classes, transform)
    num_test = int(valid_size*len(dataset))
    num_train = len(dataset) - num_test
    
    train_data, test_data = random_split(dataset, (num_train, num_test))
    
    trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
    return trainloader, testloader

trainloader, testloader = load_split_train_test(data_dir, .2)


### 3. Test `Dataloader`

In [50]:
for image, label in trainloader:
    print(f"Label: {label}")

Label: tensor([50, 14, 52, 36, 35, 58, 65, 48, 16,  0, 62, 52, 52, 36, 27, 16, 52, 70,
        44, 39,  1, 72,  8, 47, 32, 21, 44, 19, 47, 60, 34,  0, 26, 28, 51, 32,
        34, 52, 16, 13, 62, 59, 12, 51, 31, 31, 34, 57, 33, 33, 33, 70, 18, 70,
        44, 35, 30, 71, 31, 29, 38, 26, 12, 27])
Label: tensor([36, 44, 60,  6, 71, 10, 10, 25, 26, 11, 32, 16,  6,  8, 42, 52, 16, 69,
        39,  6,  9, 61, 21, 49, 46, 16, 10, 63, 70, 16,  8, 29,  5, 19, 69,  6,
        54, 46,  4, 66, 48, 63,  4,  7, 14, 16, 25, 34, 65, 21,  4, 50, 41, 52,
        49, 46, 16, 34, 52, 35, 58, 12, 60, 48])
Label: tensor([26, 58, 24, 49,  1, 53,  6, 24, 29, 57, 60, 19, 10, 52, 35, 34, 64, 20,
        46, 34, 51, 12, 51,  4, 37, 25, 46,  7, 27, 38,  5, 46, 28, 67, 60,  5,
         2, 21, 25, 29,  4,  0, 67, 36, 20, 54, 27, 10, 60, 10, 35, 48, 60, 33,
        59,  7, 63, 28, 64, 20,  1, 51, 56, 44])
Label: tensor([20, 36, 35, 33, 35, 22, 60, 34, 33, 10, 12, 58, 64,  4, 16, 49, 54,  7,
         5, 46, 50, 60, 3

Label: tensor([59, 45, 62, 62, 13, 30, 58, 53, 60, 55, 33, 55, 13, 39, 36, 72, 49,  5,
        34, 61, 58,  6,  3, 60, 45, 12, 37, 34, 56,  8,  7, 66,  4, 17, 16,  8,
         0,  4, 17,  5, 19, 35, 12, 71,  6, 61, 55, 12,  3, 26, 10, 46, 24, 12,
         4, 38, 24, 16, 57,  4, 66, 47, 18, 60])
Label: tensor([ 4, 20, 35,  8, 19, 22, 41, 70, 22, 68, 11, 52, 66, 18,  7, 68, 34, 66,
        33, 15, 57,  6, 16, 24,  7, 61,  3, 45, 27, 42, 70,  7, 34, 58, 46, 55,
         5, 60, 25, 51, 42, 44, 52, 55, 33, 33, 34, 14,  4,  6, 65, 25, 27, 13,
        25, 35, 60, 32,  6, 40, 68, 27,  4, 52])
Label: tensor([57, 23, 11,  5, 27,  4, 57, 25, 63, 39,  9, 47, 34, 53, 60, 38, 22, 35,
        44,  7,  8,  7, 34,  4, 37, 49,  1, 53, 55, 42, 46,  7, 16, 57,  4, 20,
        19, 52, 16,  6, 16,  6, 62,  6, 34, 44, 59, 60, 21, 17, 65,  7,  6, 12,
        32,  6, 63, 28, 66, 63, 68, 17, 60, 48])
Label: tensor([63,  3, 72, 25,  4,  7, 38, 60, 51,  7, 58, 35, 35, 31, 64,  7, 10, 52,
        55, 35,  7, 59, 6

Label: tensor([27, 34, 16, 44, 18,  8,  6, 27, 14, 31,  9, 43, 16, 19, 28,  5, 31, 34,
        50, 12,  6, 68, 44, 47, 39, 47, 21, 13, 33, 27, 10, 13, 24, 29, 70, 35,
        33, 19, 33, 24, 33, 39, 16, 28, 35,  8, 56, 16, 57,  4, 66, 33, 26, 60,
        29, 42, 55, 30, 13, 35, 40, 33, 55, 47])
Label: tensor([44, 18, 12, 33,  2,  6, 63, 16, 34,  5, 70, 30, 30, 71, 27, 34,  6, 35,
        42, 46,  0, 67,  8, 72, 67, 39, 69, 31, 24, 36, 66,  9, 19, 48,  1, 53,
        27, 44, 52, 16,  7, 11, 72, 49, 63, 10, 49, 47, 34, 61, 63, 16, 34, 17,
        55,  9, 35, 12,  9, 48, 12, 60, 21, 48])
Label: tensor([ 4,  5, 63, 35, 35, 70,  4, 44, 52, 44, 32, 30, 57, 51, 56, 60,  9, 10,
        48, 60, 70, 11,  1, 25,  6, 44, 60, 48, 10, 51,  5, 68,  3, 47, 57, 65,
        34, 45, 63, 60, 39,  0, 60,  7, 63, 27, 59, 47, 29, 34, 30, 69, 25,  5,
        29, 16, 46, 29, 20,  0, 33, 60,  9, 39])
Label: tensor([44, 55, 21, 54, 34,  9, 49,  5, 33,  4,  9, 19,  6, 14, 63, 19,  4, 13,
        12, 45, 47,  4, 2

Label: tensor([23, 69, 50, 34, 37, 59, 12,  7,  4, 64, 33, 12, 26, 33,  5, 27, 27, 60,
        72, 63, 52, 60, 18,  3, 35, 45, 55, 45, 50, 35, 35, 37, 63, 16, 11, 35,
        62, 10,  7,  6, 35,  7, 22,  4, 35, 49, 55,  8, 10, 60, 72,  5, 16, 10,
        67, 52, 35, 54,  0, 62, 26, 34, 32,  7])
Label: tensor([ 8,  4, 51, 68, 33, 24, 33,  2, 34, 25, 55,  5, 35,  3, 43, 46, 56, 34,
        12, 52, 17, 10, 51,  2, 27, 31, 31, 46, 61, 63, 39, 30, 26,  7, 23, 19,
        55, 72, 19,  0,  9, 15, 51, 24, 59, 32, 33, 13, 10, 15, 35,  2, 33, 26,
        42, 47, 12, 29, 11, 55, 39, 16, 33, 53])
Label: tensor([56, 11, 35, 34, 11, 35, 28, 65, 20, 63, 64, 52, 55, 43, 41, 67, 19,  8,
         7,  9, 34, 56, 16, 47, 28, 12, 19, 21, 62, 34, 14, 12, 34, 12, 24, 60,
        64, 14, 31, 33, 27, 33,  6, 33, 51, 45, 25,  6, 55, 16, 11, 40, 33, 12,
        46, 68, 35,  7, 52, 34, 19,  8,  3, 31])
Label: tensor([ 3, 60, 66, 16, 16, 26, 65,  0,  8, 63, 66, 22, 60, 63, 20,  8, 36,  8,
        55,  4,  2,  7,  

        22, 35, 59,  7, 66, 34, 10, 21, 49, 33])
Label: tensor([60, 63, 68, 46, 60, 34,  4, 36, 22, 39,  4, 34, 66,  5, 16,  0, 25, 27,
         1, 19, 29, 60, 32,  7, 52,  1, 13, 55, 55, 55, 23,  5, 60, 13, 24,  6,
        34, 55, 37,  2, 54, 45, 51, 10, 51, 11, 60, 27, 33,  6,  5, 66, 54, 56,
        40, 52, 26, 52, 39, 21,  8, 19, 11, 17])
Label: tensor([ 5,  8,  8, 60, 33, 63, 34, 19, 62, 62, 12, 60, 30, 36, 23,  5, 55, 55,
        46, 38, 60, 34,  8, 22,  9, 16, 42, 17, 60, 55, 58, 70, 27, 12, 16,  8,
        28, 12, 31, 27,  7, 55, 32, 58, 25, 55, 35, 12, 27, 28, 28, 19,  8, 34,
        54, 65, 60,  3,  9, 26, 24, 57, 60,  4])
Label: tensor([ 5,  8, 10,  4, 35, 33, 47, 53, 15, 35, 14, 47, 33, 35, 55, 39,  2, 12,
        19, 44,  1, 12, 35, 27, 72,  7, 35, 30, 13, 60, 11, 54, 33, 48, 63, 51,
        60, 52,  4, 39, 68, 52, 34, 32,  2, 55, 59, 13, 30, 37, 11,  4, 66, 25,
        35, 27, 37, 27, 19,  6, 47, 10, 49, 19])
Label: tensor([ 5, 55, 58, 66, 35, 60, 16, 47, 36, 20, 54,  5, 

### 4. Everything else from before:

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(in_channels=num_output_channels, out_channels=10, kernel_size=5, padding=2)
        self.conv12 = nn.Conv2d(10, 10, 5, padding=2)
        self.batch1 = nn.BatchNorm2d(10)
        self.pool1  = nn.MaxPool2d(kernel_size=2,stride=2)
        self.conv21 = nn.Conv2d(10, 20, 5, padding=2)
        self.conv22 = nn.Conv2d(20, 20, 5, padding=2)
        self.batch2 = nn.BatchNorm2d(20)
        self.pool2  = nn.MaxPool2d(2,2)
        self.conv31 = nn.Conv2d(20, 40, 5, padding=2)
        self.conv32 = nn.Conv2d(40, 40, 5, padding=2)
        self.batch3 = nn.BatchNorm2d(40)
        self.pool3  = nn.MaxPool2d(2,2)
        
        # Transitioning from Conv ===> Linear
        # 16 is the number of output channels in the previous conv layer.
        
        self.fc1 = nn.Linear(40 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, len(classes))
        self.dropconv = nn.Dropout(0.2)
        self.dropfc = nn.Dropout(0.5)
        self.soft = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.batch1(x)
        x = self.pool1(x)
        x = F.relu(self.conv21(x))
        x = F.relu(self.conv22(x))
        x = self.batch2(x)
        x = self.pool2(x)
        x = F.relu(self.conv31(x))
        x = F.relu(self.conv32(x))
        x = self.batch3(x)
        x = self.pool3(x)
        x = x.view(-1, 40 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dropfc(x)
        x = self.soft(x)
        return x

# init the class 
model = Net()
print(model)
model.eval()
inp = torch.randn((1,1,32,32))
out = model(inp)

#model.load_state_dict(torch.load('./models/custom_label_1002.pt'))
model.train()
import torch.optim as optim

# set parameters
learning_rate = 0.005
momentum = 0.9

def loss_optim():
    # Loss function: 
    # criterion = nn.CrossEntropyLoss()
    # criterion = nn.MSELoss()
    criterion = nn.NLLLoss()
    
    # Optimizer:
    optimizer = torch.optim.Adamax(model.parameters(), learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    #optimizer = optim.Adam(model.parameters(), learning_rate, amsgrad = True)
    #optimizer = optim.SGD(model.parameters(), learning_rate, momentum)
    
    return criterion, optimizer

criterion, optimizer = loss_optim()
print(criterion)
print(optimizer)

%matplotlib inline
import matplotlib.pyplot as plt
from torch.autograd import Variable


def train_network():
    
    # Choose parameters
    num_epoch = 10
    mini_batch = 10 # previously batch_size
    train_losses, test_losses = [], []
    running_loss = 0
    for epoch in range(num_epoch):  # loop over the dataset multiple times
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = Variable(inputs), Variable(labels)
            
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            # convert to float bc softmax doesn't work with long
            inputs = torch.tensor(inputs, dtype=torch.float)
            outputs = model(inputs)
            
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # print statistics
            if i % mini_batch == mini_batch-1:    # print every # of mini-batches
                test_loss = 0
                accuracy = 0
                # begin evaluation of validation loss
                model.eval()
                with torch.no_grad():
                    for inputs_test, labels_test in testloader:
                        if labels_test.size()[0] == batch_size:
                            ps = model.forward(inputs_test)
                            batch_loss = criterion(ps, labels_test)
                            test_loss += batch_loss.item()
                            top_p, top_class = ps.topk(1, dim=1)
                            equals = top_class == labels_test.view(*top_class.shape)
                            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
                train_losses.append(running_loss/len(trainloader))
                test_losses.append(test_loss/len(testloader))    
                print(f"|  [Epoch: {epoch + 1}, Batch: {i + 1}]   "
                      f"Train loss: {running_loss/len(trainloader):.3f}  |  "
                      f"Test loss: {test_loss/len(testloader):.3f}  |  "
                      f"Test accuracy: {accuracy/len(testloader):.3f}  |")
                running_loss = 0
                model.train()
            
    
    PATH = './models/pictexHELP_100.pt'
    torch.save(model.state_dict(), PATH)
    return train_losses, test_losses, PATH

def visualize_train(train_losses, test_losses):
     plt.plot(train_losses, label='Training loss')
     plt.plot(test_losses, label='Test/Validation loss')
     plt.legend(frameon=False)
     plt.show()

print("========================================BEGIN TRAINING=======================================")
train_losses, test_losses, PATH = train_network()
print("=========================================END TRAINING========================================")
visualize_train(train_losses, test_losses)

Net(
  (conv11): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv12): Conv2d(10, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv21): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv22): Conv2d(20, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv31): Conv2d(20, 40, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv32): Conv2d(40, 40, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch3): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=F



|  [Epoch: 1, Batch: 10]   Train loss: 0.323  |  Test loss: 4.144  |  Test accuracy: 0.013  |
|  [Epoch: 1, Batch: 20]   Train loss: 0.314  |  Test loss: 4.149  |  Test accuracy: 0.037  |
|  [Epoch: 1, Batch: 30]   Train loss: 0.309  |  Test loss: 4.125  |  Test accuracy: 0.027  |
|  [Epoch: 1, Batch: 40]   Train loss: 0.309  |  Test loss: 3.968  |  Test accuracy: 0.096  |
|  [Epoch: 1, Batch: 50]   Train loss: 0.299  |  Test loss: 3.714  |  Test accuracy: 0.162  |
|  [Epoch: 1, Batch: 60]   Train loss: 0.295  |  Test loss: 3.512  |  Test accuracy: 0.151  |
|  [Epoch: 1, Batch: 70]   Train loss: 0.292  |  Test loss: 3.389  |  Test accuracy: 0.167  |
|  [Epoch: 1, Batch: 80]   Train loss: 0.292  |  Test loss: 3.424  |  Test accuracy: 0.203  |
|  [Epoch: 1, Batch: 90]   Train loss: 0.284  |  Test loss: 3.305  |  Test accuracy: 0.211  |
|  [Epoch: 1, Batch: 100]   Train loss: 0.285  |  Test loss: 3.208  |  Test accuracy: 0.235  |
|  [Epoch: 1, Batch: 110]   Train loss: 0.277  |  Test loss