# PyTorch Third Attempt

In [1]:
# # Check out available CPU and GPU memory
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU

def print_CPU_GPU_info(GPUs):
    process = psutil.Process(os.getpid())
    print(f"\nCPU \tRAM Free: {humanize.naturalsize(psutil.virtual_memory().available)}"
          f"    | Proc size: {humanize.naturalsize(process.memory_info().rss)}")
    if GPUs[0]: 
        for i,gpu in enumerate(GPUs):
            print(f"GPU {i} \tRAM Free: {gpu.memoryFree/1000:.3f} GB  "
                  f"| Used: {gpu.memoryUsed/1000:.3f} GB"
                  f"\t| Utilization: {gpu.memoryUtil*100:3.0f}% | "
                  f"Total Memory: {gpu.memoryTotal/1000:.3f} GB")
    else: print(f"Not on a GPU")


Collecting gputil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25ldone
[?25h  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0


In [2]:
print_CPU_GPU_info(GPU.getGPUs())


CPU 	RAM Free: 12.9 GB    | Proc size: 143.1 MB
GPU 0 	RAM Free: 11.441 GB  | Used: 0.000 GB	| Utilization:   0% | Total Memory: 11.441 GB


In [3]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) { return false; }
// disable scrollable cells

<IPython.core.display.Javascript object>

In [4]:
! git clone https://github.com/samryan18/chess-ray-vision
! git clone https://github.com/mukundv7/crvdataset
! mv chess-ray-vision/clean_notebooks/* .
! mkdir train_full
! mv crvdataset/chess-positions/train-full/* train_full/

Cloning into 'chess-ray-vision'...
remote: Enumerating objects: 80, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (72/72), done.[K
remote: Total 80 (delta 11), reused 77 (delta 8), pack-reused 0[K
Unpacking objects: 100% (80/80), done.
Cloning into 'crvdataset'...
remote: Enumerating objects: 39509, done.[K
remote: Counting objects: 100% (39509/39509), done.[K
remote: Compressing objects: 100% (39378/39378), done.[K
remote: Total 39509 (delta 150), reused 39484 (delta 128), pack-reused 0[K
Receiving objects: 100% (39509/39509), 770.54 MiB | 32.50 MiB/s, done.
Resolving deltas: 100% (150/150), done.
Checking out files: 100% (39691/39691), done.


In [0]:
# ! mkdir train_mini
# ! mv crvdataset/chess-positions/train-mini/* ./train_mini
# ! mkdir train_mini/train


# Setup Stuff

In [5]:
# Pytorch Colab Setup
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'
!pip3 install https://download.pytorch.org/whl/cu100/torch-1.0.1-cp36-cp36m-linux_x86_64.whl
!pip3 install torchvision
  
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

Collecting torch==1.0.1 from https://download.pytorch.org/whl/cu100/torch-1.0.1-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading https://download.pytorch.org/whl/cu100/torch-1.0.1-cp36-cp36m-linux_x86_64.whl (614.8MB)
[K    100% |████████████████████████████████| 614.8MB 27kB/s 
[31mfastai 1.0.50.post1 has requirement numpy>=1.15, but you'll have numpy 1.14.6 which is incompatible.[0m
[?25hInstalling collected packages: torch
  Found existing installation: torch 1.0.1.post2
    Uninstalling torch-1.0.1.post2:
      Successfully uninstalled torch-1.0.1.post2
Successfully installed torch-1.0.1
cuda:0


In [6]:
## Required packages (Install in Colab)
!pip install tensorflow
!pip install scipy
!pip install numpy
!pip install Pillow
!pip install image



In [7]:
import torchvision
import torch 
import torch.nn as nn
import torchvision.datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np;
from torch.utils.data import Dataset, DataLoader
import time, datetime
from pytorch_general.pytorch_helper import imshow
from pytorch_general.tensorboard_helper import Logger
from tqdm import tqdm_notebook

from random import randint


from PIL import Image
from pathlib import Path

from typing import Callable
import torch
import dill
import torch.optim as optim
device =torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [0]:
import numpy as np
import re
piece_symbols = 'prbnkqPRBNKQ'

def onehot_from_fen(fen):
    eye = np.eye(13)
    output = np.empty((0, 13))
    fen = re.sub('[-]', '', fen)

    for char in fen:
        if(char in '12345678'):
            output = np.append(
              output, np.tile(eye[12], (int(char), 1)), axis=0)
        else:
            idx = piece_symbols.index(char)
            output = np.append(output, eye[idx].reshape((1, 13)), axis=0)

    return output

def fen_from_onehot(one_hot):
    output = ''
    for j in range(8):
        for i in range(8):
            if(one_hot[j][i] == 12):
                output += ' '
            else:
                output += piece_symbols[one_hot[j][i]]
        if(j != 7):
            output += '-'

    for i in range(8, 0, -1):
        output = output.replace(' ' * i, str(i))

    return output

class_prob = onehot_from_fen('4kN1N-B1P5-QQ3B2-R1n1b3-8-1p2P3-1K6-6b1')

# one_hot = np.zeros((64, 13))
# one_hot[np.arange(64,13), class_labels] = 1
# # class_labels
# np.shape(class_prob)

In [0]:
# xz

In [0]:
def load_batch(directory='train_full/', batch_size=32):
    '''
    Probably a better way to do this using something like this:
    https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
    
    This loads a single random batch from the training set.
    '''
    pathlist = list(Path(directory).glob('**/*.jpeg'))
    labels = []
    images = []
    n_files = len(pathlist)
    random_indicies = [randint(0, n_files) for _ in range(batch_size)]

    for path in [pathlist[x] for x in random_indicies]:
        label = str(path).split(directory)[1].split(f'.')[0]
        label = onehot_from_fen(label)

        img = np.asarray(Image.open(str(path))).astype('uint8')
        labels.append(label)
        images.append(img)
        
    test_images, test_labels = (images, labels) # TODO
        
    return images, labels, test_images, test_labels




class CustomChessDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, transform=None, root='train_full/', train=True):
#         train_images, train_labels, test_images, test_labels = load_datasets()
        self.transform = transform

        self._train = train
            
        self.root = root
        self.pathlist = list(Path(self.root).glob('**/*.jpeg'))
        self.n_files = len(self.pathlist)

        self.to_tensor = transforms.ToTensor()


    def __len__(self):
        return self.n_files

    def __getitem__(self, idx):
        path = self.pathlist[idx]
        label = str(path).split(self.root)[1].split(f'.')[0]
        label = onehot_from_fen(label)
        img = np.asarray(Image.open(str(path))).astype('uint8')

        img_as_img = Image.fromarray(img)
        img_as_img = img_as_img.convert('L')
        img_as_img = self.transform(img_as_img)
        
        
#         _,class_labels = torch.max(self.to_tensor(self.labels[idx]).long().to(device),1) 

        return (self.to_tensor(img_as_img), 
                label)


In [0]:
# NEW FROM KERAS VERSION

import glob
from random import shuffle
from skimage.util.shape import view_as_blocks
from skimage import transform as sktransform
from skimage import io




piece_symbols = 'prbnkqPRBNKQ'

def fen_from_filename(filename):
    base = os.path.basename(filename)
    return os.path.splitext(base)[0]

def process_image(img):
    downsample_size = 200
    square_size = int(downsample_size/8)
    img_read = io.imread(img)
    img_read = sktransform.resize(
      img_read, (downsample_size, downsample_size), mode='constant')
    tiles = view_as_blocks(img_read, block_shape=(square_size, square_size, 3))
    tiles = tiles.squeeze(axis=2)
    return tiles.reshape(64, square_size, square_size, 3)

# def train_gen(features, labels, batch_size):
#     for i, img in enumerate(features):
#         y = onehot_from_fen(fen_from_filename(img))
#         x = process_image(img)
#         yield x, y

# def pred_gen(features, batch_size):
#     for i, img in enumerate(features):
#         yield process_image(img)
        

class CustomChessDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, transform=None, root='train_full/', train=True):
#         train_images, train_labels, test_images, test_labels = load_datasets()

        self._train = train
            
        self.root = root
        self.pathlist = list(Path(self.root).glob('**/*.jpeg'))
        self.n_files = len(self.pathlist)

        self.to_tensor = transforms.ToTensor()
        
        self.train_size = 10000
        self.test_size = 3000

        self.train = glob.glob("train_full/*.jpeg")
        self.test = glob.glob("train_full/*.jpeg")

        shuffle(self.train)
        shuffle(self.test)

        self.train = self.train[:self.train_size]
        self.test = self.test[:self.test_size]


    def __len__(self):
        return 10000

    def __getitem__(self, idx):
        img = self.train[idx]
        label = onehot_from_fen(fen_from_filename(img))
        img_as_img = process_image(img)

        
        
#         _,class_labels = torch.max(self.to_tensor(self.labels[idx]).long().to(device),1) 

        return ((torch.from_numpy(img_as_img).float()), 
                label)


In [0]:
# Define a transform to normalize the data
batch_size=10 # this needs to be small ish bc bigger models will scale memory usage exponentially
downsample_size=160
transform = transforms.Compose([transforms.Resize(downsample_size)])

train_dataset = CustomChessDataset(root='train_full/', train=True, transform=transform)
test_dataset = CustomChessDataset(root='train_full/', train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)



# print(f'normalized image example-downsampled to {downsample_size}x{downsample_size}')
# image, label = next(iter(test_loader))
# print(np.shape(image))
# imshow(image[0,:]);
# image, label = next(iter(test_loader))

# imshow(image[0,:]);
# image, label = next(iter(test_loader))

# imshow(image[0,:]);

In [0]:
def train_model(model: nn.Module, 
                log_dir: str,
                train_loader,
                criterion,
                optimizer,
                num_epochs,
               log_freq,
               max_per_epoch=-1) -> nn.Module:
    t = datetime.datetime.now()
    now = time.mktime(t.timetuple()) - 1550000000
    logger = Logger(f'{log_dir} ({now})/')
    
    print(now)

    model = model.to(device)
    model.train()

    total_step = len(train_loader)
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}')
        running_loss = 0
        step = 0
#         for step, (images, labels) in tqdm_notebook(enumerate(train_loader), total=len(train_loader), unit="mini-batches"):
        for step, (images, labels) in enumerate(train_loader):

            images, labels = images.to(device), labels.long().to(device)


            output = model(images).to(device)
#             print(images.size())
            _,class_labels = torch.max(labels,1) 
#             print(class_labels.size())
#             print(output.size())
            loss = criterion(output, class_labels).to(device)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Compute accuracy
            _, argmax = torch.max(output, 1)
            accuracy = float((class_labels == argmax.squeeze()).float().mean().cpu())
#             print(f'Accuracy: {accuracy}')

            running_loss += float(loss.item())
            
            del(images)
            del(labels)
            del(class_labels)
            
            if step % log_freq == 0:

                overall_step = epoch*total_step + step

                # 1. Log scalar values (scalar summary)
                info = { 'loss': loss.item(), 'accuracy': accuracy }

                for key, value in info.items():
                    logger.scalar_summary(key, value, overall_step)

                # 2. Log values and gradients of the parameters (histogram summary)
                for key, value in model.named_parameters():
                    key = key.replace('.', '/')
                    logger.histo_summary(key, value.data.cpu().numpy(), overall_step)
                    try:
                        logger.histo_summary(key+'/grad', value.grad.data.cpu().numpy(), overall_step)
                    except (AttributeError):
                        # During transfer learning some of the variables don't have grads
                        pass
            
            if max_per_epoch > 0 and step > max_per_epoch:
                break
        
        print(f"{epoch}: Training loss: {running_loss/len(train_loader)}")
        print(f"{epoch}: Training accuracy: {accuracy}")
#         print(class_labels)
#         print(argmax)

 
    return model


def test_model(model, criterion, test_loader) -> float:
    model = model.to(device)
    correct = 0
    total = 0
    accuracies = []
    losses = []
    total_step = len(test_loader)
    with torch.no_grad():
        for i in range(total_step):
            for  images, labels in test_loader:
                images, labels = images.to(device), labels.long().to(device)
                _,class_labels = torch.max(labels,1) 

                output = model(images).to(device)
                loss = criterion(output, class_labels)
                losses.append(float(loss.item()))


                # Compute accuracy
                _, argmax = torch.max(output, 1)
                accuracy = float((class_labels == argmax.squeeze()).float().mean().cpu())
                accuracies.append(accuracy)
                
    print(f'Accuracy of the network on test images: {np.average(accuracies)}')
    print(f'Avg. Loss of the network on test images: {np.average(losses)}')

    return np.average(accuracies)

In [0]:
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1) 

class SimpleCNN(torch.nn.Module):
    def __init__(self, batch_size):
        super(SimpleCNN, self).__init__()
        self.name = 'SimpleCNN'
        self.batch_size=batch_size
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1),
            nn.LeakyReLU(negative_slope=0.1))
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU())
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU())
        self.flatten = Flatten()
        self.fc1 = nn.Sequential(
            nn.Linear(32*19*19, 256),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.fc2 = nn.Sequential(
            nn.Linear(256, 13))

    def forward(self, x):
        x = x.reshape(self.batch_size*64,3,25,25)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x = self.flatten(x)
        
        x = self.fc1(x)
        x = self.fc2(x)
#         print(f'xsize: {x.size()}')
        x = x.reshape(self.batch_size,64,13)

        return(x)
    
class BiggerCNN(torch.nn.Module):
    def __init__(self, batch_size):
        super(BiggerCNN, self).__init__()
        self.name = 'BiggerCNN'
        self.batch_size=batch_size
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1),
            nn.LeakyReLU(negative_slope=0.1))
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU())
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU())
        self.flatten = Flatten()
        self.fc1 = nn.Sequential(
            nn.Linear(64*19*19, 512),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.fc2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.fc3 = nn.Sequential(
            nn.Linear(256, 13))

    def forward(self, x):
        x = x.reshape(self.batch_size*64,3,25,25)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x = self.flatten(x)
        
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

#         print(f'xsize: {x.size()}')
        x = x.reshape(self.batch_size,64,13)

        return(x)
    
class BatchNormBiggerCNN(torch.nn.Module):
    def __init__(self, batch_size):
        super(BatchNormBiggerCNN, self).__init__()
        self.name = 'BatchNormBiggerCNN'
        self.batch_size=batch_size
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Dropout(p = 0.1))
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU())
        self.flatten = Flatten()
        self.fc1 = nn.Sequential(
            nn.Linear(64*19*19, 512),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.fc2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p = 0.1))
        self.fc3 = nn.Sequential(
            nn.Linear(256, 13))

    def forward(self, x):
        x = x.reshape(self.batch_size*64,3,25,25)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x = self.flatten(x)
        
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

#         print(f'xsize: {x.size()}')
        x = x.reshape(self.batch_size,64,13)

        return(x)

In [0]:
# from torchsummary import summary

# summary(net, input_size=(64, 32, 25, 25))

In [21]:
LOG_DIR = './logs'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

!if [ -f ngrok ] ; then echo "Ngrok already installed" ; else wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip > /dev/null 2>&1 && unzip ngrok-stable-linux-amd64.zip > /dev/null 2>&1 ; fi

get_ipython().system_raw('./ngrok http 6006 &')

! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print('Tensorboard Link: ' +str(json.load(sys.stdin)['tunnels'][0]['public_url']))"

Ngrok already installed
Tensorboard Link: http://aec27de6.ngrok.io


In [0]:
num_epochs = 3
log_freq=20

# 0.0005 best so far
learning_rates = [ 0.0004, 0.0002, 0.0007]

for learning_rate in learning_rates:
    
    net = BiggerCNN(batch_size=batch_size)

    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    log_dir = f'./logs/{net.name}_lr{learning_rate}'
    criterion = nn.CrossEntropyLoss().to(device)

    model = train_model(net,
                    log_dir,
                    train_loader,
                    criterion,
                    optimizer,
                    num_epochs, 
                    log_freq,
                    max_per_epoch=-1) # max per epoch is a debugging thing

# final_acc = test_model(model, criterion, test_loader)

4252158.0
Epoch 0
0: Training loss: 1.2905190806314348
0: Training accuracy: 0.9769230484962463
Epoch 1


NameError: ignored

In [0]:

# total_step = len(train_loader)
# num_epochs=10
# for epoch in range(num_epochs):
#     running_loss = 0
#     for step, (images, labels) in enumerate(train_loader):
#         images, labels = images.to(device), labels.long().to(device)
#         imshow(images[0].cpu())

        
# #         print(np.shape(labels))

#         _,class_labels = torch.max(labels,1) 
#         print(class_labels[0])

#         accuracy = (class_labels == class_labels).float().mean()

#         running_loss += 1


#     print_CPU_GPU_info(GPU.getGPUs())

#     print(f"{epoch}: Training loss: {running_loss/len(train_loader)}")
#     print(f"{epoch}: Training accuracy: {accuracy}")

In [0]:


class SimpleCNN(torch.nn.Module):
    
    #Our batch shape for input x is (3, 32, 32)
    
    def __init__(self, batch_size):
        super(SimpleCNN, self).__init__()
        self.batch_size=batch_size
        
        #Input channels = 3, output channels = 18
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=36, kernel_size=20, stride=1, padding=1),
            nn.BatchNorm2d(36),
            nn.MaxPool2d(kernel_size=4, stride=4, padding=0),
            nn.Dropout(p = 0.1),
            nn.ReLU())
        
        
        #4608 input features, 64 output features (see sizing flow below)
        # 200/2, 200/2
        self.fc1 = nn.Sequential(
            nn.Linear(44100, 64*13),
            nn.ReLU())
        
#         self.fc2 = nn.Sequential(
#             nn.Linear(64*26, 64*13),
#             nn.ReLU())
        
        self.fc3 = nn.Sequential(
            nn.Linear(64*13, 64*13),
            nn.ReLU())

        
    def forward(self, x):
        x = self.conv1(x)
        
        #Reshape data to input to the input layer of the neural net
        #-1 infers this dimension from the other given dimension
#         print(x.size())
        x = x.view(-1, 44100)
#         x = x.view(-1, 36*40*40)
    
#         print(x.size())
        
        x = self.fc1(x)
#         x = self.fc2(x)
        x = self.fc3(x)

        
        # Reshape data to output shape
        out = x.reshape(self.batch_size, 64, 13)

        return(out)
    
    


# class BetterCNN(torch.nn.Module):
    
#     #Our batch shape for input x is (3, 32, 32)
    
#     def __init__(self, batch_size):
#         super(BetterCNN, self).__init__()
#         self.batch_size=batch_size
        
#         #Input channels = 3, output channels = 18
        
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(in_channels=1, out_channels=18, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(18),
#             nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
#             nn.Dropout(p = 0.1),
#             nn.ReLU())
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(in_channels=18, out_channels=16, kernel_size=1, stride=3),
#             nn.BatchNorm2d(16),
#             nn.MaxPool2d(kernel_size=4, stride=4, padding=0),
#             nn.Dropout(p = 0.1))
        
#         #4608 input features, 64 output features (see sizing flow below)
#         # 200/2, 200/2
#         self.fc1 = nn.Sequential(
#             nn.Linear(32768, 1024),
#             nn.ReLU())
        
#         self.fc2 = nn.Sequential(
#             nn.Linear(1024, 64*13))

        
#     def forward(self, x):
#         x = F.relu(self.conv1(x))
#         x = F.relu(self.conv2(x))

        
#         #Reshape data to input to the input layer of the neural net
#         #-1 infers this dimension from the other given dimension
#         x = x.view(-1, 32768)
        
#         x = self.fc1(x)
#         x = self.fc2(x)
        
#         # Reshape data to output shape
#         out = x.reshape(self.batch_size, 64, 13)

#         return(out)

In [0]:
# class Net(nn.Module):
#     def __init__(self, batch_size):
#         super(Net, self).__init__()
#         self.batch_size = batch_size
#         self.fc1 = nn.Sequential(
#             nn.Linear(200*200*1, 4096),
#             nn.ReLU())
#         self.fc2 = nn.Sequential(
#             nn.Linear(4096, 2048),
#             nn.ReLU())
#         self.fc3 = nn.Sequential(
#             nn.Linear(2048, 1024),
#             nn.ReLU())
#         self.fc4 = nn.Sequential(
#             nn.Linear(1024, 64*13),
#             nn.ReLU())

#     def forward(self, x):
#         x = x.view(x.shape[0], -1)
#         out = self.fc1(x)
#         out = self.fc2(out)
#         out = self.fc3(out)
#         out = self.fc4(out)
#         out = out.reshape(1, 64, 13)
#         return out

# class TinyNet(nn.Module):
#     def __init__(self, batch_size):
#         super(TinyNet, self).__init__()
#         self.batch_size = batch_size
#         self.fc1 = nn.Sequential(
#             nn.Linear(200*200*1, 64*13),
#             nn.ReLU())

#     def forward(self, x):
#         x = x.view(x.shape[0], -1)
#         out = self.fc1(x)
#         out = out.reshape(self.batch_size, 64, 13)
#         return out

    

    
# class CNN_1(nn.Module):
#     def __init__(self, batch_size):
#         super(CNN_1, self).__init__()
#         self.batch_size = batch_size
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
#             nn.BatchNorm2d(32),
#             nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
#             nn.Dropout(p = 0.1))
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(in_channels=32, out_channels=8, kernel_size=1, stride=3),
#             nn.BatchNorm2d(8),
#             nn.MaxPool2d(kernel_size=12, stride=12, padding=0),
#             nn.Dropout(p = 0.1))
#         self.fc1 = nn.Sequential(
#             nn.Linear(1024, 64*13),
#             nn.ReLU(),
#             nn.Dropout(p = 0.1))
        

#     def forward(self, x):
#         x = x.view(x.shape[0], -1)
#         out = self.conv1(x)
#         out = self.conv2(out)
#         out = out.view(out.size(0), -1)
#         out = self.fc1(out)
#         out = out.reshape(self.batch_size, 64, 13)

#         return out

    


In [0]:
# # def load_datasets(directory='train_full/'):
# #     pathlist = Path(directory).glob('**/*.jpeg')
# #     labels = []
# #     images = []
# #     for path in pathlist:
# #         label = str(path).split(directory)[1].split(f'.')[0]
# #         label = onehot_from_fen(label)

# #         img = np.asarray(Image.open(str(path))).astype('uint8')
# #         labels.append(label)
# #         images.append(img)
        
# #     test_images, test_labels = (images, labels) # TODO
        
# #     return images, labels, test_images, test_labels
# # images, labels, test_images, test_labels = load_datasets()


# def load_batch(directory='train_full/', batch_size=32):
#     '''
#     Probably a better way to do this using something like this:
#     https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
    
#     This loads a single random batch from the training set.
#     '''
#     pathlist = list(Path(directory).glob('**/*.jpeg'))
#     labels = []
#     images = []
#     n_files = len(pathlist)
#     random_indicies = [randint(0, n_files) for _ in range(batch_size)]

#     for path in [pathlist[x] for x in random_indicies]:
#         label = str(path).split(directory)[1].split(f'.')[0]
#         label = onehot_from_fen(label)

#         img = np.asarray(Image.open(str(path))).astype('uint8')
#         labels.append(label)
#         images.append(img)
        
#     test_images, test_labels = (images, labels) # TODO
        
#     return images, labels, test_images, test_labels


# class CustomChessDataset(Dataset):
#     """Face Landmarks dataset."""

#     def __init__(self, transform=None, train=True):
# #         train_images, train_labels, test_images, test_labels = load_datasets()
#         self.transform = transform
#         self.to_tensor = transforms.ToTensor()
#         if train:
#             self.images = train_images
#             self.labels = np.asarray(train_labels)
            
#         else:
#             self.images = test_images
#             self.labels = np.asarray(test_labels)

#     def __len__(self):
#         return len(self.labels)

#     def __getitem__(self, idx):
#         i = self.images[idx]
#         img_as_img = Image.fromarray(i)
#         img_as_img = img_as_img.convert('L')
        
# #         _,class_labels = torch.max(self.to_tensor(self.labels[idx]).long().to(device),1) 

#         return (self.to_tensor(img_as_img), 
#                 self.labels[idx])


In [0]:
# %matplotlib inline

# # from matplotlib.pyplot import imshow
# # import matplotlib.pyplot as plt
# from draw_chess_boards import *

# renderer = DrawChessPosition(delimiter='-')
# fen = "r2q1rk1/pp2ppbp/1np2np1/2Q3B1/3PP1b1/2N2N2/PP3PPP/3RKB1R"
# fen = "rnbqkbnr-pppppppp-8-8-8-8-PPPPPPPP-RNBQKBNR"
# board = renderer.draw(fen)
# renderer.show(board)

In [0]:
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(400*400*1, 32, 4),
#             nn.BatchNorm2d(32),
#             nn.MaxPool2d(2, 2),
#             nn.Dropout(p = 0.1))
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(400*400*1, 1024, 24),
#             nn.BatchNorm2d(32),
#             nn.MaxPool2d(12, 12),
#             nn.Dropout(p = 0.1))
#         self.fc1 = nn.Sequential(
#             nn.Linear(1024, 64*52),
#             nn.ReLU())
#         self.fc2 = nn.Sequential(
#             nn.Linear(64*52, 64*52),
#             nn.ReLU())
#         self.fc3 = nn.Sequential(
#             nn.Linear(64*52, 64*26),
#             nn.ReLU())
#         self.fc4 = nn.Sequential(
#             nn.Linear(64*26, 64*13),
#             nn.ReLU())

#     def forward(self, x):
#         x = x.view(x.shape[0], -1)
#         out = self.conv1(x)
#         out = self.fc1(out)
#         out = self.fc2(out)
#         out = self.fc3(out)
#         out = self.fc4(out)
#         out = out.reshape(1, 64, 13)
# #         out = F.log_softmax(out, dim=2)

# #         out = F.log_softmax(self.fc6(out.reshape(1, 64, 13)), dim=1)

#         return out
    
    
# # class CNN(nn.Module):
# #     def __init__(self):
# #         super(CNN, self).__init__()

# #         self.conv1 = nn.Conv2d(3, 32, 4)
# #         self.bn1 = nn.BatchNorm2d(32)
# #         self.pool1 = nn.MaxPool2d(2, 2)
# #         self.dropout1 = nn.Dropout(p = 0.1)

# #         self.conv2 = nn.Conv2d(32, 64, 3)
# #         self.bn2 = nn.BatchNorm2d(64)
# #         self.pool2 = nn.MaxPool2d(2, 2)
# #         self.dropout2 = nn.Dropout(p = 0.2)

# #         self.conv3 = nn.Conv2d(64, 128, 2)
# #         self.bn3 = nn.BatchNorm2d(128)
# #         self.pool3 = nn.MaxPool2d(2, 2)
# #         self.dropout3 = nn.Dropout(p = 0.3)

# #         self.conv4 = nn.Conv2d(128, 256, 3)
# #         self.bn4 = nn.BatchNorm2d(256)
# #         self.pool4 = nn.MaxPool2d(2, 2)
# #         self.dropout4 = nn.Dropout(p = 0.4)

# #         self.fc1 = nn.Linear(256*12*12, 1000)
# #         self.dropout5 = nn.Dropout(p = 0.5)
# #         self.fc2 = nn.Linear(1000, 1000)
# #         self.dropout6 = nn.Dropout(p = 0.6)
# #         self.fc3 = nn.Linear(1000, 250)
# #         self.dropout7 = nn.Dropout(p = 0.7)
# #         self.fc4 = nn.Linear(250, 120)


# #     def forward(self, x):
# #         x = self.dropout1(self.pool1(F.relu(self.bn1(self.conv1(x)))))
# #         x = self.dropout2(self.pool2(F.relu(self.bn2(self.conv2(x)))))
# #         x = self.dropout3(self.pool3(F.relu(self.bn3(self.conv3(x)))))
# #         x = self.dropout4(self.pool4(F.relu(self.bn4(self.conv4(x)))))
# #         x = x.view(x.size(0), -1)
# #         x = self.dropout5(self.fc1(x))
# #         x = self.dropout6(self.fc2(x))
# #         x = self.dropout7(self.fc3(x))
# #         x = self.fc4(x)
# #         x = F.log_softmax(x, dim=1)
# #         return x



In [0]:
# def train(model, optimizer, loss_fn, dataloader, metrics, params):
#     """Train the model on `num_steps` batches

#     Args:
#         model: (torch.nn.Module) the neural network
#         optimizer: (torch.optim) optimizer for parameters of model
#         loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
#         dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches training data
#         metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
#         params: (Params) hyperparameters
#         num_steps: (int) number of batches to train on, each of size params.batch_size
#     """

#     # set model to training mode
#     model.train()

#     # summary for current training loop and a running average object for loss
#     summ = []
#     loss_avg = RunningAverage()

#     # Use tqdm for progress bar
#     with tqdm(total=len(dataloader)) as t:
#         for i, (train_batch, labels_batch) in enumerate(dataloader):
#             # move to GPU if available
#             if params.cuda:
#                 train_batch, labels_batch = train_batch.cuda(async=True), labels_batch.cuda(async=True)
#             # convert to torch Variables
#             train_batch, labels_batch = Variable(train_batch), Variable(labels_batch)

#             # compute model output and loss
#             output_batch = model(train_batch)
#             #logger.debug("train output_batch.shape = {}. labels_batch.shape = {}".format(output_batch.shape, labels_batch.shape))

#             # check if predictions are negative
#             logger.info("negative predictions: {}".format((output_batch < 0.0).any()))

#             # compute loss
#             loss = loss_fn(output_batch, labels_batch)
#             logger.debug("loss: {}".format(loss.data.item()))

#             # clear previous gradients, compute gradients of all variables wrt loss
#             optimizer.zero_grad()
#             loss.backward()

#             # performs updates using calculated gradients
#             optimizer.step()

#             # Evaluate summaries only once in a while
#             if i % params.save_summary_steps == 0:
#                 # extract data from torch Variable, move to cpu, convert to numpy arrays
#                 output_batch = output_batch.data.cpu().numpy()
#                 labels_batch = labels_batch.data.cpu().numpy()

#                 # compute all metrics on this batch
#                 summary_batch = {metric:metrics[metric](output_batch, labels_batch)
#                                  for metric in metrics}
#                 summary_batch['loss'] = loss.data.item()
#                 summ.append(summary_batch)

#             # update the average loss
#             loss_avg.update(loss.data.item())

#             t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
#             t.update()

#     # compute mean of all metrics in summary
#     metrics_mean = {metric:np.mean([x[metric] for x in summ]) for metric in summ[0]}
#     metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
#     logger.info("- Train metrics: " + metrics_string)


# def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer, loss_fn, metrics, params, model_dir,
#                        restore_file=None):
#     """Train the model and evaluate every epoch.

#     Args:
#         model: (torch.nn.Module) the neural network
#         train_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches training data
#         val_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches validation data
#         optimizer: (torch.optim) optimizer for parameters of model
#         loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
#         metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
#         params: (Params) hyperparameters
#         model_dir: (string) directory containing config, weights and log
#         restore_file: (string) optional- name of file to restore from (without its extension .pth.tar)
#     """

#     # reload weights from restore_file if specified
#     if restore_file is not None:
#         restore_path = os.path.join(model_dir, restore_file + '.pth.tar')
#         logger.info("Restoring parameters from {}".format(restore_path))
#         load_checkpoint(restore_path, model, optimizer)

#     best_val_acc = 0.0

#     for epoch in range(params.num_epochs):
#         # Run one epoch
#         logger.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

#         # compute number of batches in one epoch (one full pass over the training set)
#         train(model, optimizer, loss_fn, train_dataloader, metrics=metrics, params=params)

#         # Evaluate for one epoch on validation set
#         val_metrics = evaluate(model, loss_fn, val_dataloader, metrics=metrics, params=params)

#         # TODO: Fix TypeError: 'NoneType' object is not subscriptable
#         val_acc = val_metrics['accuracy']
#         is_best = val_acc>=best_val_acc

#         # Save weights
#         save_checkpoint({'epoch': epoch + 1,
#                          'state_dict': model.state_dict(),
#                          'optim_dict' : optimizer.state_dict()},
#                           is_best=is_best,
#                           checkpoint=model_dir)

#         # If best_eval, best_save_path
#         if is_best:
#             logger.info("- Found new best accuracy")
#             best_val_acc = val_acc

#             # Save best val metrics in a json file in the model directory
#             best_json_path = os.path.join(model_dir, "metrics_val_best_weights.json")
#             save_dict_to_json(val_metrics, best_json_path)

#         # Save latest val metrics in a json file in the model directory
#         last_json_path = os.path.join(model_dir, "metrics_val_last_weights.json")
#         save_dict_to_json(val_metrics, last_json_path)

# def main():
#     # print some log messages
#     logger.info("DSTL Satellite Imagery Feature Detection - Train U-Net Model")

#     # load parameters
#     # load parameters from configuration file
#     params = Params('experiment/unet_model/params_3ch.yaml', ParameterFileType.YAML, ctx=None)

#     # parameters
#     logger.debug("parameters: \n{}\n".format(pformat(params.dict)))

#     # use GPU if available
#     params.cuda = torch.cuda.is_available()

#     # Set the random seed for reproducible experiments
#     torch.manual_seed(230)
#     if params.cuda:
#         torch.cuda.manual_seed(230)

#     # dataset parameters, which includes download, input, output and mask generation parameters.
#     dataset_params = params.dataset
#     logger.debug("dataset parameters: \n{}\n".format(pformat(dataset_params)))

#     # dataset
#     logger.info("loading datasets...")
#     train_set = DSTLSIFDDataset(dataset_params=dataset_params,
#                                 mode='train',
#                                 transform=True,
#                                 transform_mask=None,
#                                 download=False)

#     dev_set   = DSTLSIFDDataset(dataset_params=dataset_params,
#                                 mode='dev',
#                                 transform=True,
#                                 transform_mask=None,
#                                 download=False)

#     # dataloader
#     logger.debug("train dataloader, batch size: {}, num workers: {}, cuda: {}".format(
#         params.train['batch_size'],
#         params.train['num_workers'],
#         params.cuda));

#     train_dl = DataLoader(dataset=train_set,
#                           batch_size=params.train['batch_size'],
#                           shuffle=True,
#                           num_workers=params.train['num_workers'],
#                           pin_memory=params.cuda)

#     logger.debug("dev dataloader, batch size: {}, num workers: {}, cuda: {}".format(
#         params.valid['batch_size'],
#         params.valid['num_workers'],
#         params.cuda));

#     valid_dl = DataLoader(dataset=dev_set,
#                           batch_size=params.valid['batch_size'],
#                           shuffle=True,
#                           num_workers=params.valid['num_workers'],
#                           pin_memory=params.cuda)

#     logger.info("- done.")

#     # define the model and optimizer
#     #model = UNet()
#     model = UNet().cuda() if params.cuda else UNet()
#     logger.info("using adam optimized with lr = {}".format(float(params.learning_rate)))
#     optimizer = optim.Adam(model.parameters(), lr=float(params.learning_rate))

#     # loss function
#     loss_fn = multi_class_cross_entropy_loss  # nn.MSELoss()  # nn.L1Loss() # nn.CrossEntropyLoss()

#     # maintain all metrics required in this dictionary- these are used in the training and evaluation loops
#     metrics = {
#         'accuracy': accuracy,
#         # could add more metrics such as accuracy for each token type
#     }

#     # train the model
#     logger.info("Starting training for {} epoch(s)".format(params.num_epochs))

#     data_dir = "data/"
#     model_dir = "experiment/unet_model"

#     train_and_evaluate(model=model,
#                        train_dataloader=train_dl,
#                        val_dataloader=valid_dl,
#                        optimizer=optimizer,
#                        loss_fn=loss_fn,
#                        metrics=metrics,
#                        params=params,
#                        model_dir=data_dir,
#                        restore_file=None)


# if __name__ == '__main__':
#     main()

In [0]:
# # Adapted from here: https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py

# import torch.utils.data as data

# from PIL import Image

# import os
# import os.path
# import sys


# def has_file_allowed_extension(filename, extensions):
#     """Checks if a file is an allowed extension.
#     Args:
#         filename (string): path to a file
#         extensions (iterable of strings): extensions to consider (lowercase)
#     Returns:
#         bool: True if the filename ends with one of given extensions
#     """
#     filename_lower = filename.lower()
#     return any(filename_lower.endswith(ext) for ext in extensions)


# def is_image_file(filename):
#     """Checks if a file is an allowed image extension.
#     Args:
#         filename (string): path to a file
#     Returns:
#         bool: True if the filename ends with a known image extension
#     """
#     return has_file_allowed_extension(filename, IMG_EXTENSIONS)


# def make_dataset(dir, class_to_idx, extensions):
#     images = []
#     dir = os.path.expanduser(dir)
#     for target in sorted(class_to_idx.keys()):
#         d = os.path.join(dir, target)
#         if not os.path.isdir(d):
#             continue

#         for root, _, fnames in sorted(os.walk(d)):
#             for fname in sorted(fnames):
#                 if has_file_allowed_extension(fname, extensions):
#                     path = os.path.join(root, fname)
#                     item = (path, class_to_idx[target])
#                     images.append(item)

#     return images


# class DatasetFolder(data.Dataset):
#     """A generic data loader where the samples are arranged in this way: ::
#         root/class_x/xxx.ext
#         root/class_x/xxy.ext
#         root/class_x/xxz.ext
#         root/class_y/123.ext
#         root/class_y/nsdf3.ext
#         root/class_y/asd932_.ext
#     Args:
#         root (string): Root directory path.
#         loader (callable): A function to load a sample given its path.
#         extensions (list[string]): A list of allowed extensions.
#         transform (callable, optional): A function/transform that takes in
#             a sample and returns a transformed version.
#             E.g, ``transforms.RandomCrop`` for images.
#         target_transform (callable, optional): A function/transform that takes
#             in the target and transforms it.
#      Attributes:
#         classes (list): List of the class names.
#         class_to_idx (dict): Dict with items (class_name, class_index).
#         samples (list): List of (sample path, class_index) tuples
#         targets (list): The class_index value for each image in the dataset
#     """

#     def __init__(self, root, loader, extensions, transform=None, target_transform=None):
#         classes, class_to_idx = self._find_classes(root)
#         samples = make_dataset(root, class_to_idx, extensions)
#         if len(samples) == 0:
#             raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n"
#                                "Supported extensions are: " + ",".join(extensions)))

#         self.root = root
#         self.loader = loader
#         self.extensions = extensions

#         self.classes = classes
#         self.class_to_idx = class_to_idx
#         self.samples = samples
#         self.targets = [s[1] for s in samples]

#         self.transform = transform
#         self.target_transform = target_transform

#     def _find_classes(self, dir):
#         """
#         Finds the class folders in a dataset.
#         Args:
#             dir (string): Root directory path.
#         Returns:
#             tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary.
#         Ensures:
#             No class is a subdirectory of another.
#         """
#         if sys.version_info >= (3, 5):
#             # Faster and available in Python 3.5 and above
#             classes = [d.name.split('.jpeg')[0] for d in os.scandir(dir) if not d.is_dir()]
#         else:
#             classes = [d.name.split('.jpeg')[0] for d in os.listdir(dir) if not os.path.isdir(os.path.join(dir, d))]
#         classes.sort()
#         class_to_idx = {classes[i]: i for i in range(len(classes))}
#         return classes, class_to_idx

#     def __getitem__(self, index):
#         """
#         Args:
#             index (int): Index
#         Returns:
#             tuple: (sample, target) where target is class_index of the target class.
#         """
#         path, target = self.samples[index]
#         sample = self.loader(path)
#         if self.transform is not None:
#             sample = self.transform(sample)
#         if self.target_transform is not None:
#             target = self.target_transform(target)

#         return sample, target

#     def __len__(self):
#         return len(self.samples)

#     def __repr__(self):
#         fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
#         fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
#         fmt_str += '    Root Location: {}\n'.format(self.root)
#         tmp = '    Transforms (if any): '
#         fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
#         tmp = '    Target Transforms (if any): '
#         fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
#         return fmt_str


# IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', 'webp']


# def pil_loader(path):
#     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
#     with open(path, 'rb') as f:
#         img = Image.open(f)
#         return img.convert('RGB')


# def accimage_loader(path):
#     import accimage
#     try:
#         return accimage.Image(path)
#     except IOError:
#         # Potentially a decoding problem, fall back to PIL.Image
#         return pil_loader(path)


# def default_loader(path):
#     from torchvision import get_image_backend
#     if get_image_backend() == 'accimage':
#         return accimage_loader(path)
#     else:
#         return pil_loader(path)


# class ImageFolder(DatasetFolder):
#     """A generic data loader where the images are arranged in this way: ::
#         root/dog/xxx.png
#         root/dog/xxy.png
#         root/dog/xxz.png
#         root/cat/123.png
#         root/cat/nsdf3.png
#         root/cat/asd932_.png
#     Args:
#         root (string): Root directory path.
#         transform (callable, optional): A function/transform that  takes in an PIL image
#             and returns a transformed version. E.g, ``transforms.RandomCrop``
#         target_transform (callable, optional): A function/transform that takes in the
#             target and transforms it.
#         loader (callable, optional): A function to load an image given its path.
#      Attributes:
#         classes (list): List of the class names.
#         class_to_idx (dict): Dict with items (class_name, class_index).
#         imgs (list): List of (image path, class_index) tuples
#     """
#     def __init__(self, root, transform=None, target_transform=None,
#                  loader=default_loader):
#         super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS,
#                                           transform=transform,
#                                           target_transform=target_transform)
#         self.imgs = self.samples
