In [40]:
import chainer
import chainer.functions as F
import chainer.links as L
import numba
from numba import jit
from chainer import training
from chainer.training import extensions
from chainer.datasets import TupleDataset
from chainer.dataset import DatasetMixin
import pandas as pd 
import PIL 
from PIL import Image 

In [25]:
def source_image_data(data_file, label_col, image_col, remove_nas): 
    
    if remove_nas:
        data_file = data_file.dropna(subset=[image_col, label_col])  # remove missing data for now 
    
    image_list = data_file[image_col].tolist()
    label_list = data_file[label_col].tolist()
    
    return image_list, label_list 


In [26]:
def make_paths(path_name, image_list): 
    
    return [path_name + x for x in image_list]

In [31]:
def format_for_chainer(images, labels): 

    return TupleDataset(images, labels)

In [32]:
# hardcoded garbage that gets the csv and the appropriate labels/image data 
data_file = pd.read_csv("/home/ccaggian/bachelor_data/bachelor_females_images.csv") 
image_folder_path = "/home/ccaggian/bachelor_data/images/"

label_col = "ElimWeek"  # column with labels (week eliminated)
image_col = "image"  # column with data (jpegs)

images, labels = source_image_data(data_file, label_col, image_col, True)  # get appropriate data in list for chainer, remove nans

images = make_paths(image_folder_path, images)  # give appropriate absolute path of image data 

train_data = format_for_chainer(images, labels)
train_data

<chainer.datasets.tuple_dataset.TupleDataset at 0x7fdcaa0d4fd0>

In [33]:
class BachelorData(DatasetMixin):

    def __init__(self, train=True):
        # TODO: FILL IN HERE.
    
        if train:
            self.data = train_data
        else:
            self.data = test_data
        self.train = train
        self.random_crop = 4

    def __len__(self):
        return len(self.data)

    def get_example(self, i):
        # NOTE: THIS SHOULD BE FINE TO KEEP FOR RIGHT NOW, PERHAPS REMOVE THE TRANPOSE AND AUGMENTATION STEPS
        
        x, t = self.data[i]
        if self.train:
            x = x.transpose(1, 2, 0)
            h, w, _ = x.shape
            x_offset = np.random.randint(self.random_crop)
            y_offset = np.random.randint(self.random_crop)
            x = x[y_offset:y_offset + h - self.random_crop,
                  x_offset:x_offset + w - self.random_crop]
            if np.random.rand() > 0.5:
                x = np.fliplr(x)
            x = x.transpose(2, 0, 1)
        return x, t

In [34]:
class ConvBlock(chainer.Chain):
    
    def __init__(self, n_ch, pool_drop=False):
        w = chainer.initializers.HeNormal()
        super(ConvBlock, self).__init__()
        with self.init_scope():
            self.conv=L.Convolution2D(None, n_ch, 3, 1, 1, nobias=True, initialW=w)
            self.bn=L.BatchNormalization(n_ch)
        
        self.pool_drop = pool_drop
        
    def __call__(self, x):
        h = F.relu(self.bn(self.conv(x)))
        if self.pool_drop:
            h = F.max_pooling_2d(h, 2, 2)
            h = F.dropout(h, ratio=0.25)
        return h
    
class LinearBlock(chainer.Chain):
    
    def __init__(self):
        w = chainer.initializers.HeNormal()
        super(LinearBlock, self).__init__()
        with self.init_scope():
            self.fc=L.Linear(None, 1024, initialW=w)
        
    def __call__(self, x):
        return F.dropout(F.relu(self.fc(x)), ratio=0.5)

In [35]:
class DeepCNN(chainer.ChainList):

    def __init__(self, n_output):
        super(DeepCNN, self).__init__(
            ConvBlock(64),
            ConvBlock(64, True),
            ConvBlock(128),
            ConvBlock(128, True),
            ConvBlock(256),
            ConvBlock(256),
            ConvBlock(256),
            ConvBlock(256, True),
            LinearBlock(),
            LinearBlock(),
            L.Linear(None, n_output)
        )
        

    def __call__(self, x):
        for f in self.children():
            x = f(x)
        return x

In [42]:
model = L.Classifier(DeepCNN(1))
    
# Setup an optimizer
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
    
# LOAD OUR DATASET HERE INSTEAD OF MNIST
train_iter = chainer.iterators.SerialIterator(train_data, batch_size=100)
# test_iter = chainer.iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)
    
# Set up a trainer
updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (5, 'epoch'), out='result')
    
# trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
#trainer.extend(extensions.ProgressBar())
trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
    
# Run the training
trainer.run()


  $ pip install matplotlib

Exception in main training loop: string indices must be integers
Traceback (most recent call last):
  File "/home/ccaggian/miniconda2/envs/py36/lib/python3.6/site-packages/chainer/training/trainer.py", line 306, in run
    update()
  File "/home/ccaggian/miniconda2/envs/py36/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 149, in update
    self.update_core()
  File "/home/ccaggian/miniconda2/envs/py36/lib/python3.6/site-packages/chainer/training/updaters/standard_updater.py", line 154, in update_core
    in_arrays = self.converter(batch, self.device)
  File "/home/ccaggian/miniconda2/envs/py36/lib/python3.6/site-packages/chainer/dataset/convert.py", line 133, in concat_examples
    [example[i] for example in batch], padding[i])))
  File "/home/ccaggian/miniconda2/envs/py36/lib/python3.6/site-packages/chainer/dataset/convert.py", line 163, in _concat_arrays
    return xp.concatenate([array[None] for array in arrays])
  File 

TypeError: string indices must be integers