[View in Colaboratory](https://colab.research.google.com/github/kywch/deep-learning-practice/blob/master/fastai_lesson7.ipynb)

## References

For tips related to Colab, see https://medium.com/deep-learning-turkey/google-colab-free-gpu-tutorial-e113627b9f5d

For setting up colab to use pytorch and fastai, see https://towardsdatascience.com/fast-ai-lesson-1-on-google-colab-free-gpu-d2af89f53604

The code below is taken from http://course.fast.ai/lessons/lesson7.html and http://forums.fast.ai/t/wiki-lesson-7/9405 

# Setting up the playground


## Checking that the notebook is running on a GPU backend

In [0]:
import tensorflow as tf
tf.test.gpu_device_name()

In [0]:
!/opt/bin/nvidia-smi

## Installing pytorch

In [0]:
import sys
print (sys.version)

In [0]:
#!pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl && pip install torchvision
#!pip install http://download.pytorch.org/whl/cu75/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl  && pip install torchvision
!pip3 install torch torchvision

### Workaround for an issue with PIL
For *"AttributeError: module 'PIL.Image' has no attribute 'register_extensions'"*, refer to this [link](http://forums.fast.ai/t/attributeerror-module-pil-image-has-no-attribute-register-extensions/10689/3) for information about the issue and its workaround.

In [0]:
!pip install Pillow==4.0.0
!pip install PIL
!pip install image

## Installing fastai

In [0]:
!pip install https://github.com/fastai/fastai/archive/master.zip

In [0]:
import fastai


# Download the dataset - CIFAR 10

In [0]:
from fastai import io
import os
import tarfile
import shutil
def download_cifar10(data_path):
    """Download the CIFAR-10 archive into `data_path` and unpack it.

    The archive is fetched with `io.get_data`, extracted into `data_path`,
    and then deleted. Afterwards the `train` and `test` folders are moved
    from `data_path/cifar10` up into `data_path` itself.

    Args:
        data_path: directory that receives the archive and the extracted data.
    """
    # (AS) TODO: put this into the fastai library
    def untar_file(file_path, save_path):
        """Extract a .tar.gz/.tgz archive into `save_path`, then delete it.

        Files with any other extension are left untouched.
        """
        if file_path.endswith(('.tar.gz', '.tgz')):
            # The context manager closes the archive even if extraction fails.
            # NOTE: extractall trusts the member paths stored in the archive,
            # so only use this with an archive from a trusted source.
            with tarfile.open(file_path) as archive:
                archive.extractall(save_path)
            os.remove(file_path)

    cifar_url = 'http://files.fast.ai/data/cifar10.tgz' # faster download
    # cifar_url = 'http://pjreddie.com/media/files/cifar.tgz'
    archive_path = data_path+'/cifar10.tgz'
    io.get_data(cifar_url, archive_path)
    untar_file(archive_path, data_path)
    # Loader expects train and test folders to be outside of cifar10 folder
    shutil.move(data_path+'/cifar10/train', data_path)
    shutil.move(data_path+'/cifar10/test', data_path)


In [0]:
data_dir = 'cifar10_data'
# makedirs(exist_ok=True) does not complain when the folder already exists,
# so this cell can be re-run safely.
os.makedirs(data_dir, exist_ok=True)
# Skip the download when both splits are already unpacked; calling
# download_cifar10 a second time would fail when moving the folders again.
if not all(os.path.isdir(os.path.join(data_dir, split))
           for split in ('train', 'test')):
    download_cifar10(data_dir)

In [0]:
# make sure the data is here
!ls {data_dir}/train

In [0]:
files = !ls {data_dir}/train/airplane  | head
files

# Looking at the dataset

In [0]:
# just run these in every notebook
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [0]:
# getting the image stats from torchvision

import numpy as np
import torchvision.datasets as dset
import torchvision.transforms as transforms

# Pull the training images as one array and rescale by 1/255
# (presumably the raw values are 0-255 -- confirm against torchvision).
tmp_data = dset.CIFAR10(root='cifar', train=True, download=True,
                        transform=transforms.ToTensor()).train_data
tmp_data = tmp_data.astype(np.float32) / 255.

print("data size: {}".format(tmp_data.shape))

# Per-channel statistics: the last axis is indexed once per colour channel.
means = []
stdevs = []
for i in range(3):
    pixels = tmp_data[:, :, :, i].ravel()
    means.append(np.mean(pixels))
    stdevs.append(np.std(pixels))

print("means: {}".format(means))
print("stdevs: {}".format(stdevs))

# stats needs one mean and one std per channel. Building it from `pixels`
# would only capture the channel left over from the last loop iteration.
stats = (np.array(means), np.array(stdevs))

Image stats should be computed over all images in the training folder. 

For each RGB channel, compute the mean and std, e.g., stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

In [0]:
#classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
classes = !ls {data_dir}/train
classes

In [0]:
os.path.join(data_dir, 'train', '*.png')

In [0]:
# load all images from the training folder
import os
import glob
from PIL import Image, ImageStat

image_list = []

# One sub-folder per class under train/, each holding .png files.
for filename in glob.iglob(os.path.join(data_dir, 'train', '*', '*.png'), recursive=True):
    im = Image.open(filename)
    # Image.open is lazy and keeps the file open. load() reads the pixel data
    # now so the file handle is released instead of piling up one per image.
    im.load()
    image_list.append(im)

len(image_list)

In [0]:
plt.imshow(image_list[0])

In [0]:
from fastai.learner import *

def get_data(image_size, batch_size):
    """Build an ImageClassifierData object for the images under `data_dir`.

    The transforms come from `tfms_from_stats` using the global `stats`,
    with `RandomFlip` as the only augmentation and a padding of
    `image_size // 8`. The `test` folder is used as the validation set.

    Args:
        image_size: image size handed to `tfms_from_stats`.
        batch_size: batch size handed to `ImageClassifierData.from_paths`.

    Returns:
        The object returned by `ImageClassifierData.from_paths`.
    """
    padding = image_size // 8
    augmentations = [RandomFlip()]
    image_tfms = tfms_from_stats(
        stats, image_size, aug_tfms=augmentations, pad=padding)
    return ImageClassifierData.from_paths(
        data_dir, val_name='test', tfms=image_tfms, bs=batch_size)

In [0]:
# example of get_data with different batch sizes
data = get_data(32, 4)
[data, len(data.trn_dl), len(data.val_dl)]

# The simplest fully connected model

In [0]:
batch_size = 256

data = get_data(32,batch_size)
[data, len(data.trn_dl), len(data.val_dl)]

In [0]:
class SimpleNet(nn.Module):
    """Fully connected network built from a list of layer widths.

    `layers` lists the width of every layer, input first and output last;
    one `nn.Linear` is created for each consecutive pair of widths.
    """

    def __init__(self, layers):
        super().__init__()
        # Pair each width with its successor: (in_features, out_features).
        width_pairs = zip(layers[:-1], layers[1:])
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in width_pairs])

    def forward(self, x):
        """Flatten each sample, run it through the layers, return log-softmax.

        ReLU is applied between layers; the log-softmax is taken over the
        last linear layer's output before any ReLU.
        """
        n_samples = x.size(0)
        activations = x.view(n_samples, -1)
        for linear in self.layers:
            pre_activation = linear(activations)
            activations = F.relu(pre_activation)
        return F.log_softmax(pre_activation, dim=-1)

In [0]:
# Learner is from learner.py
# create a simple net with..
#  -- input layer: 32 x 32 px x 3 RGB channels, flattened
#  -- fully connected hidden layer, from input to 40 units
#  -- fully connected output layer, from 40 units to 10

learn = Learner.from_model_data(SimpleNet([32*32*3, 40,10]), data)
learn, [o.numel() for o in learn.model.parameters()]

In [0]:
#torch.set_default_tensor_type('torch.cuda.FloatTensor')

learn.summary()

In [0]:
# Ran into a problem when running learn.lr_find() and learn.fit():
# 'RuntimeError: randperm is only implemented for CPU'
# See https://github.com/pytorch/pytorch/issues/6874


learn.lr_find()

%time learn.fit(.01, 2)
