In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
from utils import *
from fastai.conv_learner import *
from torch.nn.init import xavier_normal

In [3]:
path = 'data'
get_mnist(path)

In [4]:
# Read in data
trn_x = np.load(os.path.join(path, 'mnist', 'train_x.npy'))
trn_y = np.load(os.path.join(path, 'mnist', 'train_y.npy'))

test_x = np.load(os.path.join(path, 'mnist', 'test_x.npy'))
test_y = np.load(os.path.join(path, 'mnist', 'test_y.npy'))

In [5]:
# Prepare val set
n = len(trn_x)
trn_idxs = np.arange(n)
np.random.shuffle(trn_idxs)

val_x = trn_x[trn_idxs[:int(0.2 * n)]]
val_y = trn_y[trn_idxs[:int(0.2 * n)]]

trn_x = trn_x[trn_idxs[int(0.2 * n):]]
trn_y = trn_y[trn_idxs[int(0.2 * n):]]

In [6]:
avg = np.mean(trn_x)
std = np.std(trn_x)

In [7]:
trn_x = (trn_x - avg) / std
val_x = (val_x - avg) / std
test_x = (test_x - avg) / std

In [8]:
data = ImageClassifierData.from_arrays(os.path.join(path, 'mnist'), (trn_x, trn_y), (val_x, val_y), bs=64)

In [9]:
class FCNet(nn.Module):
    def __init__(self, ps, activation=F.relu):
        super().__init__()
        self.ps = ps
        self.activation = activation
        
        self.layers = nn.ModuleList([
            nn.Linear(784, 1024),
            nn.Linear(1024, 1024),
        ])
        for l in self.layers:
            xavier_normal(l.weight.data)
        
        self.l_out = nn.Linear(1024, 10)
        xavier_normal(self.l_out.weight.data)

        
    def forward(self, x):
        x = x.view(-1, 784)
        
        x = F.dropout(x, self.ps[0])
        for l, p in zip(self.layers, self.ps[1:]):
            x = l(x)
            x = self.activation(x)
            x = F.dropout(x, p)
        return F.log_softmax(F.dropout(self.l_out(x),0.05))

In [10]:
m = FCNet((0.75, 0.5, 0.15))
learn = ConvLearner.from_model_data(m, data, opt_fn=optim.RMSprop)

In [11]:
%%time
learn.fit(1e-3, 4, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss   accuracy         
    0      0.108203   0.083719   0.974235  
    1      0.086218   0.082675   0.973986         
    2      0.034079   0.055759   0.982048         
    3      0.073335   0.100408   0.972739         
    4      0.038372   0.070023   0.981383         
    5      0.011027   0.063617   0.983793         
    6      0.002493   0.062457   0.984375         
    7      0.065316   0.11281    0.974069         
    8      0.039693   0.098724   0.977394         
    9      0.023218   0.095109   0.981051         
    10     0.009109   0.108155   0.981549         
    11     0.004278   0.101964   0.982463         
    12     0.000806   0.097023   0.985705         
    13     0.000166   0.09941    0.986785         
    14     1.5e-05    0.101724   0.987035         

CPU times: user 36.8 s, sys: 13.8 s, total: 50.6 s
Wall time: 32 s


[0.1017239, 0.98703457446808507]

In [12]:
def RMSprop_Momentum(momentum):
    return lambda *args, **kwargs: optim.SGD(*args, momentum=momentum, **kwargs)

In [13]:
m = FCNet((0.1, 0.25, 0.25), F.elu)
learn = ConvLearner.from_model_data(m, data, opt_fn=RMSprop_Momentum(0.99))

In [14]:
%%time
learn.fit(3e-3, 4, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss   accuracy         
    0      0.203993   0.176433   0.950133  
    1      0.254044   0.192983   0.952876         
    2      0.072411   0.106379   0.974235         
    3      0.174421   0.240243   0.962766         
    4      0.133116   0.166678   0.969415         
    5      0.031721   0.100086   0.979471         
    6      0.020055   0.096397   0.980552         
    7      0.111623   0.230344   0.969498         
    8      0.116134   0.229525   0.968418         
    9      0.085529   0.193438   0.974152         
    10     0.031725   0.158465   0.980967         
    11     0.005764   0.145115   0.98238          
    12     0.001006   0.138001   0.983544         
    13     0.00063    0.138406   0.983627         
    14     0.000585   0.138398   0.983544         

CPU times: user 36.3 s, sys: 13 s, total: 49.3 s
Wall time: 30.7 s


[0.13839807, 0.98354388297872342]