Initialize a shallow feedforward fully-connected network with V1-type weights and classify a __subset of the MNIST__ dataset using __gradient descent__. The subset contains 5 samples per class (50 training samples in total).

In [18]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

from data_fns import load_mnist
from estimator import classical_weights, V1_inspired_weights

In [19]:
# Make GPU index 1 the current CUDA device, so every later `.to('cuda')` lands there.
# NOTE(review): the index is hard-coded — presumably requires a machine with at least two GPUs.
torch.cuda.set_device(1)
# load_mnist returns the train/test arrays and their label arrays, in this order.
train, train_labels, test, test_labels = load_mnist('./data/mnist/')

# Few-shot setting: keep only 50 training samples, stratified by label
# (5 samples/class per the notebook description). The fixed random_state
# makes the selected subset the same on every run.
num_train = 50
train_few, _, train_labels_few, _ = train_test_split(train, train_labels, train_size=num_train, stratify=train_labels, 
                                          random_state=42)


# Move everything to the GPU once; inputs as float tensors, labels as long tensors.
# The full test set is kept, only the training set is subsampled.
X_train = torch.from_numpy(train_few).float().to('cuda')
X_test = torch.from_numpy(test).float().to('cuda')
y_train = torch.from_numpy(train_labels_few).long().to('cuda')
y_test = torch.from_numpy(test_labels).long().to('cuda')
# n = number of training samples, d = input dimension; `d` is read as a global by the network classes.
n, d = X_train.shape

In [20]:
class V1_net(nn.Module):
    """One-hidden-layer ReLU classifier with V1-inspired first-layer weights.

    The input dimension is taken from the module-level ``d``, which must be
    defined before an instance is created. Only the weight matrix of ``fc1``
    is replaced; its bias and the whole output layer keep the default
    ``nn.Linear`` initialization.

    Args:
        hidden_size: number of hidden units.
        scale: forwarded to ``V1_inspired_weights`` as ``scale``.
        t: forwarded to ``V1_inspired_weights`` as ``t``. Defaults to 5,
            the value that used to be hard-coded here.
        l: forwarded to ``V1_inspired_weights`` as ``l``. Defaults to 2,
            the value that used to be hard-coded here.
    """

    def __init__(self, hidden_size, scale, t=5, l=2):
        super().__init__()
        self.fc1 = nn.Linear(d, hidden_size)
        # Overwrite the default weights with the V1-inspired ones (stored as float32).
        self.fc1.weight.data = torch.FloatTensor(V1_inspired_weights(hidden_size, d, t=t, l=l, scale=scale))
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores (logits) for each row of ``inputs``."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class He_net(nn.Module):
    """One-hidden-layer ReLU classifier with Kaiming-normal first-layer weights.

    The input dimension comes from the module-level ``d``. ``scale`` is part
    of the constructor signature but is not used by this class.
    """

    def __init__(self, hidden_size, scale):
        super().__init__()
        self.fc1 = nn.Linear(d, hidden_size)
        # Re-draw only the weight matrix; the bias keeps nn.Linear's default init.
        nn.init.kaiming_normal_(self.fc1.weight)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores (logits) for each row of ``inputs``."""
        hidden = self.fc1(inputs).relu()
        return self.output(hidden)
    
class RF_net(nn.Module):
    """One-hidden-layer ReLU classifier with ``classical_weights`` first-layer weights.

    The input dimension comes from the module-level ``d``. Only the weight
    matrix of ``fc1`` is replaced; biases and the output layer keep the
    default ``nn.Linear`` initialization.
    """

    def __init__(self, hidden_size, scale):
        super().__init__()
        self.fc1 = nn.Linear(d, hidden_size)
        initial_weights = classical_weights(hidden_size, d, scale=scale)
        self.fc1.weight.data = torch.FloatTensor(initial_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores (logits) for each row of ``inputs``."""
        hidden = self.fc1(inputs).relu()
        return self.output(hidden)

In [21]:
def predict(model, X):
    """Return the predicted class index for every row of ``X``.

    Args:
        model: callable mapping ``X`` to a score tensor whose dimension 1
            indexes the classes.
        X: input tensor passed to ``model`` unchanged.

    Returns:
        Tensor of indices of the largest score along dimension 1.
    """
    # Pure inference: disable autograd so no graph is built (and kept alive)
    # for the forward pass, which matters when X is the full test set.
    with torch.no_grad():
        return model(X).max(1)[1]

def error(model, X, y):
    """Return the misclassification rate of ``model`` on ``(X, y)``.

    The result is ``1 - (number of correct predictions) / len(y)``, returned
    as a tensor because it is computed with tensor arithmetic.
    """
    n_correct = torch.sum(predict(model, X) == y)
    return 1 - 1.0 * n_correct / len(y)

In [22]:
# Experiment grid: every model is trained n_trials times for each
# (hidden size, learning rate) pair, for n_epochs gradient steps.
n_trials, n_epochs = 10, 7001
models = {'V1': V1_net, 'He': He_net}
h_list = [50, 100, 400, 1000]
lr_list = [1e-3, 1e-2, 1e-1, 1e0]


def _blank_history():
    """Build a fresh model -> hidden size -> learning rate -> zeros((n_trials, n_epochs)) mapping."""
    history = {}
    for model_name in models.keys():
        per_width = {}
        for width in h_list:
            per_width[width] = {rate: np.zeros((n_trials, n_epochs)) for rate in lr_list}
        history[model_name] = per_width
    return history


# Three independent containers, one per recorded quantity.
train_err = _blank_history()
test_err = _blank_history()
loss_list = _blank_history()

In [None]:
# Train every (hidden size, learning rate, model, trial) combination with
# full-batch gradient descent, recording loss, train error and test error at
# every epoch, then pickle all curves into one file.
scale = 2/d
# t and l only label the output file in this cell: each network is built as
# network(h, scale), so they are not passed to the model constructor here.
t, l = 5, 2
loss_func = nn.CrossEntropyLoss()

# Resolve the output location and create its directory *before* training, so a
# missing folder fails immediately instead of after the whole sweep has run.
results_path = 'results/initialize_mnist/fewshot_data/clf_t=%0.2f_l=%0.2f.pickle' % (t, l)
os.makedirs(os.path.dirname(results_path), exist_ok=True)

for h in h_list:
    for lr in lr_list:
        for m, network in models.items():
            for i in range(n_trials):
                # Fresh model and optimizer per trial; trials differ only in their random init.
                model = network(h, scale).to('cuda')
                optim = torch.optim.SGD(model.parameters(), lr=lr)
                for j in range(n_epochs):
                    # One full-batch step on the whole few-shot training set.
                    optim.zero_grad()
                    loss = loss_func(model(X_train), y_train)
                    loss.backward()
                    optim.step()

                    # Convert to Python floats before writing into the NumPy arrays,
                    # rather than relying on implicit conversion of CUDA tensors.
                    # Note: the loss is the value from before this epoch's update,
                    # while both errors are measured after it.
                    loss_value = loss.item()
                    train_err[m][h][lr][i, j] = float(error(model, X_train, y_train))
                    test_err[m][h][lr][i, j] = float(error(model, X_test, y_test))
                    loss_list[m][h][lr][i, j] = loss_value

                    if (j % 1000 == 0):
                        print('Trial %d, Epoch: %d, %s model, h=%d, lr=%0.5f, Loss=%0.5f, test err=%0.3f' % (i,j, m, h, lr, loss_value, 
                                                                                                  test_err[m][h][lr][i, j]))
results = {'test_err': test_err, 'train_err': train_err, 'loss': loss_list}
with open(results_path, 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

Trial 0, Epoch: 0, V1 model, h=50, lr=0.00100, Loss=2.54700, test err=0.906
Trial 0, Epoch: 1000, V1 model, h=50, lr=0.00100, Loss=1.55330, test err=0.523
Trial 0, Epoch: 2000, V1 model, h=50, lr=0.00100, Loss=1.00266, test err=0.406
Trial 0, Epoch: 3000, V1 model, h=50, lr=0.00100, Loss=0.65458, test err=0.374
Trial 0, Epoch: 4000, V1 model, h=50, lr=0.00100, Loss=0.44057, test err=0.359
Trial 0, Epoch: 5000, V1 model, h=50, lr=0.00100, Loss=0.30759, test err=0.350
Trial 0, Epoch: 6000, V1 model, h=50, lr=0.00100, Loss=0.22350, test err=0.346
Trial 0, Epoch: 7000, V1 model, h=50, lr=0.00100, Loss=0.16893, test err=0.342
Trial 1, Epoch: 0, V1 model, h=50, lr=0.00100, Loss=2.46790, test err=0.918
Trial 1, Epoch: 1000, V1 model, h=50, lr=0.00100, Loss=1.43175, test err=0.495
Trial 1, Epoch: 2000, V1 model, h=50, lr=0.00100, Loss=0.91704, test err=0.431
Trial 1, Epoch: 3000, V1 model, h=50, lr=0.00100, Loss=0.60955, test err=0.396
Trial 1, Epoch: 4000, V1 model, h=50, lr=0.00100, Loss=0.4

## plot results

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Plot loss / train error / test error curves (mean +/- one std over trials)
# for every (hidden size, learning rate) pair and save one PNG per pair.
t, l, n_epochs= 5, 2, 7001
# NOTE: here `models` is a list of names, whereas the training cell binds it to a dict.
models = ['V1', 'He']
h_list = [50, 100, 400, 1000]
lr_list = [1e-3, 1e-2, 1e-1, 1e0]


def _plot_metric(ax, sims, key, h, lr, title, ylabel, yticks=None):
    """Draw the trial-averaged curve of ``sims[key]`` for each model on ``ax``.

    Args:
        ax: matplotlib Axes to draw on.
        sims: loaded results, indexed as ``sims[key][model][h][lr]`` and
            holding one 2-D array per entry (averaged over axis 0).
        key: which quantity to plot ('loss', 'train_err' or 'test_err').
        h, lr: hidden size and learning rate selecting the runs.
        title, ylabel: panel title and y-axis label.
        yticks: optional tick positions, applied before switching to log scale
            (the same call order as the original panels).
    """
    ax.set_title(title, fontsize=16)
    for m in models:
        runs = sims[key][m][h][lr]
        avg = np.mean(runs, axis=0)
        std = np.std(runs, axis=0)
        # Take the x-axis length from the data itself so the plot cannot go
        # out of sync with the number of epochs actually stored in the file.
        epochs = np.arange(avg.shape[0])
        ax.plot(epochs, avg, label=m, lw=3)
        ax.fill_between(epochs, avg - std, avg + std, alpha=0.2)
    ax.set_xlabel('Epoch', fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    ax.tick_params(axis = 'both', which = 'major', labelsize = 14, width=2, length=6)
    if yticks is not None:
        ax.set_yticks(yticks)
    ax.set_yscale('log')
    ax.legend(fontsize=18)


# The results file does not depend on h or lr, so read it once instead of once per figure.
# NOTE: pickle.load can execute arbitrary code; only load files produced by this notebook.
with open('results/initialize_mnist/fewshot_data/clf_t=%0.2f_l=%0.2f.pickle' % (t, l), 'rb') as handle:
    sims = pickle.load(handle)

for h in h_list:
    for lr in lr_list:
        fig = plt.figure(figsize=(12, 5))
        fig.suptitle(r'Shallow FFW FC net w/ GD. h=%d, lr=%0.4f, '% (h, lr), fontsize=16)

        _plot_metric(fig.add_subplot(131), sims, 'loss', h, lr, 'Network loss', 'Training loss')
        _plot_metric(fig.add_subplot(132), sims, 'train_err', h, lr, 'Train error', 'Training error',
                     yticks=np.arange(0, 1, 0.2))
        _plot_metric(fig.add_subplot(133), sims, 'test_err', h, lr, 'Test error', 'Test error',
                     yticks=np.arange(0, 1, 0.2))

        fig.tight_layout()
        # Leave room at the top for the suptitle.
        fig.subplots_adjust(top=0.8)

        print(h, lr)
        fig.savefig('results/initialize_mnist/fewshot_data/init_t=%0.2f_l=%0.2f_h=%d_lr=%0.4f.png' % (t, l, h, lr))
        # Close each figure explicitly so the figures do not accumulate in memory.
        plt.close(fig)

50 0.001
50 0.01
50 0.1
50 1.0
100 0.001
100 0.01
100 0.1
100 1.0
400 0.001
400 0.01
400 0.1
400 1.0
1000 0.001
1000 0.01
1000 0.1
1000 1.0


In [17]:
!nvidia-smi

Fri Oct 23 07:07:47 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.100      Driver Version: 440.100      CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN Xp            Off  | 00000000:01:00.0 Off |                  N/A |
| 53%   83C    P2   177W / 250W |    891MiB / 12196MiB |     98%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            Off  | 00000000:02:00.0 Off |                  N/A |
| 53%   83C    P2   247W / 250W |   1929MiB / 12196MiB |     98%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [14]:
# Scratch cell: draws one sample from a 2-D Gaussian with mean (5, 3) and identity covariance.
np.random.multivariate_normal((5, 3), np.eye(2))

array([5.1084552 , 3.62522326])

In [16]:
# Scratch cell: draws a 4x2 array of samples from the uniform distribution on [2, 8).
np.random.uniform(2, 8, (4, 2))

array([[2.37671816, 4.76704288],
       [6.09796859, 7.535352  ],
       [3.59056167, 6.16788591],
       [4.89921963, 7.49935599]])