In [1]:
#imports
%matplotlib inline
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.utils.data as torch_data
import sklearn
from sklearn.metrics import accuracy_score

In [2]:
# fetch the dataset.
digits, targets = load_digits(return_X_y=True)
digits = digits.astype(np.float32) / 255

digits_train, digits_test, targets_train, targets_test = train_test_split(digits, targets, random_state=0)

train_size = digits_train.shape[0]

input_size = 8*8
classes_n = 10

In [3]:
class MNISTData(torch_data.Dataset):
    def __init__(self, X, y):
        super(MNISTData, self).__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return (self.X[idx],self.y[idx])

In [4]:
train_dset = MNISTData(digits_train, targets_train) 
val_dset = MNISTData(digits_test, targets_test) 

## Nonconvex fullyconnected

In [5]:
class FCN(nn.Module):

    def __init__(self):
        super(FCN, self).__init__()
        self.fc0 = nn.Linear(64, 40)
        self.fc1 = nn.Linear(40, 20)
        self.fc2 = nn.Linear(20, 10)

    def forward(self, input_):
        h1 = F.relu(self.fc0(input_))
        h2 = F.relu(self.fc1(h1))
        h3 = self.fc2(h2)
        return h3

In [6]:
device = 'cpu'

net = FCN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [7]:
def train(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None):
    net.to(device)
    for epoch in range(1, epochs+1):
        net.train()
        loss = []
        for X, y in train_loader:
            nn_outputs = net(X)
            loss1 = criterion(nn_outputs, y)
            optimizer.zero_grad()
            loss1.backward()
            loss.append(loss1.item())
            optimizer.step()
        net.eval()
        val_loss = []
        for X, y in val_loader:
            X = X.to(device)
            nn_outputs = net(X)
            val_loss1 = criterion(nn_outputs,y)
            val_loss.append(val_loss1.item())
         
        if scheduler is not None:
            scheduler.step()
        freq = max(epochs//20,1)
        if verbose and epoch%freq==0:
            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, epochs, np.mean(loss), np.mean(val_loss)))

In [62]:
train(200, net, criterion, optimizer, train_loader, val_loader, scheduler)

Epoch 10/200 || Loss:  Train 1.4704 | Validation 1.4501
Epoch 20/200 || Loss:  Train 0.8151 | Validation 0.8570
Epoch 30/200 || Loss:  Train 0.5679 | Validation 0.6149
Epoch 40/200 || Loss:  Train 0.4585 | Validation 0.4937
Epoch 50/200 || Loss:  Train 0.3886 | Validation 0.4285
Epoch 60/200 || Loss:  Train 0.3411 | Validation 0.3846
Epoch 70/200 || Loss:  Train 0.2992 | Validation 0.3491
Epoch 80/200 || Loss:  Train 0.2614 | Validation 0.3102
Epoch 90/200 || Loss:  Train 0.2327 | Validation 0.2889
Epoch 100/200 || Loss:  Train 0.2098 | Validation 0.2849
Epoch 110/200 || Loss:  Train 0.1893 | Validation 0.2655
Epoch 120/200 || Loss:  Train 0.1747 | Validation 0.2528
Epoch 130/200 || Loss:  Train 0.1592 | Validation 0.2422
Epoch 140/200 || Loss:  Train 0.1495 | Validation 0.2362
Epoch 150/200 || Loss:  Train 0.1381 | Validation 0.2313
Epoch 160/200 || Loss:  Train 0.1250 | Validation 0.2329
Epoch 170/200 || Loss:  Train 0.1172 | Validation 0.2208
Epoch 180/200 || Loss:  Train 0.1094 | V

In [70]:
test_loader = torch_data.DataLoader(val_dset,batch_size = len(val_dset)) 

In [76]:
net.eval()
for X,y in test_loader:
    X = X.to(device)
    nn_outputs = net(X).detach().numpy().argmax(axis = 1)
    print(accuracy_score(nn_outputs,y.detach().numpy()))

0.9422222222222222


## ICNN

In [80]:
class ICNN(nn.Module):

    def __init__(self):
        super(ICNN, self).__init__()
        self.Wy0 = nn.Linear(64, 40)
        self.Wy1 = nn.Linear(64, 20)
        self.Wy2 = nn.Linear(64, 10)
        self.Wz1 = nn.Linear(40,20, bias = False)
        self.Wz2 = nn.Linear(20,10, bias = False)
        self.Wz3 = nn.Linear(10,10)

    def forward(self, input_):
        h1 = F.relu(self.Wy0(input_))
        h2 = F.relu(self.Wz1(h1) + self.Wy1(input_))
        h3 = F.relu(self.Wz2(h2) + self.Wy2(input_))
        h4 = self.Wz3(h3)
        return h4

In [100]:
device = 'cpu'

net = ICNN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [101]:
def train(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None):
    net.to(device)
    for epoch in range(1, epochs+1):
        net.train()
        loss = []
        for X, y in train_loader:
            nn_outputs = net(X)
            loss1 = criterion(nn_outputs, y)
            optimizer.zero_grad()
            loss1.backward()
            loss.append(loss1.item())
            optimizer.step()
            net.Wz1.weight.data = F.relu(net.Wz1.weight.data)
            net.Wz2.weight.data = F.relu(net.Wz2.weight.data)
        net.eval()
        val_loss = []
        for X, y in val_loader:
            X = X.to(device)
            nn_outputs = net(X)
            val_loss1 = criterion(nn_outputs,y)
            val_loss.append(val_loss1.item())
         
        if scheduler is not None:
            scheduler.step()
        freq = max(epochs//20,1)
        if verbose and epoch%freq==0:
            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, epochs, np.mean(loss), np.mean(val_loss)))

In [102]:
train(200, net, criterion, optimizer, train_loader, val_loader, scheduler)

Epoch 10/200 || Loss:  Train 1.7722 | Validation 1.7767
Epoch 20/200 || Loss:  Train 0.8505 | Validation 0.9000
Epoch 30/200 || Loss:  Train 0.4824 | Validation 0.5488
Epoch 40/200 || Loss:  Train 0.3310 | Validation 0.3996
Epoch 50/200 || Loss:  Train 0.2409 | Validation 0.3215
Epoch 60/200 || Loss:  Train 0.1783 | Validation 0.2697
Epoch 70/200 || Loss:  Train 0.1400 | Validation 0.2466
Epoch 80/200 || Loss:  Train 0.1103 | Validation 0.2328
Epoch 90/200 || Loss:  Train 0.0886 | Validation 0.2321
Epoch 100/200 || Loss:  Train 0.0715 | Validation 0.2200
Epoch 110/200 || Loss:  Train 0.0581 | Validation 0.2257
Epoch 120/200 || Loss:  Train 0.0465 | Validation 0.2108
Epoch 130/200 || Loss:  Train 0.0364 | Validation 0.2171
Epoch 140/200 || Loss:  Train 0.0306 | Validation 0.2139
Epoch 150/200 || Loss:  Train 0.0238 | Validation 0.2231
Epoch 160/200 || Loss:  Train 0.0201 | Validation 0.2156
Epoch 170/200 || Loss:  Train 0.0162 | Validation 0.2193
Epoch 180/200 || Loss:  Train 0.0120 | V

In [103]:
test_loader = torch_data.DataLoader(val_dset,batch_size = len(val_dset)) 

In [104]:
net.eval()
for X,y in test_loader:
    X = X.to(device)
    nn_outputs = net(X).detach().numpy().argmax(axis = 1)
    print(accuracy_score(nn_outputs,y.detach().numpy()))

0.9511111111111111


In [105]:
Wz1 = net.Wz1.weight.data.detach().numpy()
Wz2 = net.Wz2.weight.data.detach().numpy()

In [110]:
print('Sparsity Wz1: {}'.format(np.sum(Wz1==0)/(Wz1.shape[0]*Wz1.shape[1])))
print('Sparsity Wz2: {}'.format(np.sum(Wz2==0)/(Wz2.shape[0]*Wz2.shape[1])))

Sparsity Wz1: 0.28625
Sparsity Wz2: 0.1


## L1-regularization

In [165]:
device = 'cpu'

net = ICNN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [166]:
def train(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None):
    net.to(device)
    for epoch in range(1, epochs+1):
        net.train()
        loss = []
        for X, y in train_loader:
            nn_outputs = net(X)
            loss1 = criterion(nn_outputs, y)
            optimizer.zero_grad()
            L1_reg = torch.tensor(0., requires_grad=True)
            for name, param in net.named_parameters():
                if 'weight' in name:
                    L1_reg = L1_reg + torch.norm(param, 1)

            loss1 += 1e-5 * L1_reg
            loss1.backward()
            loss.append(loss1.item())
            optimizer.step()
            net.Wz1.weight.data = F.relu(net.Wz1.weight.data)
            net.Wz2.weight.data = F.relu(net.Wz2.weight.data)
        net.eval()
        val_loss = []
        for X, y in val_loader:
            X = X.to(device)
            nn_outputs = net(X)
            val_loss1 = criterion(nn_outputs,y)
            val_loss.append(val_loss1.item())
         
        if scheduler is not None:
            scheduler.step()
        freq = max(epochs//20,1)
        if verbose and epoch%freq==0:
            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, epochs, np.mean(loss), np.mean(val_loss)))

In [167]:
train(200, net, criterion, optimizer, train_loader, val_loader, scheduler)

Epoch 10/200 || Loss:  Train 1.7201 | Validation 1.6733
Epoch 20/200 || Loss:  Train 0.8588 | Validation 0.8536
Epoch 30/200 || Loss:  Train 0.5394 | Validation 0.5549
Epoch 40/200 || Loss:  Train 0.3818 | Validation 0.4001
Epoch 50/200 || Loss:  Train 0.2831 | Validation 0.3144
Epoch 60/200 || Loss:  Train 0.2192 | Validation 0.2586
Epoch 70/200 || Loss:  Train 0.1785 | Validation 0.2370
Epoch 80/200 || Loss:  Train 0.1467 | Validation 0.2092
Epoch 90/200 || Loss:  Train 0.1254 | Validation 0.1894
Epoch 100/200 || Loss:  Train 0.1087 | Validation 0.1867
Epoch 110/200 || Loss:  Train 0.0953 | Validation 0.1760
Epoch 120/200 || Loss:  Train 0.0829 | Validation 0.1699
Epoch 130/200 || Loss:  Train 0.0720 | Validation 0.1712
Epoch 140/200 || Loss:  Train 0.0631 | Validation 0.1684
Epoch 150/200 || Loss:  Train 0.0549 | Validation 0.1612
Epoch 160/200 || Loss:  Train 0.0495 | Validation 0.1631
Epoch 170/200 || Loss:  Train 0.0427 | Validation 0.1596
Epoch 180/200 || Loss:  Train 0.0391 | V

In [168]:
test_loader = torch_data.DataLoader(val_dset,batch_size = len(val_dset)) 

In [169]:
net.eval()
for X,y in test_loader:
    X = X.to(device)
    nn_outputs = net(X).detach().numpy().argmax(axis = 1)
    print(accuracy_score(nn_outputs,y.detach().numpy()))

0.9444444444444444


In [170]:
Wz1 = net.Wz1.weight.data.detach().numpy()
Wz2 = net.Wz2.weight.data.detach().numpy()

In [171]:
print('Sparsity Wz1: {}'.format(np.sum(Wz1==0)/(Wz1.shape[0]*Wz1.shape[1])))
print('Sparsity Wz2: {}'.format(np.sum(Wz2==0)/(Wz2.shape[0]*Wz2.shape[1])))

Sparsity Wz1: 0.57
Sparsity Wz2: 0.35
