In [3]:
%matplotlib inline
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch
from sklearn.datasets import load_digits
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.utils.data as torch_data
import sklearn
from sklearn.metrics import accuracy_score

In [4]:
from poslayers.poslayers import *

In [5]:
# fetch the dataset.
digits, targets = load_digits(return_X_y=True)
digits = digits.astype(np.float32) / 255

digits_train, digits_test, targets_train, targets_test = train_test_split(digits, targets, random_state=0)

train_size = digits_train.shape[0]

input_size = 8*8
classes_n = 10

In [6]:
class MNISTData(torch_data.Dataset):
    def __init__(self, X, y):
        super(MNISTData, self).__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return (self.X[idx],self.y[idx])

In [7]:
def get_accuracy(net, val_dset):
    test_loader = torch_data.DataLoader(val_dset,batch_size = len(val_dset)) 
    net.eval()
    for X,y in test_loader:
        X = X.to(device)
        nn_outputs = net(X).detach().numpy().argmax(axis = 1)
    return accuracy_score(nn_outputs,y.detach().numpy())

In [8]:
train_dset = MNISTData(digits_train, targets_train) 
val_dset = MNISTData(digits_test, targets_test) 

## Non-convex FCN

In [9]:
class FCN(nn.Module):

    def __init__(self):
        super(FCN, self).__init__()
        self.fc0 = Dense(64, 40)
        self.fc1 = Dense(40, 20)
        self.fc2 = Dense(20, 10)

    def forward(self, input_):
        h1 = F.relu(self.fc0(input_))
        h2 = F.relu(self.fc1(h1))
        h3 = self.fc2(h2)
        return h3
    
    def get_sparsities(self):
        return {
                'fc0': self.fc0.get_sparsity(),
                'fc1': self.fc1.get_sparsity(),
                'fc2': self.fc2.get_sparsity(),
        }
    
    def l1reg(self):
        return self.fc0.l1reg() + self.fc1.l1reg() + self.fc2.l1reg()

In [10]:
device = 'cpu'

net = FCN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [11]:
def train_fcn(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None, l1alpha=0):
    net.to(device)
    for epoch in range(1, epochs+1):
        net.train()
        loss = []
        for X, y in train_loader:
            nn_outputs = net(X)
            loss1 = criterion(nn_outputs, y) + l1alpha * net.l1reg()
            optimizer.zero_grad()
            loss1.backward()
            loss.append(loss1.item())
            optimizer.step()
        net.eval()
        val_loss = []
        for X, y in val_loader:
            X = X.to(device)
            nn_outputs = net(X)
            val_loss1 = criterion(nn_outputs,y)
            val_loss.append(val_loss1.item())
         
        if scheduler is not None:
            scheduler.step()
        freq = max(epochs//20,1)
        if verbose and epoch%freq==0:
            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, epochs, np.mean(loss), np.mean(val_loss)))

In [12]:
train_fcn(200, net, criterion, optimizer, train_loader, val_loader, scheduler)

Epoch 10/200 || Loss:  Train 1.3953 | Validation 1.3722
Epoch 20/200 || Loss:  Train 0.7349 | Validation 0.7527
Epoch 30/200 || Loss:  Train 0.4920 | Validation 0.5178
Epoch 40/200 || Loss:  Train 0.3856 | Validation 0.4194
Epoch 50/200 || Loss:  Train 0.3188 | Validation 0.3567
Epoch 60/200 || Loss:  Train 0.2747 | Validation 0.3101
Epoch 70/200 || Loss:  Train 0.2418 | Validation 0.2793
Epoch 80/200 || Loss:  Train 0.2168 | Validation 0.2630
Epoch 90/200 || Loss:  Train 0.1982 | Validation 0.2468
Epoch 100/200 || Loss:  Train 0.1807 | Validation 0.2402
Epoch 110/200 || Loss:  Train 0.1671 | Validation 0.2274
Epoch 120/200 || Loss:  Train 0.1580 | Validation 0.2220
Epoch 130/200 || Loss:  Train 0.1493 | Validation 0.2193
Epoch 140/200 || Loss:  Train 0.1406 | Validation 0.2191
Epoch 150/200 || Loss:  Train 0.1336 | Validation 0.2184
Epoch 160/200 || Loss:  Train 0.1261 | Validation 0.2175
Epoch 170/200 || Loss:  Train 0.1208 | Validation 0.2154
Epoch 180/200 || Loss:  Train 0.1153 | V

In [13]:
get_accuracy(net, val_dset)

0.9377777777777778

In [14]:
net.get_sparsities()

{'fc0': 0.0, 'fc1': 0.0, 'fc2': 0.0}

## POSDENSE = CNN WITHOUT ADDITIONAL LAYERS

In [15]:
class ICNN(nn.Module):

    def __init__(self):
        super(ICNN, self).__init__()
        self.fcpos0 = PosDense(64, 40)
        self.fcpos1 = PosDense(40, 20)
        self.fcpos2 = PosDense(20, 10)
        self.fc = Dense(10, 10)

    def forward(self, input_):
        h1 = F.relu(self.fcpos0(input_))
        h2 = F.relu(self.fcpos1(h1))
        h3 = F.relu(self.fcpos2(h2))
        h4 = self.fc(h3)
        return h4
    
    def positivate(self):
        self.fcpos0.positivate()
        self.fcpos1.positivate()
        self.fcpos2.positivate()
    
    def get_sparsities(self):
        get_sparsity = lambda layer : (layer.weight.data == 0).sum().item() / (layer.weight.data.shape[0] * layer.weight.data.shape[1])
        return {
                'fcpos0': get_sparsity(self.fcpos0),
                'fcpos1': get_sparsity(self.fcpos1),
                'fcpos2': get_sparsity(self.fcpos2),
                'fc' : get_sparsity(self.fc)
        }
    
    def l1reg(self):
        return self.fcpos0.l1reg() + self.fcpos1.l1reg() + self.fcpos2.l1reg() + self.fc.l1reg()

In [16]:
device = 'cpu'

net = ICNN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [17]:
def train_icnn(epochs, net, criterion, optimizer, train_loader, val_loader,scheduler=None, verbose=True, save_dir=None, l1alpha=0):
    net.to(device)
    for epoch in range(1, epochs+1):
        net.train()
        loss = []
        for X, y in train_loader:
            nn_outputs = net(X)
            loss1 = criterion(nn_outputs, y) + l1alpha * net.l1reg()
            optimizer.zero_grad()
            loss1.backward()
            loss.append(loss1.item())
            optimizer.step()
            net.positivate()
        net.eval()
        val_loss = []
        for X, y in val_loader:
            X = X.to(device)
            nn_outputs = net(X)
            val_loss1 = criterion(nn_outputs,y)
            val_loss.append(val_loss1.item())
         
        if scheduler is not None:
            scheduler.step()
        freq = max(epochs//20,1)
        if verbose and epoch%freq==0:
            print('Epoch {}/{} || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, epochs, np.mean(loss), np.mean(val_loss)))

In [18]:
train_icnn(300, net, criterion, optimizer, train_loader, val_loader, scheduler)

Epoch 15/300 || Loss:  Train 1.9431 | Validation 1.9599
Epoch 30/300 || Loss:  Train 1.7851 | Validation 1.8151
Epoch 45/300 || Loss:  Train 1.5791 | Validation 1.6146
Epoch 60/300 || Loss:  Train 1.2900 | Validation 1.3047
Epoch 75/300 || Loss:  Train 1.0000 | Validation 1.0520
Epoch 90/300 || Loss:  Train 0.7966 | Validation 0.8885
Epoch 105/300 || Loss:  Train 0.5972 | Validation 0.7218
Epoch 120/300 || Loss:  Train 0.5055 | Validation 0.6492
Epoch 135/300 || Loss:  Train 0.4568 | Validation 0.5881
Epoch 150/300 || Loss:  Train 0.4142 | Validation 0.5609
Epoch 165/300 || Loss:  Train 0.3866 | Validation 0.5354
Epoch 180/300 || Loss:  Train 0.3500 | Validation 0.5169
Epoch 195/300 || Loss:  Train 0.3197 | Validation 0.5063
Epoch 210/300 || Loss:  Train 0.2905 | Validation 0.4770
Epoch 225/300 || Loss:  Train 0.2715 | Validation 0.4805
Epoch 240/300 || Loss:  Train 0.2474 | Validation 0.4487
Epoch 255/300 || Loss:  Train 0.2277 | Validation 0.4421
Epoch 270/300 || Loss:  Train 0.2154 

In [19]:
get_accuracy(net, val_dset)

0.9

In [20]:
net.get_sparsities()

{'fcpos0': 0.31328125, 'fcpos1': 0.18, 'fcpos2': 0.24, 'fc': 0.0}

## L1-regularization

In [23]:
device = 'cpu'

net = ICNN()  
criterion = F.cross_entropy 
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [24]:
train_icnn(350, net, criterion, optimizer, train_loader, val_loader, scheduler, l1alpha=1e-5)

Epoch 17/350 || Loss:  Train 1.5012 | Validation 1.5382
Epoch 34/350 || Loss:  Train 1.1631 | Validation 1.1798
Epoch 51/350 || Loss:  Train 0.9267 | Validation 0.9423
Epoch 68/350 || Loss:  Train 0.8362 | Validation 0.8901
Epoch 85/350 || Loss:  Train 0.7647 | Validation 0.7593
Epoch 102/350 || Loss:  Train 0.7192 | Validation 0.7279
Epoch 119/350 || Loss:  Train 0.6949 | Validation 0.6816
Epoch 136/350 || Loss:  Train 0.6695 | Validation 0.6848
Epoch 153/350 || Loss:  Train 0.6397 | Validation 0.6734
Epoch 170/350 || Loss:  Train 0.6124 | Validation 0.6449
Epoch 187/350 || Loss:  Train 0.5960 | Validation 0.6226
Epoch 204/350 || Loss:  Train 0.5760 | Validation 0.6136
Epoch 221/350 || Loss:  Train 0.5618 | Validation 0.6020
Epoch 238/350 || Loss:  Train 0.5347 | Validation 0.5716
Epoch 255/350 || Loss:  Train 0.5006 | Validation 0.5428
Epoch 272/350 || Loss:  Train 0.4660 | Validation 0.5073
Epoch 289/350 || Loss:  Train 0.4157 | Validation 0.4775
Epoch 306/350 || Loss:  Train 0.3723

In [25]:
get_accuracy(net, val_dset)

0.8955555555555555

In [26]:
net.get_sparsities()

{'fcpos0': 0.33046875, 'fcpos1': 0.37875, 'fcpos2': 0.215, 'fc': 0.0}

In [27]:
device = 'cpu'

net = FCN()  
criterion = F.cross_entropy
optimizer = torch.optim.Adam(net.parameters())
scheduler = None

train_loader = torch_data.DataLoader(train_dset, batch_size=30, shuffle=True) 
val_loader = torch_data.DataLoader(val_dset, batch_size=100, shuffle=False) 

In [None]:
train_fcn(300, net, criterion, optimizer, train_loader, val_loader, scheduler, l1alpha=1e-5)

Epoch 15/300 || Loss:  Train 0.7673 | Validation 0.7889
Epoch 30/300 || Loss:  Train 0.4360 | Validation 0.4594
Epoch 45/300 || Loss:  Train 0.3227 | Validation 0.3555
Epoch 60/300 || Loss:  Train 0.2584 | Validation 0.3157
Epoch 75/300 || Loss:  Train 0.2135 | Validation 0.2613
Epoch 90/300 || Loss:  Train 0.1820 | Validation 0.2569
Epoch 105/300 || Loss:  Train 0.1588 | Validation 0.2351
Epoch 120/300 || Loss:  Train 0.1427 | Validation 0.2264
Epoch 135/300 || Loss:  Train 0.1252 | Validation 0.2179
Epoch 150/300 || Loss:  Train 0.1125 | Validation 0.2329
Epoch 165/300 || Loss:  Train 0.1075 | Validation 0.2210
Epoch 180/300 || Loss:  Train 0.0922 | Validation 0.2213
Epoch 195/300 || Loss:  Train 0.0848 | Validation 0.2197
Epoch 210/300 || Loss:  Train 0.0762 | Validation 0.2288
Epoch 225/300 || Loss:  Train 0.0699 | Validation 0.2216
Epoch 240/300 || Loss:  Train 0.0656 | Validation 0.2264


In [None]:
get_accuracy(net, val_dset)

In [None]:
net.get_sparsities()