In [275]:
import numpy as np
from math import log
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler
from torch_utils import set_gpu_mode, get_numpy, from_numpy

In [276]:
set_gpu_mode(True)

In [277]:
data = np.load('data/1/data.npz')
colors = data.get('colors')
shapes = data.get('shapes')

colors_ohe = OneHotEncoder(categories='auto').fit_transform(colors.reshape(-1, 1)).toarray()
shapes_ohe = OneHotEncoder(categories='auto').fit_transform(shapes.reshape(-1, 1)).toarray()

In [278]:
X = np.c_[colors_ohe, shapes_ohe]
y = data.get('is_buttons').reshape(-1, 1)

In [279]:
pos_neg_weight_ratio = 1 / np.mean(y)

In [280]:
X_tr, X_t, y_tr, y_t = train_test_split(X, y, test_size=0.3)

ds_tr = TensorDataset(from_numpy(X_tr), from_numpy(y_tr))
ds_t = TensorDataset(from_numpy(X_t), from_numpy(y_t))

weights_tr = np.ones(len(y_tr)) 
weights_tr[y_tr.flatten() == 1] = pos_neg_weight_ratio
weights_t = np.ones(len(y_t))
weights_t[y_t.flatten() == 1] = pos_neg_weight_ratio

dl_tr = DataLoader(ds_tr, batch_size=16, sampler=WeightedRandomSampler(weights_tr, len(weights_tr)))
dl_t = DataLoader(ds_t, batch_size=16, sampler=WeightedRandomSampler(weights_t, len(weights_t)))

In [736]:
def to_var(x, requires_grad=False, volatile=False):
    """
    Varialbe type that automatically choose cpu or cuda
    """
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, requires_grad=requires_grad, volatile=volatile)

class MaskedLinear(nn.Linear):
    def __init__(self, in_features, out_features, bias=True):
        super(MaskedLinear, self).__init__(in_features, out_features, bias)
        self.mask_flag = False
    
    def set_mask(self, mask):
        self.mask = to_var(mask, requires_grad=False)
        self.weight.data = self.weight.data*self.mask.data
        self.mask_flag = True
    
    def get_mask(self):
        return self.mask
    
    def forward(self, x):
        if self.mask_flag == True:
            weight = self.weight*self.mask
            return F.linear(x, weight, self.bias)
        else:
            return F.linear(x, self.weight, self.bias)

class SEM(nn.Module):
    
    def __init__(self, input_size, output_size, fcs=[]):
        super().__init__()
        
        self.fcs = nn.ModuleList()
        for i, fc_size in enumerate(fcs):
            self.fcs.append(MaskedLinear(input_size if i == 0 else fcs[i-1], fc_size))
            
        self.fcs.append(MaskedLinear(input_size if len(fcs) == 0 else fcs[-1], output_size))
        
    def apply_layer_mask(self, layer, n):
        fc = self.fcs[layer]
        if not fc.mask_flag:
            fc.set_mask(torch.ones(fc.weight.shape))
        mask = fc.get_mask()
        
        mask_np_1d = get_numpy(mask).flatten()
        w = get_numpy(fc.weight)
        w_abs_1d = np.abs(w.flatten())
        w_abs_1d[mask_np_1d == 0] = np.inf
        
        idx_1d = w_abs_1d.argsort()[:n]
        x_idx, y_idx = np.unravel_index(idx_1d, w.shape)
        
        mask[x_idx, y_idx] = 0
        
        fc.set_mask(mask)
        
#         print(mask)
#         print(w)
            
        print('Pruned {}/{} weights'.format(np.sum(1 - get_numpy(mask)), np.prod(mask.shape)))
        
    def forward(self, x):        
        for fc in self.fcs[:-1]:
            x = fc(x)
            x = F.relu(x)
            
        x = self.fcs[-1](x)
        return x

In [737]:
def train(model, dl, opt, criterion):
    for batch in dl:
        opt.zero_grad()
        loss = criterion(model(batch[0]), batch[1])
        loss.backward()
        opt.step()

def acc(pred, gt):
    pred, gt = get_numpy(pred), get_numpy(gt)
    pred_labels = pred > 0

    return np.mean(pred_labels == gt)

def eval_loss_acc(model, dl, criterion, acc):
    total_loss = 0
    total_acc = 0
    for batch in dl:
        pred = model(batch[0])
        loss = criterion(pred, batch[1])
        total_loss += get_numpy(loss)
        total_acc += acc(pred, batch[1])

    return total_loss / len(dl), total_acc / len(dl)

In [976]:
model = SEM(X_tr.shape[1], 1, fcs=[]).cuda()
model_weights = []
for name, param in model.named_parameters():
    if 'bias' not in name:
        model_weights.append(param)
        
for fc in model.fcs:
    fc.bias.requires_grad = False
        
opt = torch.optim.Adam(model_weights, lr=0.0001)

def init_bias(m):
    if type(m) == MaskedLinear:
        m.bias.data.fill_(-0.1)
model.apply(init_bias)

def get_criterion(l1_weight, params):
    bce = nn.BCEWithLogitsLoss()
    l1_crit = nn.L1Loss(reduction='sum')
    
    def criterion(X, y):
        bce_loss = bce(X, y)
        l1_loss = 0
        for param in params:
            l1_loss += l1_crit(param, torch.zeros(param.shape).cuda())
        
        return bce_loss + l1_weight * l1_loss

    return criterion
        
l1_weight = 0.5 # log(len(X_tr))
print('l1 weight', l1_weight)
criterion = get_criterion(l1_weight, model_weights)
# criterion = nn.BCEWithLogitsLoss()

l1 weight 0.5


In [984]:
epochs = 20

loss_trs, acc_trs, loss_ts, acc_ts = [], [], [], []
for e in range(epochs):
    train(model, dl_tr, opt, criterion)
    
    loss_tr, acc_tr = eval_loss_acc(model, dl_tr, criterion, acc)
    loss_t, acc_t = eval_loss_acc(model, dl_t, criterion, acc)
    
    loss_trs.append(loss_tr)
    acc_trs.append(acc_tr)
    loss_ts.append(loss_t)
    acc_ts.append(acc_t)
    
    print('{}/{} | Loss tr {:.3f} t {:.3f} | Acc tr {:.2f} t {:.2f}'.format(e+1, epochs, loss_tr, loss_t, acc_tr, acc_t))

1/20 | Loss tr 1.147 t 1.157 | Acc tr 0.90 t 0.94
2/20 | Loss tr 1.130 t 1.137 | Acc tr 0.94 t 0.80
3/20 | Loss tr 1.135 t 1.141 | Acc tr 0.95 t 0.86
4/20 | Loss tr 1.140 t 1.154 | Acc tr 0.89 t 0.90
5/20 | Loss tr 1.129 t 1.143 | Acc tr 0.91 t 0.93
6/20 | Loss tr 1.128 t 1.162 | Acc tr 0.96 t 0.87
7/20 | Loss tr 1.128 t 1.129 | Acc tr 0.90 t 0.90
8/20 | Loss tr 1.130 t 1.137 | Acc tr 0.95 t 0.94
9/20 | Loss tr 1.119 t 1.147 | Acc tr 0.94 t 0.80
10/20 | Loss tr 1.122 t 1.150 | Acc tr 0.94 t 0.83
11/20 | Loss tr 1.132 t 1.146 | Acc tr 0.90 t 0.87
12/20 | Loss tr 1.129 t 1.142 | Acc tr 0.90 t 0.90
13/20 | Loss tr 1.128 t 1.101 | Acc tr 0.96 t 0.97
14/20 | Loss tr 1.139 t 1.136 | Acc tr 0.85 t 0.90
15/20 | Loss tr 1.126 t 1.125 | Acc tr 0.88 t 0.97
16/20 | Loss tr 1.124 t 1.177 | Acc tr 0.88 t 0.77
17/20 | Loss tr 1.117 t 1.113 | Acc tr 0.94 t 1.00
18/20 | Loss tr 1.116 t 1.125 | Acc tr 0.92 t 0.90
19/20 | Loss tr 1.098 t 1.118 | Acc tr 0.99 t 0.96
20/20 | Loss tr 1.107 t 1.109 | Acc tr 0

In [985]:
for name, param in model.named_parameters():
    print(name)
    print(param)

fcs.0.weight
Parameter containing:
tensor([[ 0.1681, -0.0860, -0.2387,  0.3097,  0.0221, -0.2845]],
       device='cuda:0', requires_grad=True)
fcs.0.bias
Parameter containing:
tensor([-0.1000], device='cuda:0')


In [798]:
model.apply_layer_mask(0, 10)

Pruned 6.0/6 weights


In [421]:
epochs = 5

loss_trs, acc_trs, loss_ts, acc_ts = [], [], [], []
for e in range(epochs):
    train(model, dl_tr, opt, criterion)
    
    loss_tr, acc_tr = eval_loss_acc(model, dl_tr, criterion, acc)
    loss_t, acc_t = eval_loss_acc(model, dl_t, criterion, acc)
    
    loss_trs.append(loss_tr)
    acc_trs.append(acc_tr)
    loss_ts.append(loss_t)
    acc_ts.append(acc_t)
    
    print('{}/{} | Loss tr {:.3f} t {:.3f} | Acc tr {:.2f} t {:.2f}'.format(e+1, epochs, loss_tr, loss_t, acc_tr, acc_t))

1/5 | Loss tr 0.064 t 0.048 | Acc tr 1.00 t 1.00
2/5 | Loss tr 0.053 t 0.034 | Acc tr 1.00 t 1.00
3/5 | Loss tr 0.042 t 0.034 | Acc tr 1.00 t 1.00
4/5 | Loss tr 0.030 t 0.037 | Acc tr 1.00 t 1.00
5/5 | Loss tr 0.032 t 0.023 | Acc tr 1.00 t 1.00


In [364]:
model.apply_mask(0.1)

Pruned 15.0/60 weights


In [365]:
epochs = 5

loss_trs, acc_trs, loss_ts, acc_ts = [], [], [], []
for e in range(epochs):
    train(model, dl_tr, opt, criterion)
    
    loss_tr, acc_tr = eval_loss_acc(model, dl_tr, criterion, acc)
    loss_t, acc_t = eval_loss_acc(model, dl_t, criterion, acc)
    
    loss_trs.append(loss_tr)
    acc_trs.append(acc_tr)
    loss_ts.append(loss_t)
    acc_ts.append(acc_t)
    
    print('{}/{} | Loss tr {:.3f} t {:.3f} | Acc tr {:.2f} t {:.2f}'.format(e+1, epochs, loss_tr, loss_t, acc_tr, acc_t))

1/5 | Loss tr 0.346 t 0.384 | Acc tr 1.00 t 1.00
2/5 | Loss tr 0.295 t 0.404 | Acc tr 1.00 t 1.00
3/5 | Loss tr 0.265 t 0.255 | Acc tr 1.00 t 1.00
4/5 | Loss tr 0.220 t 0.213 | Acc tr 1.00 t 1.00
5/5 | Loss tr 0.177 t 0.162 | Acc tr 1.00 t 1.00


In [366]:
model.apply_mask(0.1)

Pruned 14.0/60 weights


## SK Learn Logistic Regression

In [563]:
lreg_model = LogisticRegression(C=3e-1, penalty='l1', solver='liblinear', class_weight='balanced')
lreg_model.fit(X_tr, y_tr.ravel())
lreg_model.coef_ 

array([[ 0.35578257, -0.65637182, -0.58970924,  0.99510783, -1.01403316,
        -1.00012471]])

In [564]:
lreg_model.intercept_

array([-2.73168457e-05])

In [566]:
lreg_model.coef_ = np.array([1, 1, 1, 1, 1, 1])

In [567]:
y_tr_pred = lreg_model.predict(X_tr)
print(np.mean(y_tr_pred == y_tr.flatten()))

y_t_pred = lreg_model.predict(X_t)
print(np.mean(y_t_pred == y_t.flatten()))

IndexError: tuple index out of range