In [1]:
import torch
from tqdm.auto import tqdm, trange
import random
from matplotlib import pyplot as plt
import numpy as np
import copy
from torch.nn import functional as F

In [2]:
dataset_name = 'iris.csv'
dt = np.genfromtxt('data/' + dataset_name, delimiter=',', skip_header=1)
xs, ys = dt[:, :-1], dt[:, -1].astype(int)
xs = (xs - np.mean(xs, axis=0)) / np.std(xs, axis=0)
data = list(zip([torch.from_numpy(x).float() for x in xs], [torch.tensor([y]).long() for y in ys]))

In [3]:
class InspLinear(torch.nn.Linear):
    def __init__(self, in_features: int, out_features: int, bias: bool = True):
        super(InspLinear, self).__init__(in_features, out_features, bias)
        
        self.clear_acc()
        self.dropout = torch.ones_like(self.weight)
        
    def update_acc(self):
        self.w_grad_acc.append(self.weight.grad.detach().clone())
        self.b_grad_acc.append(self.bias.grad.detach().clone())
        
    def clear_acc(self):
        self.w_grad_acc= []
        self.b_grad_acc = []
        
    def forward(self, x):
        return F.linear(x, self.dropout*self.weight, self.bias)
        
    def get_acc(self):
        return self.w_grad_acc, self.b_grad_acc

In [17]:
def train(dataset, model_=None, forcing=False):
    model = copy.deepcopy(model_)
    layer = 2
    #model.to('cuda')

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    results = []
    for epoch in range(100):
        fr = forcing and epoch%3 == 0
        if fr:
            # testing gradients
            model.train()
            model[layer].clear_acc()

            for i, (x, y) in enumerate(dataset):
                prediction = model(x.unsqueeze(dim=0))
                train_loss = criterion(prediction, y)
                optimizer.zero_grad()
                train_loss.backward()
                model[layer].update_acc()
                
            w_acc, b_acc = model[layer].get_acc()
            w_mean = torch.mean(torch.stack(w_acc), dim=0)
            b_mean = torch.mean(torch.stack(b_acc), dim=0)

            std = torch.sum(torch.std(torch.stack(w_acc), dim=0), dim=-1) + torch.std(torch.stack(b_acc), dim=0)
            chosen = torch.argsort(std)[-2:]
            #print(std, chosen)

            mask = []
            for i, (x, y) in enumerate(dataset):
                prediction = model(x.unsqueeze(dim=0))
                train_loss = criterion(prediction, y)
                optimizer.zero_grad()
                train_loss.backward()
                mask.append(1 if torch.sum(model[0].weight.grad[chosen[0].item()]) > torch.sum(w_mean[chosen[0].item()]) else 0)
            splits = [0, 1]
        else:
            mask = [0]*len(dataset)
            splits=[0]
                
        # training
        random.shuffle(dataset)
        model.train()
        acc = 0.
        for split in splits:
            for i, (x, y) in enumerate(dataset):
                if fr:
                    model[layer].dropout = chosen[split]*torch.ones_like(model[layer].weight)
                    model[layer].dropout[chosen[split]] = abs(chosen[split].item() - 1.)
                else:
                    model[layer].dropout = torch.ones_like(model[layer].weight)
                if mask[i] == split:
                    prediction = model(x.unsqueeze(dim=0))
                    train_loss = criterion(prediction, y)
                    optimizer.zero_grad()
                    train_loss.backward()
                    optimizer.step()
                    #losses.append(train_loss.item())
                    if torch.argmax(prediction.squeeze()) == y:
                        acc += 1.
        acc /= len(dataset)
        results.append(acc)
        if acc == 1.:
            break
    return results

In [18]:
winners = []
winners_v = []
winners_a=[]
winners_b=[]
dataset = data
for i in trange(100):
    model_ = torch.nn.Sequential(InspLinear(4, 10), torch.nn.Sigmoid(), 
                                 InspLinear(10, 10), torch.nn.Sigmoid(), 
                                 InspLinear(10, 3))
    epochs = []
    for forcing in [False, True]:
        res = train(dataset, model_, forcing)
        #plt.plot(range(1, len(res)+1), res)
        #print('Forcing:', forcing, np.argmax(res)+1, np.max(res))
        epochs.append(res)
        
    winners.append(-1. if np.argmax(epochs[0]) < np.argmax(epochs[1]) else (1. if np.argmax(epochs[0]) > np.argmax(epochs[1]) else 0.))
    winners_v.append(-1. if np.max(epochs[0]) > np.max(epochs[1]) else (1. if np.max(epochs[0]) < np.max(epochs[1]) else 0.))
    
    winners_a.append(1. if winners[-1]>0. or winners_v[-1]>0. else 0.)
    winners_b.append(1. if winners[-1]<0. or winners_v[-1]<0. else 0.)
    #plt.legend(['Standard', 'With grad forcing'])
    #plt.show()
    

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




In [19]:
print(np.mean(winners))
print(np.unique(winners, return_counts=True))
print(np.mean(winners_v))
print(np.unique(winners_v, return_counts=True))
print(np.mean(winners_a))
print(np.mean(winners_b))

0.08
(array([-1.,  0.,  1.]), array([44,  4, 52], dtype=int64))
0.66
(array([-1.,  0.,  1.]), array([ 5, 24, 71], dtype=int64))
0.91
0.48
