In [1]:
import pandas as pd
import numpy as np
import torch
import torch.utils.data as torch_data
from torch.optim.lr_scheduler import _LRScheduler
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
import time

import random
random.seed(42)
torch.manual_seed(42)
np.random.seed(42)
torch.cuda.manual_seed(42)

In [2]:
class ShallowNet(torch.nn.Module):
    def __init__(self, input_dim, num_class):
        super(ShallowNet, self).__init__()
        self.fc1 = torch.nn.Linear(input_dim, 100)
        self.fc2 = torch.nn.Linear(100, num_class)

    def forward(self, x):
        out = self.fc1(x)
        out = torch.nn.ReLU()(out)
        out = self.fc2(out)
        return out 

In [3]:
class Dna(torch_data.Dataset):

    def __init__(self, X, y):
        super(Dna, self).__init__()
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    
    def __len__(self):

        return len(self.X)
    
    def __getitem__(self, idx):
        
        return self.X[idx], self.y[idx]


dna = pd.read_csv("dna.csv")
dna_target = dna["class"] - 1 # because otherwise we get classes 0,1,2,3
dna_features = dna.iloc[:, :-1]

train_dna = Dna(np.array(dna_features.iloc[:1401, :]), np.array(dna_target[:1401]))
val_dna = Dna(np.array(dna_features.iloc[1400:2001, :]), np.array(dna_target[1400:2001]))
test_dna = Dna(np.array(dna_features.iloc[2000:, :]), np.array(dna_target[2000:]))

In [4]:
def compute_grad_train(observations, theta, input_dim, n_classes):
  '''
  observations[0]: features, observations[1]: target
  '''
  feats = observations[0].reshape(1, input_dim)
  target = observations[1].reshape(1,)
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  net = ShallowNet(input_dim, n_classes)
  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 40)

  # do the forward pass
  epochs=200
  for epoch in range(1, epochs+1):
    net.train()
    optimizer.zero_grad()
    x = net(feats)
    train_loss = criterion(x, target)
    train_grad = torch.autograd.grad(train_loss, net.fc2.weight, retain_graph=True)[0]
    train_loss.backward()
    optimizer.step()

  return train_grad.reshape(1, n_classes*100), net

In [5]:
def compute_grad_val(net, observations, theta, input_dim, n_classes):
  '''
  observations[0]: features, observations[1]: target
  '''
  feats = observations[:][0]
  target = observations[:][1]
  device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  net = net
  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 40)
  # do the forward pass
  epochs=200
  for epoch in range(1, epochs+1):
    net.eval()
    optimizer.zero_grad()
    x = net(feats)
    val_loss = criterion(x, target)
    val_grad = torch.autograd.grad(val_loss, net.fc2.weight, retain_graph=True)[0]
    val_loss.backward()
    optimizer.step()

  return val_grad.reshape(1, n_classes*100), val_loss

In [6]:
def GreedyDSS(U, Val, theta_prev, eta, k, r, lambd, R, sel):
  '''
    Implementation of GreedyDSS (Algorithm 2) from GLISTER paper

    Attributes:
    ---
    U: torch.tensor
      Training data.
    Val: torch.tensor
      Validation data.
    theta_0: torch.tensor
      Model parameters initialization.
    eta: float
      Learning rate.
    k: int
      Number of point for which the model would be trained.
    r: int
      Number of Taylor approximations.
    lambd: float
      Regularization coefficient.
    R: function
      Regularization function.
    sel: str
      Selection method.

    Returns
    ---
    S: ndarray
      Coreset.
  '''
  eps=800
  t = 0
  S = U[[np.random.randint(len(U)), np.random.randint(len(U))]] # choose one random observation of training set
  theta = theta_prev

  while t < r:
    if sel == "naive_greedy":
      V = U
    elif sel == "stochastic_greedy":
      V = random.sample(list(U), round((len(U)/r) * (1/np.log(eps))))

    g_hats = np.array([])
    for e in V:
      grad_train, net = compute_grad_train(e, theta, input_dim, n_classes)
      theta_t_e = theta + eta *  grad_train
      grads_s = np.array([])
      for i, j in zip(S[0], S[1]):
        grad_s, _ = compute_grad_train((i, j), theta, input_dim, n_classes)
        grads_s = np.append(grads_s, grad_s)
      grads_s = np.array(grads_s).reshape(grads_s.shape[0]//(n_classes*100), n_classes*100)
      theta_s = theta + eta * torch.Tensor(np.sum(grads_s, axis=0))

      grad_val, val_loss = compute_grad_val(net, Val, theta_s, input_dim, n_classes)
      g_hats = np.append(g_hats, val_loss.detach().numpy() + eta * torch.matmul(grad_train, grad_val.T).detach().numpy()[0][0]) + lambd*R# * R(torch.cat(e, S))) # g hats is np.array # the largest values
    
    g_hats = np.array(g_hats)
    
    best_indices = np.argpartition(np.array(g_hats), -round(k/r))[-round(k/r):]

    
    S_t = (torch.clone(S[:][0]), torch.clone(S[:][1]))
    
    for i in best_indices:
      if sel == "stochastic_greedy":
        S_t = (torch.vstack((S_t[:][0], V[:][i][0])), torch.hstack((S_t[:][1], V[:][i][1])))
      else:
        S_t = (torch.vstack((S_t[:][0], V[:][0][i])), torch.hstack((S_t[:][1], V[:][1][i])))

    S = S_t

    for elem in S_t:
      if elem in U:
        rowindex = numpy.where(U==elem)[0][0] # index of the row
        U = np.delete(U, rowindex, 0)

    grads_theta = torch.zeros(n_classes*100).reshape(1, n_classes*100)
    for i, j in zip(S_t[0], S_t[1]):
      grad, _ = compute_grad_train((i, j), theta, input_dim, n_classes)

      grads_theta += grad

    theta = theta + grads_theta

    t += 1

  return S
    


In [7]:
def glister_online(U, Val, S_0, k, theta_prev, eta, T, L, r, lambd, R, sel):
  '''
  Attributes:
  ---
  U: torch.tensor
    Training data.
  Val: torch.tensor
    Validation data.
  S_0: torch.tensor
    Initial subset.
  k: int
    Size of the initial subset.
  theta_prev: torch.tensor
    Model parameter initialization.
  eta: float
    Learning rate.
  T: int
    Total epochs.
  L: int
    Epoch interval for selection.
  r: int
    Number of Taylor approximations.
  lambd: float
    Regularization coefficient.
  R: function
    Regularization function.
  sel: str
    Selection Method.
  
  Returns:
  ---
  S_T: torch.tensor
    Final subset
  theta_T: torch.tensor
    Parameters.
  '''
  theta = theta_prev
  S_t = S_0

  for t in range(T):
    print("Epoch: ", t)
    if t % L == 0:
      S_t = GreedyDSS(U=U, Val=V, theta_prev=theta, eta=eta, k=k, r=r, lambd=0, R=0, sel=sel)

    model = ShallowNet(input_dim, n_classes)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 40)
    
    loader = DataLoader(Dna(S_t[:][0], S_t[:][1]), batch_size=10, shuffle=True)

    for X, y in loader:
      model.train()
      optimizer.zero_grad()
      x = model(S_t[:][0])
      train_loss = criterion(x, S_t[:][1])
      train_loss.backward()
      optimizer.step()

      if scheduler is not None:
        scheduler.step()
      
    theta = model.fc2.weight.reshape(1, n_classes*100)

  return S_t, model.fc2.weight


    

In [8]:
U = train_dna
V = val_dna
theta_prev = torch.rand(1, 300)
eta = 0.05
k = 5
r = 5
lambd = 0
sel = "stochastic_greedy"
input_dim = 180
n_classes = 3

start = time.perf_counter()
subset = glister_online(U=U, Val=V, theta_prev=theta_prev, S_0 = U[[np.random.randint(len(U)), np.random.randint(len(U))]], eta=eta, k=k, r=r, lambd=0, R=0, sel=sel, T=200, L=20)
end = time.perf_counter()
print("time elapsed: ", end-start)

Epoch:  0


  """
  


Epoch:  1
Epoch:  2
Epoch:  3
Epoch:  4
Epoch:  5
Epoch:  6
Epoch:  7
Epoch:  8
Epoch:  9
Epoch:  10
Epoch:  11
Epoch:  12
Epoch:  13
Epoch:  14
Epoch:  15
Epoch:  16
Epoch:  17
Epoch:  18
Epoch:  19
Epoch:  20
Epoch:  21
Epoch:  22
Epoch:  23
Epoch:  24
Epoch:  25
Epoch:  26
Epoch:  27
Epoch:  28
Epoch:  29
Epoch:  30
Epoch:  31
Epoch:  32
Epoch:  33
Epoch:  34
Epoch:  35
Epoch:  36
Epoch:  37
Epoch:  38
Epoch:  39
Epoch:  40
Epoch:  41
Epoch:  42
Epoch:  43
Epoch:  44
Epoch:  45
Epoch:  46
Epoch:  47
Epoch:  48
Epoch:  49
Epoch:  50
Epoch:  51
Epoch:  52
Epoch:  53
Epoch:  54
Epoch:  55
Epoch:  56
Epoch:  57
Epoch:  58
Epoch:  59
Epoch:  60
Epoch:  61
Epoch:  62
Epoch:  63
Epoch:  64
Epoch:  65
Epoch:  66
Epoch:  67
Epoch:  68
Epoch:  69
Epoch:  70
Epoch:  71
Epoch:  72
Epoch:  73
Epoch:  74
Epoch:  75
Epoch:  76
Epoch:  77
Epoch:  78
Epoch:  79
Epoch:  80
Epoch:  81
Epoch:  82
Epoch:  83
Epoch:  84
Epoch:  85
Epoch:  86
Epoch:  87
Epoch:  88
Epoch:  89
Epoch:  90
Epoch:  91
Epoch:  

In [9]:
# testing the model:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = ShallowNet(input_dim, n_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 40)


torch.manual_seed(0)

train_loader = DataLoader(Dna(subset[0][0], subset[0][1]), batch_size=10, shuffle=True)

val_loader = DataLoader(val_dna, batch_size=10, shuffle=True)


def train(epochs, net, criterion, optimizer, train_loader, val_loader, scheduler=None, verbose=True, device='cpu'):
    net.to(device)
    freq = max(epochs//15,1)
 
    for epoch in range(1, epochs+1):
        net.train()

        losses_train = []
        for X, target in train_loader:

            X, target = X.to(device), target.to(device)
            
            optimizer.zero_grad()
            x = net(X)
          
            train_loss = criterion(x, target)
            train_loss.backward()
            optimizer.step()
            losses_train.append(train_loss.item())
            
            
        if scheduler is not None:
            scheduler.step()
        
        if verbose and epoch%freq==0:
            y_pred_val =  []
            y_true_val = []
            net.eval()
            for X, target in val_loader:
                X, target = X.to(device), target.to(device)
                losses_val = []  

                optimizer.zero_grad()
                x = net(X)
                target_hat_val = torch.nn.Softmax(1)(x)

                val_loss = criterion(x, target)
                val_loss.backward()
                optimizer.step()
                losses_val.append(val_loss.item())
                                
                y_pred_val.extend(target_hat_val.argmax(1).tolist())
                y_true_val.extend(target.tolist())

            mean_val = sum(losses_val)/len(losses_val)
            mean_train = sum(losses_train)/len(losses_train)

            print('Val epoch {}'.format(epoch), \
              ', Loss : {:.3}'.format(mean_train), \
              ', Accuracy on test: {:.3}'.format(accuracy_score(y_true_val, y_pred_val)) )


  """
  


In [10]:
train(200, net, criterion, optimizer, train_loader, val_loader, scheduler)

Val epoch 13 , Loss : 1.1 , Accuracy on test: 0.238
Val epoch 26 , Loss : 1.1 , Accuracy on test: 0.26
Val epoch 39 , Loss : 1.1 , Accuracy on test: 0.268
Val epoch 52 , Loss : 1.1 , Accuracy on test: 0.273
Val epoch 65 , Loss : 1.1 , Accuracy on test: 0.278
Val epoch 78 , Loss : 1.09 , Accuracy on test: 0.311
Val epoch 91 , Loss : 1.09 , Accuracy on test: 0.368
Val epoch 104 , Loss : 1.09 , Accuracy on test: 0.419
Val epoch 117 , Loss : 1.09 , Accuracy on test: 0.434
Val epoch 130 , Loss : 1.09 , Accuracy on test: 0.441
Val epoch 143 , Loss : 1.08 , Accuracy on test: 0.466
Val epoch 156 , Loss : 1.08 , Accuracy on test: 0.493
Val epoch 169 , Loss : 1.07 , Accuracy on test: 0.501
Val epoch 182 , Loss : 1.06 , Accuracy on test: 0.516
Val epoch 195 , Loss : 1.06 , Accuracy on test: 0.522


In [11]:
U = train_dna
V = val_dna
theta_prev = torch.rand(1, 300)
eta = 0.05
k = 3
r = 30

lambd = 0
sel = "stochastic_greedy"
input_dim = 180
n_classes = 3


random_sample = random.sample(list(U), 5)
random_samp_loader = DataLoader(random_sample, batch_size=10, shuffle=True)

In [12]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = ShallowNet(input_dim, n_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 40)
train(200, net, criterion, optimizer, random_samp_loader, val_loader, scheduler)

Val epoch 13 , Loss : 1.11 , Accuracy on test: 0.474
Val epoch 26 , Loss : 1.1 , Accuracy on test: 0.493
Val epoch 39 , Loss : 1.09 , Accuracy on test: 0.502
Val epoch 52 , Loss : 1.09 , Accuracy on test: 0.506
Val epoch 65 , Loss : 1.09 , Accuracy on test: 0.511
Val epoch 78 , Loss : 1.07 , Accuracy on test: 0.516
Val epoch 91 , Loss : 1.05 , Accuracy on test: 0.511
Val epoch 104 , Loss : 1.04 , Accuracy on test: 0.511
Val epoch 117 , Loss : 1.04 , Accuracy on test: 0.511
Val epoch 130 , Loss : 1.04 , Accuracy on test: 0.511
Val epoch 143 , Loss : 1.03 , Accuracy on test: 0.512
Val epoch 156 , Loss : 1.02 , Accuracy on test: 0.512
Val epoch 169 , Loss : 1.01 , Accuracy on test: 0.512
Val epoch 182 , Loss : 0.994 , Accuracy on test: 0.512
Val epoch 195 , Loss : 0.989 , Accuracy on test: 0.512
