In [1]:
import pandas as pd
import numpy as np
import torch
import torch.utils.data as torch_data
from torch.optim.lr_scheduler import _LRScheduler
from sklearn.metrics import accuracy_score
import time

import random
# Seed every random number generator used in this notebook with the same
# value so that a fresh "Restart & Run All" draws the same random numbers.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [2]:
class ShallowNet(torch.nn.Module):
    '''
    Two-layer fully connected network: Linear -> ReLU -> Linear.

    Parameters
    ---
    input_dim: int
      Number of input features.
    num_class: int
      Number of output scores (one per class).
    hidden_dim: int
      Width of the hidden layer. Defaults to 100, the value that was
      previously hard-coded.
    '''

    def __init__(self, input_dim, num_class, hidden_dim=100):
        super().__init__()
        # Layers are created in the same order as before (fc1, then fc2) so
        # that parameter initialisation consumes the RNG in the same order.
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, num_class)

    def forward(self, x):
        '''Return the raw (un-normalised) class scores for the batch `x`.'''
        # Functional ReLU: avoids constructing a new nn.ReLU module per call.
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

In [4]:
class Dna(torch_data.Dataset):
    '''
    In-memory dataset that pairs a feature tensor with a target tensor.

    `X` is copied into a float32 tensor and `y` into an int64 tensor; both
    are indexed with the same `idx` on item access.
    '''

    def __init__(self, X, y):
        super().__init__()
        features = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.long)
        self.X = features
        self.y = targets

    def __len__(self):
        '''Length of the feature tensor along its first dimension.'''
        n_rows = len(self.X)
        return n_rows

    def __getitem__(self, idx):
        '''Return the `(features, target)` pair selected by `idx`.'''
        features = self.X[idx]
        target = self.y[idx]
        return features, target


# Load the DNA table and cut it into disjoint train / validation / test parts.
dna = pd.read_csv("dna.csv")
# Shift the labels down by one so that they start at 0, as CrossEntropyLoss
# expects (presumably the CSV stores 1-based class labels -- TODO confirm).
dna_target = dna["class"] - 1
# Every column except the last is a feature
# (assumes "class" is the last column of the CSV -- TODO confirm).
dna_features = dna.iloc[:, :-1]

# Split boundaries are half-open, so no row lands in two splits. The previous
# slices (:1401, 1400:2001, 2000:) put row 1400 in both train and validation
# and row 2000 in both validation and test.
TRAIN_END = 1400
VAL_END = 2000

train_dna = Dna(np.array(dna_features.iloc[:TRAIN_END, :]), np.array(dna_target.iloc[:TRAIN_END]))
val_dna = Dna(np.array(dna_features.iloc[TRAIN_END:VAL_END, :]), np.array(dna_target.iloc[TRAIN_END:VAL_END]))
test_dna = Dna(np.array(dna_features.iloc[VAL_END:, :]), np.array(dna_target.iloc[VAL_END:]))

In [5]:
def compute_grad_train(observations, theta, input_dim, n_classes, epochs=200, lr=1e-3):
  '''
  Fit a fresh ShallowNet to a single observation and return the last
  gradient of the loss with respect to the output-layer weights.

  Parameters
  ---
  observations: sequence
    observations[0]: features, observations[1]: target (one observation).
  theta: any
    Accepted for interface compatibility; it is not used by this function.
    NOTE(review): the gradient is taken at the weights of a newly created
    network, not at `theta` -- confirm this is intended.
  input_dim: int
    Number of input features; the features are reshaped to (1, input_dim).
  n_classes: int
    Number of network outputs.
  epochs: int
    Number of SGD steps (default 200, the previously hard-coded value).
  lr: float
    SGD learning rate (default 1e-3, the previously hard-coded value).

  Returns
  ---
  (train_grad, net): train_grad is the gradient of the loss w.r.t.
  `net.fc2.weight` from the final epoch (evaluated before that epoch's
  optimizer step), flattened to one row; net is the trained network.
  '''
  if epochs < 1:
    raise ValueError("epochs must be at least 1")

  feats = observations[0].reshape(1, input_dim)
  target = observations[1].reshape(1,)
  net = ShallowNet(input_dim, n_classes)
  criterion = torch.nn.CrossEntropyLoss()
  # The cosine scheduler that used to be built here was never stepped, so the
  # learning rate was constant; it is dropped together with the unused device.
  optimizer = torch.optim.SGD(net.parameters(), lr=lr)

  net.train()
  for _ in range(epochs):
    optimizer.zero_grad()
    train_loss = criterion(net(feats), target)
    # A single backward pass; the separate autograd.grad call computed the
    # same gradient a second time.
    train_loss.backward()
    optimizer.step()

  # Plain SGD does not modify .grad, so this is still the gradient produced
  # by the last backward pass.
  train_grad = net.fc2.weight.grad.detach().clone()
  return train_grad.reshape(1, -1), net

In [6]:
def compute_grad_val(net, observations, theta, input_dim, n_classes):
  '''
  Evaluate `net` on a validation set and return the gradient of the loss
  with respect to the output-layer weights, together with the loss.

  The network is only evaluated: its parameters and their `.grad` fields
  are left untouched. (The previous version ran 200 SGD steps on the
  validation loss, i.e. it trained the caller's network on validation data.)

  Parameters
  ---
  net: torch.nn.Module
    Network to evaluate; must expose an `fc2.weight` parameter.
  observations: indexable
    observations[:][0]: features, observations[:][1]: target.
  theta, input_dim, n_classes:
    Accepted for interface compatibility; not used by this function.

  Returns
  ---
  (val_grad, val_loss): val_grad is the gradient of the validation loss
  w.r.t. `net.fc2.weight`, flattened to one row; val_loss is the detached
  scalar loss tensor.
  '''
  batch = observations[:]
  feats = batch[0]
  target = batch[1]
  criterion = torch.nn.CrossEntropyLoss()

  net.eval()
  val_loss = criterion(net(feats), target)
  # autograd.grad returns the gradient without writing to parameter .grad.
  val_grad = torch.autograd.grad(val_loss, net.fc2.weight)[0]

  return val_grad.reshape(1, -1), val_loss.detach()

In [35]:
def _sum_subset_grads(subset, theta):
  '''Sum the per-observation training gradients over a (features, targets) pair of tensors.'''
  grads = [compute_grad_train((x, y), theta, input_dim, n_classes)[0]
           for x, y in zip(subset[0], subset[1])]
  return torch.cat(grads, dim=0).sum(dim=0, keepdim=True)


def GreedyDSS(U, Val, theta_prev, eta, k, r, lambd, R, sel):
  '''
    Implementation of GreedyDSS (Algorithm 2) from GLISTER paper

    Reads the module-level names `input_dim` and `n_classes`, and calls
    `compute_grad_train` / `compute_grad_val`; all of them must be defined
    before this function is called.

    Attributes:
    ---
    U: dataset
      Training data. Must support `len(U)`, `U[int]` and `U[list_of_ints]`,
      each returning a (features, targets) pair.
    Val: dataset
      Validation data, passed unchanged to `compute_grad_val`.
    theta_prev: torch.tensor
      Model parameters initialization.
    eta: float
      Learning rate.
    k: int
      Number of point for which the model would be trained.
    r: int
      Number of Taylor approximations (selection rounds); round(k / r)
      points are added per round.
    lambd: float
      Regularization coefficient.
    R: float
      Regularization value; it is multiplied by `lambd` and added to every
      candidate score. NOTE(review): originally documented as a function,
      but the code has always used it as a number -- confirm.
    sel: str
      Selection method: "naive_greedy" scores every remaining point,
      "stochastic_greedy" scores a random sample of them.

    Returns
    ---
    S: tuple of torch.tensor
      Coreset as (features, targets): two randomly drawn seed rows followed
      by the rows selected in each round.

    Raises
    ---
    ValueError
      If `sel` is not one of the two supported selection methods.
  '''
  if sel not in ("naive_greedy", "stochastic_greedy"):
    raise ValueError(f"unknown selection method: {sel!r}")

  eps = 800  # constant of the stochastic sample-size formula below
  per_round = round(k / r)

  # Seed the coreset with two random draws (the two draws may coincide).
  seed_idx = [np.random.randint(len(U)), np.random.randint(len(U))]
  S = U[seed_idx]
  theta = theta_prev

  # Track the ground set by index so that chosen points can really be removed.
  # The old removal loop never matched anything (`elem in U` compared tensors
  # with (x, y) tuples), so points could be selected again in later rounds.
  taken = set(int(i) for i in seed_idx)
  remaining = [i for i in range(len(U)) if i not in taken]

  for _ in range(r):
    if per_round <= 0 or not remaining:
      break

    if sel == "naive_greedy":
      candidates = list(remaining)
    else:
      n_sample = round((len(remaining) / r) * (1 / np.log(eps)))
      n_sample = max(1, min(n_sample, len(remaining)))
      candidates = random.sample(remaining, n_sample)

    # Depends only on S and theta, not on the candidate, so it is computed
    # once per round instead of once per candidate.
    theta_s = theta + eta * _sum_subset_grads(S, theta)

    g_hats = np.empty(len(candidates))
    for pos, idx in enumerate(candidates):
      grad_train, net = compute_grad_train(U[idx], theta, input_dim, n_classes)
      grad_val, val_loss = compute_grad_val(net, Val, theta_s, input_dim, n_classes)
      alignment = torch.matmul(grad_train, grad_val.T).item()
      # The regulariser belongs to this candidate's score only; it used to be
      # added to the whole score array on every iteration.
      g_hats[pos] = val_loss.detach().item() + eta * alignment + lambd * R

    # Keep the candidates with the largest scores.
    n_pick = min(per_round, len(candidates))
    best_positions = np.argpartition(g_hats, -n_pick)[-n_pick:]
    picked = [candidates[i] for i in best_positions]

    new_feats, new_targets = U[picked]
    S = (torch.vstack((S[0], new_feats)), torch.hstack((S[1], new_targets)))

    taken.update(picked)
    remaining = [i for i in remaining if i not in taken]

    # Same step size as the other parameter updates (eta was missing here).
    theta = theta + eta * _sum_subset_grads(S, theta)

  return S
    


In [None]:
# Inputs for the coreset selection.
U = train_dna
V = val_dna

# Model dimensions; these two names are also read as globals by GreedyDSS.
input_dim = 180
n_classes = 3

# One value per output-layer weight: n_classes rows times the 100 hidden
# units of ShallowNet (300 for three classes, the previously hard-coded size).
theta_prev = torch.rand(1, n_classes * 100)
eta = 0.05
k = 50
r = 2

lambd = 0
sel = "stochastic_greedy"

start = time.perf_counter()
# Pass the `lambd` variable defined above instead of repeating the literal 0.
subset = GreedyDSS(U=U, Val=V, theta_prev=theta_prev, eta=eta, k=k, r=r, lambd=lambd, R=0, sel=sel)
end = time.perf_counter()
print("time elapsed: ", end-start)

In [32]:
# Display the coreset returned by GreedyDSS: a (features, targets) tuple of tensors.
subset

(tensor([[0., 0., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0.,
          0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
          1., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 0.,
          1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1.,
          0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
          0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
          0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1.,
          0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1.,
          0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
         [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0.,
          0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
          0., 0