In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torch.utils.data as torch_data

from Glister import GlisterOnline

In [3]:
class DNA_DATA(torch_data.Dataset):
    """Map-style dataset holding a feature tensor and a label tensor.

    Args:
        X: Array-like of features; converted to a ``torch.float32`` tensor.
        y: Array-like of labels; converted to a ``torch.long`` tensor.

    Indexing returns an ``(X[idx], y[idx])`` tuple, and ``len()`` is the size
    of the first dimension of ``X``.
    """

    def __init__(self, X, y):
        super().__init__()
        # torch.tensor copies its input, so the dataset owns its own storage.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        # Number of samples is the leading dimension of the feature tensor.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        label = self.y[idx]
        return features, label

In [4]:
class TwoLayerNet(torch.nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Args:
        input_dim: Number of input features per sample. Defaults to 180.
        hidden_dim: Width of the hidden layer. Defaults to 20.
        num_classes: Number of output scores per sample. Defaults to 10.

    The defaults are the layer sizes that used to be hard-coded, so
    ``TwoLayerNet()`` builds exactly the same architecture as before; pass
    explicit sizes to reuse the network on data with a different number of
    features or classes.
    """

    def __init__(self, input_dim=180, hidden_dim=20, num_classes=10):
        super().__init__()
        # Attribute names are kept so existing state dicts stay loadable.
        self.linear1 = torch.nn.Linear(input_dim, hidden_dim)
        self.linear2 = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, x, last=False):
        """Compute class scores for ``x``.

        Args:
            x: Input tensor whose last dimension equals ``input_dim``.
            last: When true, also return the hidden-layer activations.

        Returns:
            ``scores`` when ``last`` is false, otherwise the tuple
            ``(scores, hidden)`` where ``hidden`` is the ReLU output of the
            first layer.
        """
        hidden = torch.nn.functional.relu(self.linear1(x))
        scores = self.linear2(hidden)
        if last:
            return scores, hidden
        return scores

In [6]:
# Fix the random seeds so the split below, and any later NumPy / PyTorch
# randomness in this kernel (e.g. model weight initialisation), is
# reproducible under Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the table; the 'class' column is the label and all remaining columns
# are used as features.
data = pd.read_csv('dna.csv')

# Hold out 30% of the rows as the test set. random_state makes the shuffle
# deterministic, so the reported metrics refer to the same split every run.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('class', axis=1),
    data['class'],
    test_size=0.3,
    random_state=SEED,
)

In [7]:
# Wrap each split in a DNA_DATA dataset, in the order full / validation / test.
# NOTE(review): valset is built from the same training rows as fullset, so it
# is not a held-out validation set -- confirm this is intended.
fullset, valset, testset = (
    DNA_DATA(np.array(features), np.array(labels))
    for features, labels in (
        (X_train, y_train),
        (X_train, y_train),
        (X_test, y_test),
    )
)

In [8]:
# Build the GlisterOnline trainer from the three datasets and a fresh model.
# NOTE(review): both valset and validation_set_fraction are supplied; how
# GlisterOnline combines them is not visible in this notebook -- confirm.
glister = GlisterOnline(
    # Datasets and execution device.
    fullset=fullset,
    valset=valset,
    testset=testset,
    device="cpu",
    validation_set_fraction=0.1,
    # Batch sizes for the train / validation / test / selection loaders.
    trn_batch_size=20,
    val_batch_size=50,
    tst_batch_size=50,
    dss_batch_size=50,
    # Model and optimisation settings.
    # NOTE(review): the logged FullTrn loss moves up and down between epochs
    # (182.6 -> 118.9 -> 123.0 -> 161.0); learning_rate=1.0 may be too large
    # -- confirm.
    model=TwoLayerNet(),
    num_epochs=10,
    learning_rate=1.0,
    # Kept equal to the output width of TwoLayerNet.linear2 (10).
    num_classes=10,
    n_channels=1,
    # Selection settings; presumably bud is the subset budget and lam a
    # regularisation weight -- verify against GlisterOnline.
    bud=100,
    lam=0.1,
)

In [9]:
# Run training and unpack the 13 returned values: accuracies, final losses,
# per-epoch loss histories, the selected indices and the elapsed time.
# NOTE(review): the meaning of the np.arange(20) argument is defined by
# GlisterOnline and is not visible here -- confirm.
# NOTE(review): the result name `time` would shadow the standard `time`
# module if that were imported in this notebook; names are kept unchanged
# because other cells may read them.
(
    val_acc, tst_acc, subtrn_acc, full_trn_acc,
    val_loss, test_loss, subtrn_loss, full_trn_loss,
    val_losses, substrn_losses, fulltrn_losses,
    idxs, time,
) = glister.random_greedy_train_model_online_taylor(np.arange(20))

Epoch: 1 SubsetTrn,FullTrn,ValLoss: 2.356973648071289 182.6209760904312 8.997533202171326
selEpoch: 0, Starting Selection: 2021-03-17 22:49:26.412681
numSelected: 0 Time for 1: 0.0065190792083740234
selEpoch: 0, Selection Ended at: 2021-03-17 22:49:26.510849
Epoch: 2 SubsetTrn,FullTrn,ValLoss: 8.832827806472778 118.9138697385788 5.650245547294617
selEpoch: 1, Starting Selection: 2021-03-17 22:49:26.538914
numSelected: 0 Time for 1: 0.006198883056640625


  batch_wise_indices = np.array([list(BatchSampler(SequentialSampler(np.arange(self.N_trn)), self.batch_size, drop_last=False))][0])


selEpoch: 1, Selection Ended at: 2021-03-17 22:49:26.664714
Epoch: 3 SubsetTrn,FullTrn,ValLoss: 7.936232924461365 123.01679730415344 6.082277417182922
selEpoch: 2, Starting Selection: 2021-03-17 22:49:26.692660
numSelected: 0 Time for 1: 0.0061948299407958984
selEpoch: 2, Selection Ended at: 2021-03-17 22:49:26.782780
Epoch: 4 SubsetTrn,FullTrn,ValLoss: 6.559903621673584 161.0219497680664 8.12591803073883
selEpoch: 3, Starting Selection: 2021-03-17 22:49:26.810105
numSelected: 0 Time for 1: 0.006143093109130859
selEpoch: 3, Selection Ended at: 2021-03-17 22:49:26.902968
Epoch: 5 SubsetTrn,FullTrn,ValLoss: 6.746968626976013 103.71712267398834 5.071864187717438
selEpoch: 4, Starting Selection: 2021-03-17 22:49:26.931174
numSelected: 0 Time for 1: 0.0062618255615234375
selEpoch: 4, Selection Ended at: 2021-03-17 22:49:27.023385
Epoch: 6 SubsetTrn,FullTrn,ValLoss: 6.6032832860946655 122.4283155798912 6.002121806144714
selEpoch: 5, Starting Selection: 2021-03-17 22:49:27.051608
numSelected:

In [10]:
# Number of indices returned in `idxs` by the training call; the recorded run
# shows 100, which equals the bud = 100 passed to GlisterOnline.
len(idxs)

100