In [1]:
# load modules and set configurations
import numpy as np
import pandas as pd

import os, copy, random, pickle, gc
from itertools import product
from tqdm import tqdm

pd.set_option('display.max_columns', None)

import torch

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

SEED = 42

# 5. LSTM Autoencoder

In [2]:
import torch
from torch import nn, optim
import torch.nn.functional as F 
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader, TensorDataset

torch.set_default_dtype(torch.float32)

class Encoder(nn.Module):
  def __init__(self, seq_len, n_features, embedding_dim=64):
    super(Encoder, self).__init__()
    self.seq_len, self.n_features = seq_len, n_features
    self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
    self.rnn1 = nn.LSTM(
      input_size=n_features,
      hidden_size=self.hidden_dim,
      num_layers=1,
      batch_first=True
    )
    self.rnn2 = nn.LSTM(
      input_size=self.hidden_dim,
      hidden_size=embedding_dim,
      num_layers=1,
      batch_first=True
    )

  def forward(self, x):
    x, (_, _) = self.rnn1(x)
    x, (hidden_n, _) = self.rnn2(x)
    return hidden_n.reshape((-1,1, self.embedding_dim))


class Decoder(nn.Module):
  def __init__(self, seq_len, input_dim=64, n_features=114):
    super(Decoder, self).__init__()
    self.seq_len, self.input_dim = seq_len, input_dim
    self.hidden_dim, self.n_features = 2 * input_dim, n_features
    self.rnn1 = nn.LSTM(
      input_size=input_dim,
      hidden_size=input_dim,
      num_layers=1,
      batch_first=True
    )
    self.rnn2 = nn.LSTM(
      input_size=input_dim,
      hidden_size=self.hidden_dim,
      num_layers=1,
      batch_first=True
    )
    self.output_layer = nn.Linear(self.hidden_dim, n_features)

  def forward(self, x):
    x = x.repeat(1,self.seq_len, 1)
    x, (hidden_n, cell_n) = self.rnn1(x)
    x, (hidden_n, cell_n) = self.rnn2(x)
    return self.output_layer(x)

class RecurrentAutoencoder(nn.Module):
  def __init__(self, seq_len, n_features, embedding_dim=64):
    super(RecurrentAutoencoder, self).__init__()
    self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
    self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

  def forward(self, x):
    x = self.encoder(x)
    x = self.decoder(x)

    return x
  
with open(f'data-dict-for_lstm_ae.pkl', 'rb') as f:
    data_dict = pickle.load(f)

In [9]:
ver = 1 # 1, 2, 3
low_esi = 1 # 0, 1, 'all'
data = data_dict[ver][low_esi]

In [18]:
data['trn']['n_seq'][0]

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23]])

In [22]:
# select data
ver = 1 # 1, 2, 3
low_esi = 1 # 0, 1, 'all'
data = data_dict[ver][low_esi]
_, max_seq_len, n_var = data['trn']['X'].shape

# make it as data loaders
# building data loader
data_loaders = {i:{} for i in ['trn', 'val_tr', 'val_th', 'tst']}
for i in tqdm(['trn', 'val_tr', 'val_th', 'tst']):
    tmp_X = torch.tensor(data[i]['X'].astype(np.float32))
    tmp_y = torch.tensor(data[i]['y'].astype(int))
    tmp_ids = torch.tensor(data[i]['ids'].astype(int))
    tmp_n_seq = torch.tensor(data[i]['n_seq'].astype(int))
    
    batch_size = 256 # 256, 128, 64
    data_loaders[i] = DataLoader(dataset=TensorDataset(tmp_X, tmp_y, tmp_ids, tmp_n_seq), batch_size=batch_size if i=='trn' else tmp_X.shape[0] if i=='var_tr' else 1, shuffle=False)

100%|██████████| 4/4 [00:01<00:00,  2.66it/s]


In [19]:
data_loaders = {i:{} for i in ['trn', 'val_tr', 'val_th', 'tst']}

In [20]:
data_loaders.keys()

dict_keys(['trn', 'val_tr', 'val_th', 'tst'])

In [27]:
data_loaders['val_tr']


<torch.utils.data.dataloader.DataLoader at 0x25c9501d1c0>

In [23]:
# training setting
seed_everything(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

hidden_unit = 256 # 64, 128, 256, 512
n_epochs, factor, patience, min_lr = (5000, 0.1, 100, 1e-6)
loss_reduction = 'global_mean'

gc.collect()
torch.cuda.empty_cache()
model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor, patience=patience, min_lr=min_lr, verbose=True)
criterion = nn.MSELoss(reduction='none').to(device)

history = dict(train=[], val=[])

best_model_wts = copy.deepcopy(model.state_dict())
best_loss = float('inf')
early_stopping_counter = 0

In [24]:
# training
for epoch in range(1, n_epochs+1):
    model = model.train()

    train_losses = []
    for seq_true, _, _, _ in data_loaders['trn']:
        seq_true = seq_true.to(device)
        mask = ~torch.all(seq_true==0, axis=2)
        seq_pred = model(seq_true)

        if loss_reduction == 'stay_wise_mean':
            l = criterion(seq_pred[mask], seq_true[mask]).sum(axis=1)
            lens = mask.sum(axis=1).detach().cpu()
            c_lens = lens.cumsum(dim=0)
            loss = 0
            for idx, i in enumerate(c_lens):
                s = 0 if idx == 0 else c_lens[idx-1]
                loss += l[s:i].sum()/lens[idx]
        elif loss_reduction == 'global_mean':
            l = criterion(seq_pred[mask], seq_true[mask]).sum()
            loss = l/mask.sum()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    val_losses = []
    model = model.eval()
    with torch.no_grad():
        for seq_true, _, _, _ in data_loaders['val_tr']:
            seq_true = seq_true.to(device)
            mask = ~torch.all(seq_true==0, axis=2)
            seq_pred = model(seq_true)

            if loss_reduction == 'stay_wise_mean':
                l = criterion(seq_pred[mask], seq_true[mask]).sum(axis=1)
                lens = mask.sum(axis=1).detach().cpu()
                c_lens = lens.cumsum(dim=0)
                loss = 0
                for idx, i in enumerate(lens):
                    s = 0 if idx == 0 else c_lens[idx-1]
                    loss += l[idx].sum()/lens[idx]
            elif loss_reduction == 'global_mean':
                l = criterion(seq_pred[mask], seq_true[mask]).sum()
                loss = l/mask.sum()

            val_losses.append(loss.item())

    train_loss = np.mean(train_losses)
    val_loss = np.mean(val_losses)

    history['train'].append(train_loss)
    history['val'].append(val_loss)

    print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')

    scheduler.step(val_loss)

    print("Current learning rate:", optimizer.param_groups[0]['lr'])

    if val_loss < best_loss:
        best_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f'Early stopping at epoch {epoch} due to no improvement in validation loss.')
            break
    
    print(f'early_stopping_counter: {early_stopping_counter} ')
    

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth')

with open(f'model_history-lstm_ae-low_esi{ver}-{low_esi}.pkl', 'wb') as f:
    pickle.dump(history, f)

Epoch 1: train loss 20.742730674973455 val loss 12.271102830585246
Current learning rate: 0.001
early_stopping_counter: 0 


KeyboardInterrupt: 

In [8]:
# evaluation data
gc.collect()
torch.cuda.empty_cache()

seed_everything(SEED)
hidden_unit = 256

model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)
model.load_state_dict(torch.load(f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth'))
model = model.eval()

criterion = nn.MSELoss(reduction='mean').to(device)

# loss calculation
eval_split = 'val_th' #tst val_th

eval_data = []
with torch.no_grad():
    for seq_true, y, id, n_seq in tqdm(data_loaders[eval_split]):
        id = id.cpu().numpy().ravel()[0]
        y = y.cpu().numpy().ravel()[0]
        seq_true = seq_true.to(device)
        seq_pred = model(seq_true)
        loss=criterion(seq_pred, seq_true)
        
        eval_data.append([id, y, loss.item(), n_seq])

eval_data = pd.DataFrame(eval_data, columns=['id', 'true', 'score', 'n_seq'])
eval_data.to_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv", index=False)

100%|██████████| 40092/40092 [01:33<00:00, 426.83it/s]


In [9]:
# evaluation data
gc.collect()
torch.cuda.empty_cache()

seed_everything(SEED)
hidden_unit = 256

model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)
model.load_state_dict(torch.load(f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth'))
model = model.eval()

criterion = nn.MSELoss(reduction='mean').to(device)

# loss calculation
eval_split = 'tst' #tst val_th

eval_data = []
with torch.no_grad():
    for seq_true, y, id, n_seq in tqdm(data_loaders[eval_split]):
        id = id.cpu().numpy().ravel()[0]
        y = y.cpu().numpy().ravel()[0]
        seq_true = seq_true.to(device)
        seq_pred = model(seq_true)
        loss=criterion(seq_pred, seq_true)
        
        eval_data.append([id, y, loss.item(), n_seq])

eval_data = pd.DataFrame(eval_data, columns=['id', 'true', 'score', 'n_seq'])
eval_data.to_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv", index=False)

100%|██████████| 39344/39344 [01:32<00:00, 424.11it/s]


In [10]:
eval_split = 'val_th'
def conf_mat(true, pred):
    tp = ((pred == 1) & (true == 1)).sum()
    fp = ((pred == 1) & (true == 0)).sum()
    fn = ((pred == 0) & (true == 1)).sum()
    tn = ((pred == 0) & (true == 0)).sum()
    return tp, fp, fn, tn

eval_result = []
eval_data = pd.read_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv")
scores = eval_data['score'].unique()

for s in tqdm(scores):
    eval_data['pred'] = np.where(eval_data['score']>=s, 1, 0)
    tmp = eval_data.groupby('id').agg({'true': lambda x: x.values[0], 'pred': 'max'}).reset_index()
    tp, fp, fn, tn = conf_mat(tmp['true'], tmp['pred'])

    eval_result.append([s, tp/(tp+fn), tp/(tp+fp), 2*tp/(fp+2*tp+fn)])

eval_result = pd.DataFrame(eval_result, columns=['score', 'rec', 'prec', 'f1'])
eval_result.to_csv(f'eval_result-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv', index=False)

100%|██████████| 40034/40034 [1:02:16<00:00, 10.71it/s]


## ver 3, low_esi 1

In [11]:
# select data
ver = 3 # 1, 2, 3
low_esi = 1 # 0, 1, 'all'
data = data_dict[ver][low_esi]
_, max_seq_len, n_var = data['trn']['X'].shape

# make it as data loaders
# building data loader
data_loaders = {i:{} for i in ['trn', 'val_tr', 'val_th', 'tst']}
for i in tqdm(['trn', 'val_tr', 'val_th', 'tst']):
    tmp_X = torch.tensor(data[i]['X'].astype(np.float32))
    tmp_y = torch.tensor(data[i]['y'].astype(int))
    tmp_ids = torch.tensor(data[i]['ids'].astype(int))
    tmp_n_seq = torch.tensor(data[i]['n_seq'].astype(int))
    
    batch_size = 256 # 256, 128, 64
    data_loaders[i] = DataLoader(dataset=TensorDataset(tmp_X, tmp_y, tmp_ids, tmp_n_seq), batch_size=batch_size if i=='trn' else tmp_X.shape[0] if i=='var_tr' else 1, shuffle=False)

100%|██████████| 4/4 [00:00<00:00, 36.93it/s]


In [12]:
# training setting
seed_everything(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

hidden_unit = 256 # 64, 128, 256, 512
n_epochs, factor, patience, min_lr = (5000, 0.1, 100, 1e-6)
loss_reduction = 'global_mean'

gc.collect()
torch.cuda.empty_cache()
model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor, patience=patience, min_lr=min_lr, verbose=True)
criterion = nn.MSELoss(reduction='none').to(device)

history = dict(train=[], val=[])

best_model_wts = copy.deepcopy(model.state_dict())
best_loss = float('inf')
early_stopping_counter = 0

In [13]:
# training
for epoch in range(1, n_epochs+1):
    model = model.train()

    train_losses = []
    for seq_true, _, _, _ in data_loaders['trn']:
        seq_true = seq_true.to(device)
        mask = ~torch.all(seq_true==0, axis=2)
        seq_pred = model(seq_true)

        if loss_reduction == 'stay_wise_mean':
            l = criterion(seq_pred[mask], seq_true[mask]).sum(axis=1)
            lens = mask.sum(axis=1).detach().cpu()
            c_lens = lens.cumsum(dim=0)
            loss = 0
            for idx, i in enumerate(c_lens):
                s = 0 if idx == 0 else c_lens[idx-1]
                loss += l[s:c_lens[idx]].sum()/lens[idx]
        elif loss_reduction == 'global_mean':
            l = criterion(seq_pred[mask], seq_true[mask]).sum()
            loss = l/mask.sum()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    val_losses = []
    model = model.eval()
    with torch.no_grad():
        for seq_true, _, _, _ in data_loaders['val_tr']:
            seq_true = seq_true.to(device)
            mask = ~torch.all(seq_true==0, axis=2)
            seq_pred = model(seq_true)

            if loss_reduction == 'stay_wise_mean':
                l = criterion(seq_pred[mask], seq_true[mask]).sum(axis=1)
                lens = mask.sum(axis=1).detach().cpu()
                c_lens = lens.cumsum(dim=0)
                loss = 0
                for idx, i in enumerate(c_lens):
                    s = 0 if idx == 0 else c_lens[idx-1]
                    loss += l[s:c_lens[idx]].sum()/lens[idx]
            elif loss_reduction == 'global_mean':
                l = criterion(seq_pred[mask], seq_true[mask]).sum()
                loss = l/mask.sum()

            val_losses.append(loss.item())

    train_loss = np.mean(train_losses)
    val_loss = np.mean(val_losses)

    history['train'].append(train_loss)
    history['val'].append(val_loss)

    print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')

    scheduler.step(val_loss)

    print("Current learning rate:", optimizer.param_groups[0]['lr'])

    if val_loss < best_loss:
        best_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f'Early stopping at epoch {epoch} due to no improvement in validation loss.')
            break
    
    print(f'early_stopping_counter: {early_stopping_counter} ')
    

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth')

with open(f'model_history-lstm_ae-low_esi{ver}-{low_esi}.pkl', 'wb') as f:
    pickle.dump(history, f)

Epoch 1: train loss 29.29897666558987 val loss 25.937636117243397
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 2: train loss 28.305903737137957 val loss 25.699893199306285
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 3: train loss 28.2498651597558 val loss 25.696878304444446
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 4: train loss 26.824043390227526 val loss 21.962290358563983
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 5: train loss 23.34777094678181 val loss 19.326372412305215
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 6: train loss 20.41100878831817 val loss 16.79308662708971
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 7: train loss 18.16588385512189 val loss 15.117013239798768
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 8: train loss 15.668107800367402 val loss 12.228517960956472
Current learning rate: 0.001
early_stopping_counter: 0 
Epoch 9: train loss 14.

In [14]:
# evaluation data
gc.collect()
torch.cuda.empty_cache()

seed_everything(SEED)
hidden_unit = 256

model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)
model.load_state_dict(torch.load(f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth'))
model = model.eval()

criterion = nn.MSELoss(reduction='mean').to(device)

# loss calculation
eval_split = 'val_th' #tst val_th

eval_data = []
with torch.no_grad():
    for seq_true, y, id, n_seq in tqdm(data_loaders[eval_split]):
        id = id.cpu().numpy().ravel()[0]
        y = y.cpu().numpy().ravel()[0]
        seq_true = seq_true.to(device)
        seq_pred = model(seq_true)
        loss=criterion(seq_pred, seq_true)
        
        eval_data.append([id, y, loss.item(), n_seq])

eval_data = pd.DataFrame(eval_data, columns=['id', 'true', 'score', 'n_seq'])
eval_data.to_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv", index=False)

100%|██████████| 114/114 [00:00<00:00, 409.66it/s]


In [15]:
# evaluation data
gc.collect()
torch.cuda.empty_cache()

seed_everything(SEED)
hidden_unit = 256

model = RecurrentAutoencoder(max_seq_len, n_var, hidden_unit)
model = model.to(device)
model.load_state_dict(torch.load(f'model_best-lstm_ae-low_esi{ver}-{low_esi}.pth'))
model = model.eval()

criterion = nn.MSELoss(reduction='mean').to(device)

# loss calculation
eval_split = 'tst' #tst val_th

eval_data = []
with torch.no_grad():
    for seq_true, y, id, n_seq in tqdm(data_loaders[eval_split]):
        id = id.cpu().numpy().ravel()[0]
        y = y.cpu().numpy().ravel()[0]
        seq_true = seq_true.to(device)
        seq_pred = model(seq_true)
        loss=criterion(seq_pred, seq_true)
        
        eval_data.append([id, y, loss.item(), n_seq])

eval_data = pd.DataFrame(eval_data, columns=['id', 'true', 'score', 'n_seq'])
eval_data.to_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv", index=False)

100%|██████████| 79/79 [00:00<00:00, 429.62it/s]


In [23]:
eval_split = 'val_th'
def conf_mat(true, pred):
    tp = ((pred == 1) & (true == 1)).sum()
    fp = ((pred == 1) & (true == 0)).sum()
    fn = ((pred == 0) & (true == 1)).sum()
    tn = ((pred == 0) & (true == 0)).sum()
    return tp, fp, fn, tn

eval_result = []
eval_data = pd.read_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv")
scores = eval_data['score'].unique()

for s in tqdm(scores):
    eval_data['pred'] = np.where(eval_data['score']>=s, 1, 0)
    tmp = eval_data.groupby('id').agg({'true': lambda x: x.values[0], 'pred': 'max'}).reset_index()
    tp, fp, fn, tn = conf_mat(tmp['true'], tmp['pred'])

    eval_result.append([s, tp/(tp+fn), tp/(tp+fp), 2*tp/(fp+2*tp+fn)])

eval_result = pd.DataFrame(eval_result, columns=['score', 'rec', 'prec', 'f1'])
eval_result.to_csv(f'eval_result-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv', index=False)

  0%|          | 75/40034 [00:07<1:06:57,  9.95it/s]


KeyboardInterrupt: 

In [24]:
eval_data

Unnamed: 0,id,true,score,n_seq,pred
0,30000368,1,0.316510,"tensor([[[0],\n [0],\n [0],\n ...",1
1,30000368,1,0.332049,"tensor([[[ 0],\n [ 1],\n [ 2],...",1
2,30003172,1,0.398140,"tensor([[[0],\n [0],\n [0],\n ...",1
3,30003172,1,0.583818,"tensor([[[0],\n [1],\n [0],\n ...",1
4,30003172,1,0.498034,"tensor([[[ 0],\n [ 1],\n [ 2],...",1
...,...,...,...,...,...
40087,39999414,1,0.491358,"tensor([[[0],\n [1],\n [2],\n ...",1
40088,39999414,1,0.440783,"tensor([[[0],\n [1],\n [2],\n ...",1
40089,39999414,1,0.389693,"tensor([[[ 0],\n [ 1],\n [ 2],...",1
40090,39999833,0,0.166329,"tensor([[[0],\n [0],\n [0],\n ...",0


In [21]:
ver=2
eval_split = 'tst'
def conf_mat(true, pred):
    tp = ((pred == 1) & (true == 1)).sum()
    fp = ((pred == 1) & (true == 0)).sum()
    fn = ((pred == 0) & (true == 1)).sum()
    tn = ((pred == 0) & (true == 0)).sum()
    return tp, fp, fn, tn

eval_result = []
eval_data = pd.read_csv(f"eval_data-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv")
scores = eval_data['score'].unique()

for s in tqdm(scores):
    eval_data['pred'] = np.where(eval_data['score']>=s, 1, 0)
    tmp = eval_data.groupby('id').agg({'true': lambda x: x.values[0], 'pred': 'max'}).reset_index()
    tp, fp, fn, tn = conf_mat(tmp['true'], tmp['pred'])

    eval_result.append([s, tp/(tp+fn), tp/(tp+fp), 2*tp/(fp+2*tp+fn)])

eval_result = pd.DataFrame(eval_result, columns=['score', 'rec', 'prec', 'f1'])
eval_result.to_csv(f'eval_result-low_esi{ver}-{low_esi}-lstm_ae-{eval_split}.csv', index=False)

  0%|          | 38/39290 [00:03<1:05:12, 10.03it/s]


KeyboardInterrupt: 

In [22]:
eval_data

Unnamed: 0,id,true,score,n_seq,pred
0,30001684,0,0.354099,"tensor([[[0],\n [0],\n [0],\n ...",1
1,30001684,0,0.385090,"tensor([[[0],\n [1],\n [0],\n ...",1
2,30001684,0,0.368411,"tensor([[[0],\n [1],\n [2],\n ...",1
3,30001684,0,0.313171,"tensor([[[0],\n [1],\n [2],\n ...",1
4,30001684,0,0.331349,"tensor([[[0],\n [1],\n [2],\n ...",1
...,...,...,...,...,...
39339,39997251,0,0.277656,"tensor([[[0],\n [0],\n [0],\n ...",1
39340,39997251,0,0.271088,"tensor([[[0],\n [1],\n [0],\n ...",1
39341,39997251,0,0.252475,"tensor([[[0],\n [1],\n [2],\n ...",0
39342,39997251,0,0.227602,"tensor([[[0],\n [1],\n [2],\n ...",0
