In [21]:
import numpy as np
import random
import datetime
import time
import os
from collections import defaultdict
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from tqdm import tqdm
import argparse
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader

from ember_utils import *
from ember_model import *
from ember_pjr_utils import *



def create_parent_folder(file_path):
    if not os.path.exists(os.path.dirname(file_path)):
        os.makedirs(os.path.dirname(file_path))




def get_family_labeled_month_data(data_dir, month, train=True):
    
    if train:
        data_dir = data_dir + str(month) + '/'
        XY_train = np.load(data_dir + 'XY_train.npz')
        X_tr, Y_tr, Y_tr_family = XY_train['X_train'], XY_train['Y_train'], XY_train['Y_family_train']

        #print(f'X_train {X_tr.shape} Y_train {Y_tr.shape} Y_tr_family {Y_tr_family.shape}')
        
        return X_tr, Y_tr, Y_tr_family
    else:
        data_dir = data_dir + str(month) + '/'
        XY_test = np.load(data_dir + 'XY_test.npz')
        X_test, Y_test, Y_test_family = XY_test['X_test'], XY_test['Y_test'], XY_test['Y_family_test']

        return X_test, Y_test, Y_test_family

    
    
def get_family_labeled_task_test_data(data_dir, task_months, mlp_net=False):
    
    X_te, Y_te, Y_te_family = get_family_labeled_month_data(data_dir, task_months[-1], train=False)
    
    for month in task_months[:-1]:
        pre_X_te, pre_Y_te, pre_Y_te_family = get_family_labeled_month_data(data_dir, month, train=False)
        X_te, Y_te, Y_te_family = np.concatenate((X_te, pre_X_te)), np.concatenate((Y_te, pre_Y_te)),\
                                np.concatenate((Y_te_family, pre_Y_te_family))
        

    X_test, Y_test, Y_test_family = X_te, Y_te, Y_te_family
    #print(f'X_test {X_test.shape} Y_test {Y_test.shape} Y_te_family {Y_te_family.shape}')
    
    return X_test, Y_test, Y_test_family


def make_family_based_dict(X_train, Y_train, Y_train_family, task_month, global_family_dict):
    count = 0
    for x_ind, x_sample in enumerate(X_train):
        count += 1
        #print(x_ind, Y_train[x_ind])

        if Y_train[x_ind] == 0:
            global_family_dict["goodware"].append(x_sample)
        if Y_train[x_ind] == 1:
            if Y_train_family[x_ind] == '':
                global_family_dict["others_family"].append(x_sample)
            else:
                global_family_dict[Y_train_family[x_ind]].append(x_sample)

    print(f'Task {task_month} and #-of new samples stored {count}')
    
    return global_family_dict



def get_replay_samples(global_family_dict, num_samples_per_malware_family):
    pre_malware_samples = []

    cnt = 0
    for k in global_family_dict.keys():
        if k != 'goodware':
            cnt += 1
            if num_samples_per_malware_family > len(global_family_dict[k]):
                selected_family_samples = random.sample(global_family_dict[k], len(global_family_dict[k]))
            else:
                selected_family_samples = random.sample(global_family_dict[k], num_samples_per_malware_family)

            #print(selected_family_samples)
            for sample in selected_family_samples:
                pre_malware_samples.append(sample)
                
    if len(global_family_dict['goodware']) < len(pre_malware_samples):
        pre_goodware_samples = random.sample(global_family_dict['goodware'], len(global_family_dict['goodware']))
    else:
        pre_goodware_samples = random.sample(global_family_dict['goodware'], len(pre_malware_samples))

    samples_to_replay = np.concatenate((np.array(pre_goodware_samples), np.array(pre_malware_samples)))
    labels_to_replay = np.concatenate((np.zeros(len(pre_goodware_samples)), np.ones(len(pre_malware_samples))))


    print(f'X_replay {samples_to_replay.shape} Y_replay {labels_to_replay.shape}')
    #print(f'Replay {len(pre_malware_samples)} malware samples of {len(global_family_dict.keys()) -1} families')
    #print(f'and Replay {len(pre_goodware_samples)} goodware samples')
    
    
    return samples_to_replay, labels_to_replay




all_task_months = ['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
                   '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12']

data_dir = '../../month_based_processing_with_family_labels/'
num_samples_per_malware_family = 1


replay_samples_count = []
num_new_samples_per_task = []

stored_global_family_dict = defaultdict(list)
for task_month in range(len(all_task_months)):

    #print(f'\n{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Round {cnt} ...')
    task_start = time.time()

    current_task = all_task_months[task_month]
    task_months = all_task_months[:task_month+1]
    print(f'Current Task {current_task} w/ {num_samples_per_malware_family} samples to Replay per Malware family.')


    X_train, Y_train, Y_train_family = get_family_labeled_month_data(data_dir, current_task)
    X_test, Y_test, Y_test_family = get_family_labeled_task_test_data(data_dir, task_months, mlp_net=True)
    
    num_new_samples_per_task.append(len(Y_train))
    
    #if current_task == all_task_months[0]:
    #        stored_global_family_dict = make_family_based_dict(X_train, Y_train, Y_train_family,\
    #                                                           current_task, stored_global_family_dict)
    #else:
    #        X_replay, Y_replay = get_replay_samples(stored_global_family_dict, num_samples_per_malware_family)
    #        replay_samples_count.append(len(Y_replay))
    #        stored_global_family_dict = make_family_based_dict(X_train, Y_train, Y_train_family,\
    #                                                           current_task, stored_global_family_dict)
            
    print(f'*****************   *****************   *****************')
    print()
    

print()
print(f'{replay_samples_count}')

Current Task 2018-01 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-02 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-03 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-04 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-05 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-06 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-07 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 2018-08 w/ 1 samples to Replay per Malware family.
*****************   *****************   *****************

Current Task 201

In [23]:
num_new_samples_per_task = np.array(num_new_samples_per_task)

In [46]:
joint_rep = np.zeros(12)

for indx, x in enumerate(num_new_samples_per_task):
    #print(indx)
    if indx == 0:
        print('fer')
        joint_rep[indx] = 0.0
    elif indx == 1:
        print('her')
        joint_rep[indx] = num_new_samples_per_task[indx - 1]
    else:
        print('ser')
        joint_rep[indx] = joint_rep[indx - 1] + x
        
print(joint_rep)
print(joint_rep*.20)
print(joint_rep*.50)

fer
her
ser
ser
ser
ser
ser
ser
ser
ser
ser
ser
[     0.  55722.  93094. 139967. 181287. 224847. 271125. 312007. 368499.
 446271. 536271. 626271.]
[     0.   11144.4  18618.8  27993.4  36257.4  44969.4  54225.   62401.4
  73699.8  89254.2 107254.2 125254.2]
[     0.   27861.   46547.   69983.5  90643.5 112423.5 135562.5 156003.5
 184249.5 223135.5 268135.5 313135.5]


In [4]:
all_task_months = ['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
                   '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12']

data_dir = '../../month_based_processing_with_family_labels/'


patience = 5
replay_type = 'partial_joint_replay'


num_exps = 1 #args.num_exps
#task_month = args.task_month
num_epoch = 500 #args.num_epoch
batch_size = 6000 #args.batch_size
#replay_portion = args.replay_portion
num_samples_per_malware_family = 5000

exp_seeds = [random.randint(1, 99999) for i in range(num_exps)]


allexps_acc = {}
allexps_rocauc = {}
allexps_training_time = {}
all_exps_best_epoch = {}



cnt =  1    
for exp in exp_seeds:
    start_time = time.time()
    use_cuda = True
    print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
    use_cuda = use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(exp)

    model = Ember_MLP_Net()
    
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.000001)
       
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    
    model = model.to(device)
    print(f'Model has {count_parameters(model)/1000000}m parameters')    
    criterion = nn.BCELoss()    

    
    
    stored_global_family_dict = defaultdict(list)
    
    standardization = StandardScaler()
    standard_scaler = None
    for task_month in range(len(all_task_months)):
                
        print(f'\n{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Round {cnt} ...')
        task_start = time.time()
        
        current_task = all_task_months[task_month]
        task_months = all_task_months[:task_month+1]
        print(f'Current Task {current_task} w/ {num_samples_per_malware_family} samples to Replay per Malware family.')


        model_save_dir = '../pjr_saved_model/PJR_replay_' + str(num_samples_per_malware_family) + '/' + str(current_task) + '/'
        create_parent_folder(model_save_dir)
        
        results_save_dir = './saved_results/PJR_replay_' + str(num_samples_per_malware_family) + '/' 
        create_parent_folder(results_save_dir)

        
        X_train, Y_train, Y_train_family = get_family_labeled_month_data(data_dir, current_task)
        X_test, Y_test, Y_test_family = get_family_labeled_task_test_data(data_dir, task_months, mlp_net=True)
        
        # to debug
        #X_train, Y_train, Y_train_family = X_train[:500], Y_train [:500], Y_train_family[:500]
        #X_test, Y_test, Y_test_family = X_test[:50], Y_test[:50], Y_test_family[:50]
        
        print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Standardizing ...')
        
        standard_scaler = standardization.partial_fit(X_train)

        X_train = standard_scaler.transform(X_train)
        X_test = standard_scaler.transform(X_test)

        X_train, Y_train = np.array(X_train, np.float32), np.array(Y_train, np.int32)
        X_test, Y_test = np.array(X_test, np.float32), np.array(Y_test, np.int32)        
        
        
        
        
        if current_task == all_task_months[0]:
            stored_global_family_dict = make_family_based_dict(X_train, Y_train, Y_train_family,\
                                                               current_task, stored_global_family_dict)
        else:
            X_replay, Y_replay = get_replay_samples(stored_global_family_dict, num_samples_per_malware_family)
            stored_global_family_dict = make_family_based_dict(X_train, Y_train, Y_train_family,\
                                                               current_task, stored_global_family_dict)
        
        
        if current_task == all_task_months[0]:
            print(f'Initial Task {current_task} X_train {X_train.shape} Y_train {Y_train.shape}')
            print(f'************** ************** **************')
            print()
        else:
            print(f'W/O replay samples \n X_train {X_train.shape} Y_train {Y_train.shape}')
            X_train, Y_train = np.concatenate((X_train, X_replay)), np.concatenate((Y_train, Y_replay))
            print(f'With replay samples \n X_train {X_train.shape} Y_train {Y_train.shape}')
            print(f'************** ************** **************')
            print()
        
        
        
        print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Training ...')
        task_training_time, epoch_ran, training_loss, validation_loss  = training_early_stopping(model, model_save_dir,\
                                X_train, Y_train, X_test, Y_test, patience,\
                                batch_size, device, optimizer, num_epoch,\
                                 criterion, replay_type, current_task, exp, earlystopping=True)

        acc, rocauc = testing_aucscore(model, X_test, Y_test, batch_size, device)

        end_time = time.time()

        print(f'Elapsed time {(end_time - start_time)/60} mins.')    


        task_end = time.time()
        task_run_time = (task_end - task_start)/60
        
        try:
            allexps_acc[str(current_task)].append(acc)
            allexps_rocauc[str(current_task)].append(rocauc)
            allexps_training_time[str(current_task)].append(task_run_time)
            all_exps_best_epoch[str(current_task)].append(epoch_ran)
        except:
            allexps_acc[str(current_task)] = [acc]
            allexps_rocauc[str(current_task)] = [rocauc]
            allexps_training_time[str(current_task)] = [task_run_time]
            all_exps_best_epoch[str(current_task)] = [epoch_ran]
        
        
        
        results_f = open(os.path.join(results_save_dir + 'results_accumulated_replay_' + str(num_samples_per_malware_family) + '_results.txt'), 'a')
        result_string = '{}\t{}\t{}\t{}\t{}\t\n'.format(current_task,epoch_ran, task_training_time, acc, rocauc)
        results_f.write(result_string)
        results_f.flush()
        results_f.close()

        
        wf = open(os.path.join(results_save_dir + 'Results_' + str(current_task) + '_' + str(num_epoch) + '_replay_' + str(num_samples_per_malware_family) + '_results.txt'), 'a')
        task_exp_string = '\n\nSeed\t{}\t\tRun time\t{}\tAcc:\t{}\t\tROC_AUC:\t{}\n\tepoch_ran\t{}\t\n\ntraining_loss\t{}\n\nValid_loss\t{}\n\n'.format(exp,task_training_time, acc, rocauc, epoch_ran, training_loss, validation_loss)
        
        wf.write('\n ########################### ########################### ###########################\n')
        wf.write(str(model))
        wf.write(task_exp_string)
        
        wf.flush()
        wf.close()

    end_time = time.time()
    cnt += 1
    print(f'Elapsed time {(end_time - start_time)/60} mins.')
    
results_save_dir = './saved_results/PJR_replay_' + str(num_samples_per_malware_family) + '/' 
create_parent_folder(results_save_dir)

all_results_save_file = results_save_dir + 'PJR_acc_rocauc_tr_time_best_epoch_' + str(args.num_run) + '.npz'

np.savez_compressed(all_results_save_file,
                        accuracy = allexps_acc, rocauc = allexps_rocauc, tr_time = allexps_training_time, best_epochs = all_exps_best_epoch)
print(f'all results saved')


Torch 1.6.0 CUDA 10.2
Model has 3.132161m parameters

2022-09-20 10:38:01 Round 1 ...
Current Task 2018-01 w/ 5000 samples to Replay per Malware family.
X_train (55722, 2381) Y_train (55722,) Y_tr_family (55722,)
X_test (6192, 2381) Y_test (6192,) Y_te_family (6192,)
2022-09-20 10:38:02 Standardizing ...


  0%|          | 0/9 [00:00<?, ?it/s]

Task 2018-01 and #-of new samples stored 55722
Initial Task 2018-01 X_train (55722, 2381) Y_train (55722,)
************** ************** **************

2022-09-20 10:38:05 Training ...
Epoch 1 of 500


100%|██████████| 9/9 [00:01<00:00,  5.51it/s]
100%|██████████| 2/2 [00:00<00:00,  6.09it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.7039, Train Acc: 53.01
Val Loss: 0.6807, Val Acc: 69.41
Validation loss decreased (inf --> 0.680662).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_1.pt
Epoch 2 of 500


100%|██████████| 9/9 [00:01<00:00,  6.09it/s]
100%|██████████| 2/2 [00:00<00:00,  5.34it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.6592, Train Acc: 61.92
Val Loss: 0.6331, Val Acc: 75.55
Validation loss decreased (0.680662 --> 0.633099).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_2.pt
Epoch 3 of 500


100%|██████████| 9/9 [00:01<00:00,  6.38it/s]
100%|██████████| 2/2 [00:00<00:00,  5.30it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.6085, Train Acc: 68.45
Val Loss: 0.5747, Val Acc: 77.15
Validation loss decreased (0.633099 --> 0.574686).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_3.pt
Epoch 4 of 500


100%|██████████| 9/9 [00:01<00:00,  6.47it/s]
100%|██████████| 2/2 [00:00<00:00,  5.47it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.5556, Train Acc: 72.58
Val Loss: 0.4838, Val Acc: 81.76
Validation loss decreased (0.574686 --> 0.483850).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_4.pt
Epoch 5 of 500


100%|██████████| 9/9 [00:01<00:00,  5.49it/s]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.4987, Train Acc: 76.02
Val Loss: 0.4500, Val Acc: 78.71
Validation loss decreased (0.483850 --> 0.450000).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_5.pt
Epoch 6 of 500


100%|██████████| 9/9 [00:01<00:00,  7.20it/s]
100%|██████████| 2/2 [00:00<00:00,  5.51it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.4605, Train Acc: 78.01
Val Loss: 0.3819, Val Acc: 82.65
Validation loss decreased (0.450000 --> 0.381923).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_6.pt
Epoch 7 of 500


100%|██████████| 9/9 [00:01<00:00,  6.48it/s]
100%|██████████| 2/2 [00:00<00:00,  5.35it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.4232, Train Acc: 79.93
Val Loss: 0.3538, Val Acc: 84.62
Validation loss decreased (0.381923 --> 0.353795).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_7.pt
Epoch 8 of 500


100%|██████████| 9/9 [00:01<00:00,  7.37it/s]
100%|██████████| 2/2 [00:00<00:00,  5.45it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.3855, Train Acc: 82.16
Val Loss: 0.3322, Val Acc: 87.19
Validation loss decreased (0.353795 --> 0.332237).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_8.pt
Epoch 9 of 500


100%|██████████| 9/9 [00:01<00:00,  6.37it/s]
100%|██████████| 2/2 [00:00<00:00,  5.26it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.3584, Train Acc: 83.99
Val Loss: 0.2824, Val Acc: 89.75
Validation loss decreased (0.332237 --> 0.282358).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_9.pt
Epoch 10 of 500


100%|██████████| 9/9 [00:01<00:00,  7.39it/s]
100%|██████████| 2/2 [00:00<00:00,  5.28it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.3314, Train Acc: 85.44
Val Loss: 0.2788, Val Acc: 88.47
Validation loss decreased (0.282358 --> 0.278834).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_10.pt
Epoch 11 of 500


100%|██████████| 9/9 [00:01<00:00,  6.64it/s]
100%|██████████| 2/2 [00:00<00:00,  5.25it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.3076, Train Acc: 86.64
Val Loss: 0.2815, Val Acc: 88.26
EarlyStopping counter: 1 out of 5
Epoch 12 of 500


100%|██████████| 9/9 [00:01<00:00,  6.52it/s]
100%|██████████| 2/2 [00:00<00:00,  5.30it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2893, Train Acc: 87.54
Val Loss: 0.2788, Val Acc: 87.04
Validation loss decreased (0.278834 --> 0.278775).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_12.pt
Epoch 13 of 500


100%|██████████| 9/9 [00:01<00:00,  6.59it/s]
100%|██████████| 2/2 [00:00<00:00,  5.28it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2728, Train Acc: 88.40
Val Loss: 0.2366, Val Acc: 90.05
Validation loss decreased (0.278775 --> 0.236636).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_13.pt
Epoch 14 of 500


100%|██████████| 9/9 [00:01<00:00,  7.25it/s]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2601, Train Acc: 89.14
Val Loss: 0.2015, Val Acc: 91.70
Validation loss decreased (0.236636 --> 0.201539).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_14.pt
Epoch 15 of 500


100%|██████████| 9/9 [00:01<00:00,  6.39it/s]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2471, Train Acc: 89.52
Val Loss: 0.2338, Val Acc: 90.24
EarlyStopping counter: 1 out of 5
Epoch 16 of 500


100%|██████████| 9/9 [00:01<00:00,  5.32it/s]
100%|██████████| 2/2 [00:00<00:00,  5.27it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2403, Train Acc: 89.99
Val Loss: 0.2075, Val Acc: 91.37
EarlyStopping counter: 2 out of 5
Epoch 17 of 500


100%|██████████| 9/9 [00:01<00:00,  6.56it/s]
100%|██████████| 2/2 [00:00<00:00,  5.28it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2331, Train Acc: 90.37
Val Loss: 0.1934, Val Acc: 91.79
Validation loss decreased (0.201539 --> 0.193431).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_17.pt
Epoch 18 of 500


100%|██████████| 9/9 [00:01<00:00,  6.56it/s]
100%|██████████| 2/2 [00:00<00:00,  5.29it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2250, Train Acc: 90.87
Val Loss: 0.2167, Val Acc: 90.62
EarlyStopping counter: 1 out of 5
Epoch 19 of 500


100%|██████████| 9/9 [00:01<00:00,  6.52it/s]
100%|██████████| 2/2 [00:00<00:00,  5.32it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2165, Train Acc: 91.20
Val Loss: 0.1775, Val Acc: 92.67
Validation loss decreased (0.193431 --> 0.177496).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_19.pt
Epoch 20 of 500


100%|██████████| 9/9 [00:01<00:00,  6.50it/s]
100%|██████████| 2/2 [00:00<00:00,  5.33it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2068, Train Acc: 91.70
Val Loss: 0.1719, Val Acc: 92.32
Validation loss decreased (0.177496 --> 0.171850).  Saving model ...
../pjr_saved_model/PJR_replay_5000/2018-01/best_model_epoch_20.pt
Epoch 21 of 500


100%|██████████| 9/9 [00:01<00:00,  7.19it/s]
100%|██████████| 2/2 [00:00<00:00,  5.44it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2037, Train Acc: 91.86
Val Loss: 0.2051, Val Acc: 92.25
EarlyStopping counter: 1 out of 5
Epoch 22 of 500


100%|██████████| 9/9 [00:01<00:00,  5.62it/s]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.2008, Train Acc: 91.93
Val Loss: 0.1983, Val Acc: 91.88
EarlyStopping counter: 2 out of 5
Epoch 23 of 500


100%|██████████| 9/9 [00:01<00:00,  6.25it/s]
100%|██████████| 2/2 [00:00<00:00,  5.34it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.1939, Train Acc: 92.38
Val Loss: 0.1889, Val Acc: 92.66
EarlyStopping counter: 3 out of 5
Epoch 24 of 500


100%|██████████| 9/9 [00:01<00:00,  6.39it/s]
100%|██████████| 2/2 [00:00<00:00,  5.46it/s]
  0%|          | 0/9 [00:00<?, ?it/s]

Train Loss: 0.1933, Train Acc: 92.27
Val Loss: 0.1821, Val Acc: 92.51
EarlyStopping counter: 4 out of 5
Epoch 25 of 500


100%|██████████| 9/9 [00:01<00:00,  5.66it/s]
100%|██████████| 2/2 [00:00<00:00,  5.23it/s]
100%|██████████| 2/2 [00:00<00:00, 12.36it/s]


Train Loss: 0.1857, Train Acc: 92.61
Val Loss: 0.2000, Val Acc: 92.14
EarlyStopping counter: 5 out of 5
Early stopping
Training time: 0.773 minutes
0.9278100775193798 0.9818103702095049
Elapsed time 0.8428683559099833 mins.

2022-09-20 10:38:51 Round 1 ...
Current Task 2018-02 w/ 5000 samples to Replay per Malware family.
X_train (48723, 2381) Y_train (48723,) Y_tr_family (48723,)
X_test (11606, 2381) Y_test (11606,) Y_te_family (11606,)
2022-09-20 10:38:52 Standardizing ...


ValueError: Sample larger than population or is negative