In [2]:
import numpy as np
import os
# import pandas as pd
import matplotlib.pyplot as plt
import time, random
from tqdm import tqdm
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.model_selection import train_test_split
import datetime
import argparse
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader

from ember_utils import *
from ember_model import *
from ember_pjr_utils import *

In [3]:
raw_path = '/home/mr6564/continual_research/AZ_Data/Domain/'
train_file = raw_path + 'All_Domain_AZ_Train.npz'
test_file = raw_path + 'All_Domain_AZ_Test.npz'


train_data = np.load(train_file, allow_pickle=True)
test_data = np.load(test_file, allow_pickle=True)

X_train, Y_train, Y_tr_family_year = train_data['X_train'], train_data['Y_train'], train_data['Y_tr_family']
X_test, Y_test, Y_te_family_year = test_data['X_test'], test_data['Y_test'], test_data['Y_te_family']


In [9]:
class Ember_MLP_Net(nn.Module):
    def __init__(self, input_features):
        super(Ember_MLP_Net, self).__init__()
        
        self.fc1 = nn.Linear(input_features, 1024)
        #self.fc1_bn = nn.BatchNorm1d(1024)
        self.act1 = nn.ReLU()
        #self.fc1_drop = nn.Dropout(p=0.5)
        
        self.fc2 = nn.Linear(1024, 512)
        #self.fc2_bn = nn.BatchNorm1d(512)
        self.act2 = nn.ReLU()
        #self.fc2_drop = nn.Dropout(p=0.5)
        
        self.fc3 = nn.Linear(512, 256)
        #self.fc3_bn = nn.BatchNorm1d(256)
        self.act3 = nn.ReLU()
        #self.fc3_drop = nn.Dropout(p=0.5)        
        
        self.fc4 = nn.Linear(256, 128)
        #self.fc4_bn = nn.BatchNorm1d(128)
        self.act4 = nn.ReLU()
        #self.fc4_drop = nn.Dropout(p=0.5)  
        
        self.fc_last = nn.Linear(128, 1) 
        self.out = nn.Sigmoid()
        
        #self.activate = nn.ReLU()

    def forward(self, x):
        x = x.view(x.size(0), -1)
        #print(x.shape)
        x = self.fc1(x)
        #x = self.fc1_bn(x)
        x = self.act1(x) 
        #x = self.fc1_drop(x)

        x = self.fc2(x)
        #x = self.fc2_bn(x)
        x = self.act2(x) 
        #x = self.fc2_drop(x)
        
        x = self.fc3(x)
        #x = self.fc3_bn(x)
        x = self.act3(x) 
        #x = self.fc3_drop(x)
        
        x = self.fc4(x)
        #x = self.fc4_bn(x)
        x = self.act4(x)
        #x = self.fc4_drop(x)
        
        x = self.fc_last(x)
        x = self.out(x)
        return x






exp_seeds = [random.randint(1, 99999) for i in range(1)]


accs_all = []
rocauc_all = []

num_epoch = 500
batch_size = 512
patience = 5


input_features = X_train.shape[1]

replay_type, current_task = 'azdomain', 'azdomain'

for exp in exp_seeds:

    start_time = time.time()
    use_cuda = True
    print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
    use_cuda = use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(exp)

    model = Ember_MLP_Net(input_features)
    #optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.000001)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
       
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    
    model = model.to(device)
    print(f'Model has {count_parameters(model)/1000000}m parameters')    
    criterion = nn.BCELoss()    

    
#     standardization = StandardScaler()
#     standard_scaler = standardization.fit(X_train)

#     X_train = standard_scaler.transform(X_train)
#     X_test = standard_scaler.transform(X_test)
    
    X_train, Y_train = np.array(X_train, np.float32), np.array(Y_train, np.int32)
    X_test, Y_test = np.array(X_test, np.float32), np.array(Y_test, np.int32)  

    
    
    model_save_dir = '../az_model/model/'
    create_parent_folder(model_save_dir)

    opt_save_path = '../az_model/opt/'
    create_parent_folder(opt_save_path)

    results_save_dir =  '../az_model/res/' 
    create_parent_folder(results_save_dir)

    print(f'X_train {X_train.shape} Y_train {Y_train.shape}')
    print(f'X_test {X_test.shape} Y_test {Y_test.shape}')
    
    
    task_training_time, epoch_ran, training_loss, validation_loss  = training_early_stopping(\
                                 model, model_save_dir, opt_save_path, X_train, Y_train,\
                                 X_test, Y_test, patience, batch_size, device, optimizer, num_epoch,\
                                 criterion, replay_type, current_task, exp, earlystopping=True)

    
    end_time = time.time()
    print(f'Elapsed time {(end_time - start_time)/60} mins.') 
    
    
    
    best_model_path = model_save_dir + os.listdir(model_save_dir)[0]
    print(f'loading best model {best_model_path}')
    model.load_state_dict(torch.load(best_model_path))

    #optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.000001)
    best_optimizer = opt_save_path + os.listdir(opt_save_path)[0]
    print(f'loading best optimizer {best_optimizer}')
    optimizer.load_state_dict(torch.load(best_optimizer))


    acc, precision, recall, f1score = testing_aucscore(model, X_test, Y_test, batch_size, device)
    print()
    del model_save_dir
    del opt_save_path
    del results_save_dir
    
#     accs_all.append(acc)
#     rocauc_all.append(rocauc)



Torch 2.0.1 CUDA 11.8
Model has 3.720193m parameters
X_train (682598, 2959) Y_train (682598,)
X_test (75848, 2959) Y_test (75848,)
Epoch 1 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:15<00:00, 88.17it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 108.70it/s]


Train Loss: 0.6931, Train Acc: 0.5009
Val Loss: 0.6933, Val Acc: 0.4641
Validation loss decreased (inf --> 0.693308).  Saving model ...
../az_model/best_model_epoch_1.pt
../az_model/best_optimizer_epoch_1.pt
Epoch 2 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:12<00:00, 104.01it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 99.83it/s]


Train Loss: 0.6931, Train Acc: 0.5060
Val Loss: 0.6916, Val Acc: 0.8509
Validation loss decreased (0.693308 --> 0.691646).  Saving model ...
../az_model/best_model_epoch_2.pt
../az_model/best_optimizer_epoch_2.pt
Epoch 3 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:13<00:00, 102.45it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 102.68it/s]


Train Loss: 0.6931, Train Acc: 0.5046
Val Loss: 0.6940, Val Acc: 0.2701
EarlyStopping counter: 1 out of 5
Epoch 4 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:12<00:00, 108.98it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 123.34it/s]


Train Loss: 0.6931, Train Acc: 0.5058
Val Loss: 0.6948, Val Acc: 0.1623
EarlyStopping counter: 2 out of 5
Epoch 5 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:12<00:00, 105.14it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 108.88it/s]


Train Loss: 0.6931, Train Acc: 0.5066
Val Loss: 0.6929, Val Acc: 0.5798
EarlyStopping counter: 3 out of 5
Epoch 6 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:12<00:00, 106.63it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 98.36it/s]


Train Loss: 0.6931, Train Acc: 0.5102
Val Loss: 0.6938, Val Acc: 0.4080
EarlyStopping counter: 4 out of 5
Epoch 7 of 500


100%|██████████████████████████████████████████████████████████| 1333/1333 [00:12<00:00, 104.98it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 114.72it/s]


Train Loss: 0.6931, Train Acc: 0.5086
Val Loss: 0.6916, Val Acc: 0.7596
EarlyStopping counter: 5 out of 5
Early stopping
Training time: 1.683 minutes
Elapsed time 1.7526292045911154 mins.
loading best model ../az_model/best_model_epoch_2.pt
loading best optimizer ../az_model/best_model_epoch_2.pt


KeyError: 'param_groups'

In [None]:
model

In [5]:
best_optimizer = opt_save_path + os.listdir(opt_save_path)[0]


In [8]:
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [7]:
torch.load(best_optimizer)

OrderedDict([('fc1.weight',
              tensor([[ 0.0042, -0.0176,  0.0149,  ..., -0.0178, -0.0115,  0.0123],
                      [ 0.1327,  0.0044,  0.1471,  ...,  0.0402,  0.1023, -0.0720],
                      [ 0.0067,  0.0534,  0.0393,  ..., -0.0618, -0.0373,  0.0042],
                      ...,
                      [-0.0147, -0.0301,  0.0096,  ...,  0.0037, -0.0127, -0.0127],
                      [-0.0687,  0.0553, -0.2438,  ..., -0.0508, -0.0132,  0.0895],
                      [-0.0018,  0.1080,  0.0264,  ...,  0.0352,  0.0104, -0.0060]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([-0.0549, -0.0018, -0.0854,  ..., -0.0033,  0.0239, -0.0687],
                     device='cuda:0')),
             ('fc2.weight',
              tensor([[-0.0189, -0.0295,  0.0117,  ...,  0.0259, -0.0872, -0.0226],
                      [ 0.0567,  0.0365,  0.0364,  ..., -0.0423, -0.1114, -0.0538],
                      [-0.0182, -0.0354, -0.0076,  ...,

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

print(f'loading best optimizer {best_optimizer}')
optimizer.load_state_dict(torch.load(best_optimizer))

In [11]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import time, random
from tqdm import tqdm
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler, QuantileTransformer
import datetime
import argparse
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader

from ember_utils import *
from ember_model import *
from ember_pjr_utils import *


start_time = time.time()
use_cuda = True
print('Torch', torch.__version__, 'CUDA', torch.version.cuda)
use_cuda = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
torch.manual_seed(exp)

model = Ember_MLP_Net(input_features)
#optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.000001)

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)

model = model.to(device)
print(f'Model has {count_parameters(model)/1000000}m parameters')    
criterion = nn.BCELoss()    
    

model_save_dir = '../az_model/model/'
create_parent_folder(model_save_dir)

opt_save_path = '../az_model/opt/'
create_parent_folder(opt_save_path)

results_save_dir =  '../az_model/res/' 
create_parent_folder(results_save_dir)

# X_train = standard_scaler.transform(X_train)
# X_test = standard_scaler.transform(X_test)


X_train, Y_train = np.array(X_train, np.float32), np.array(Y_train, np.int32)
X_test, Y_test = np.array(X_test, np.float32), np.array(Y_test, np.int32)        

print(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} Training ...')



task_training_time, epoch_ran, training_loss, validation_loss  = training_early_stopping(\
                     model, model_save_dir, opt_save_path, X_train, Y_train,\
                     X_test, Y_test, patience, batch_size, device, optimizer, num_epoch,\
                     criterion, replay_type, current_task, exp, earlystopping=True)
        

#model = Ember_MLP_Net()
#model = model.to(device)
#load the best model for this task
best_model_path = model_save_dir + os.listdir(model_save_dir)[0]
print(f'loading best model {best_model_path}')
model.load_state_dict(torch.load(best_model_path))


#optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.000001)
best_optimizer = opt_save_path + os.listdir(opt_save_path)[0]
print(f'loading best optimizer {best_optimizer}')
optimizer.load_state_dict(torch.load(best_optimizer))

acc, precision, recall, f1score = testing_aucscore(model, X_test, Y_test, batch_size, device)



end_time = time.time()
print(f'Elapsed time {(end_time - start_time)/60} mins.')

del model_save_dir
del opt_save_path
del results_save_dir



Torch 2.0.1 CUDA 11.8
Model has 3.724033m parameters
2023-10-30 15:25:44 Training ...
Epoch 1 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 90.97it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 98.37it/s]


Train Loss: 0.6998, Train Acc: 0.5028
Val Loss: 0.6937, Val Acc: 0.5250
Validation loss decreased (inf --> 0.693733).  Saving model ...
../az_model/model/best_model_epoch_1.pt
../az_model/opt/best_optimizer_epoch_1.pt
Epoch 2 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 93.41it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 121.56it/s]


Train Loss: 0.6933, Train Acc: 0.5029
Val Loss: 0.6864, Val Acc: 0.7771
Validation loss decreased (0.693733 --> 0.686436).  Saving model ...
../az_model/model/best_model_epoch_2.pt
../az_model/opt/best_optimizer_epoch_2.pt
Epoch 3 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 93.09it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 91.81it/s]


Train Loss: 0.6931, Train Acc: 0.5045
Val Loss: 0.6858, Val Acc: 0.7895
Validation loss decreased (0.686436 --> 0.685780).  Saving model ...
../az_model/model/best_model_epoch_3.pt
../az_model/opt/best_optimizer_epoch_3.pt
Epoch 4 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:13<00:00, 97.90it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 107.14it/s]


Train Loss: 0.6931, Train Acc: 0.5044
Val Loss: 0.6914, Val Acc: 0.6860
EarlyStopping counter: 1 out of 5
Epoch 5 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 89.05it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 117.58it/s]


Train Loss: 0.6929, Train Acc: 0.5071
Val Loss: 0.6841, Val Acc: 0.7831
Validation loss decreased (0.685780 --> 0.684097).  Saving model ...
../az_model/model/best_model_epoch_5.pt
../az_model/opt/best_optimizer_epoch_5.pt
Epoch 6 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:15<00:00, 88.45it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 102.01it/s]


Train Loss: 0.6929, Train Acc: 0.5079
Val Loss: 0.6840, Val Acc: 0.7873
Validation loss decreased (0.684097 --> 0.683965).  Saving model ...
../az_model/model/best_model_epoch_6.pt
../az_model/opt/best_optimizer_epoch_6.pt
Epoch 7 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 91.54it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 92.77it/s]


Train Loss: 0.6927, Train Acc: 0.5095
Val Loss: 0.6834, Val Acc: 0.7893
Validation loss decreased (0.683965 --> 0.683361).  Saving model ...
../az_model/model/best_model_epoch_7.pt
../az_model/opt/best_optimizer_epoch_7.pt
Epoch 8 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:15<00:00, 87.01it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 105.07it/s]


Train Loss: 0.6926, Train Acc: 0.5102
Val Loss: 0.6846, Val Acc: 0.7960
EarlyStopping counter: 1 out of 5
Epoch 9 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:15<00:00, 87.69it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 107.02it/s]


Train Loss: 0.6925, Train Acc: 0.5110
Val Loss: 0.6811, Val Acc: 0.8059
Validation loss decreased (0.683361 --> 0.681071).  Saving model ...
../az_model/model/best_model_epoch_9.pt
../az_model/opt/best_optimizer_epoch_9.pt
Epoch 10 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 91.02it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 97.64it/s]


Train Loss: 0.6924, Train Acc: 0.5105
Val Loss: 0.6791, Val Acc: 0.8099
Validation loss decreased (0.681071 --> 0.679131).  Saving model ...
../az_model/model/best_model_epoch_10.pt
../az_model/opt/best_optimizer_epoch_10.pt
Epoch 11 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:15<00:00, 84.60it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 117.70it/s]


Train Loss: 0.6922, Train Acc: 0.5126
Val Loss: 0.6783, Val Acc: 0.8061
Validation loss decreased (0.679131 --> 0.678349).  Saving model ...
../az_model/model/best_model_epoch_11.pt
../az_model/opt/best_optimizer_epoch_11.pt
Epoch 12 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 90.85it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 87.77it/s]


Train Loss: 0.6922, Train Acc: 0.5130
Val Loss: 0.6805, Val Acc: 0.7994
EarlyStopping counter: 1 out of 5
Epoch 13 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 93.73it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 106.71it/s]


Train Loss: 0.6921, Train Acc: 0.5128
Val Loss: 0.6869, Val Acc: 0.7855
EarlyStopping counter: 2 out of 5
Epoch 14 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 89.96it/s]
100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 99.72it/s]


Train Loss: 0.6920, Train Acc: 0.5134
Val Loss: 0.6717, Val Acc: 0.8190
Validation loss decreased (0.678349 --> 0.671723).  Saving model ...
../az_model/model/best_model_epoch_14.pt
../az_model/opt/best_optimizer_epoch_14.pt
Epoch 15 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:13<00:00, 97.83it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 120.37it/s]


Train Loss: 0.6918, Train Acc: 0.5156
Val Loss: 0.6815, Val Acc: 0.7996
EarlyStopping counter: 1 out of 5
Epoch 16 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:16<00:00, 78.67it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 116.70it/s]


Train Loss: 0.6917, Train Acc: 0.5149
Val Loss: 0.6757, Val Acc: 0.8093
EarlyStopping counter: 2 out of 5
Epoch 17 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:13<00:00, 97.96it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 120.65it/s]


Train Loss: 0.6916, Train Acc: 0.5149
Val Loss: 0.6782, Val Acc: 0.8040
EarlyStopping counter: 3 out of 5
Epoch 18 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:13<00:00, 96.97it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 101.05it/s]


Train Loss: 0.6917, Train Acc: 0.5152
Val Loss: 0.6754, Val Acc: 0.8106
EarlyStopping counter: 4 out of 5
Epoch 19 of 500


100%|███████████████████████████████████████████████████████████| 1333/1333 [00:14<00:00, 91.45it/s]
100%|████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 115.83it/s]


Train Loss: 0.6916, Train Acc: 0.5157
Val Loss: 0.6780, Val Acc: 0.7994
EarlyStopping counter: 5 out of 5
Early stopping
Training time: 5.103 minutes
loading best model ../az_model/model/best_model_epoch_14.pt
loading best optimizer ../az_model/opt/best_optimizer_epoch_14.pt


100%|█████████████████████████████████████████████████████████████| 149/149 [00:01<00:00, 93.77it/s]


test accuracy 0.5177572707601842 and ROC-AUC 0.5188043549039545
Elapsed time 5.203850718339284 mins.
