In [1]:
import torch
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], 'src'))

processed_dir = '/data/pengmiao/PaCKD_0/processed'

train_loader = torch.load(os.path.join(processed_dir, f"bc-3.train.pt"))
test_loader = torch.load(os.path.join(processed_dir, f"bc-3.test.pt"))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
sigmoid = torch.nn.Sigmoid()
import torch.nn.functional as F

def train(ep, train_loader, model_save_path):
    global steps
    epoch_loss = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):#d,t: (torch.Size([64, 1, 784]),64)        
        optimizer.zero_grad()
        output = sigmoid(model(data))
        loss = F.binary_cross_entropy(output, target, reduction='mean')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss/=len(train_loader)
    return epoch_loss


def test(test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = sigmoid(model(data))
            test_loss += F.binary_cross_entropy(output, target, reduction='mean').item()
            thresh=0.5
            output_bin=(output>=thresh)*1
            correct+=(output_bin&target.int()).sum()
        test_loss /=  len(test_loader)
        return test_loss

import csv

def run_epoch(epochs, early_stop, loading, model_save_path, train_loader, test_loader, tsv_path, model):
    if loading==True:
        model.load_state_dict(torch.load(model_save_path))
        print("-------------Model Loaded------------")
        
    best_loss=0
    early_stop = early_stop
    curr_early_stop = early_stop

    metrics_data = []

    for epoch in range(epochs):

        train_loss=train(epoch,train_loader,model_save_path)
        test_loss=test(test_loader)
        print((f"Epoch: {epoch+1} - loss: {train_loss:.10f} - test_loss: {test_loss:.10f}"))
        
        if epoch == 0:
            best_loss=test_loss
        if test_loss<=best_loss:
            torch.save(model.state_dict(), model_save_path)    
            best_loss=test_loss
            print("-------- Save Best Model! --------")
            curr_early_stop = early_stop
        else:
            curr_early_stop -= 1
            print("Early Stop Left: {}".format(curr_early_stop))
        if curr_early_stop == 0:
            print("-------- Early Stop! --------")
            break

        metrics_data.append([epoch+1, train_loss, test_loss])

    with open(tsv_path, 'w') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerow(['Epoch', 'Train Loss', 'Test Loss'])
        writer.writerows(metrics_data)

Baseline Teacher R Training No Cluster No KD, lr = 0.0001, gamma = 0.05

In [9]:
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim

from data_loader import init_dataloader
from utils import select_tch

device = torch.device(f"cuda:5" if torch.cuda.is_available() else "cpu")

model = select_tch('r')
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = StepLR(optimizer, step_size=20, gamma=0.05)

epochs = 50
early_stop = 15
loading = False
model_save_path = '/data/pengmiao/PaCKD_1/model/bc-3.teacher.gamma.5.r.pth'
tsv_path = '/data/pengmiao/PaCKD_1/model/bc-3.teacher.gamma.5.r.tsv'

init_dataloader('5')

run_epoch(epochs, early_stop, loading, model_save_path, train_loader, test_loader, tsv_path, model)

Epoch: 1 - loss: 0.2249865555 - test_loss: 0.2179443322
-------- Save Best Model! --------
Epoch: 2 - loss: 0.2175688889 - test_loss: 0.2289805716
Early Stop Left: 14
Epoch: 3 - loss: 0.2174327775 - test_loss: 0.2206873246
Early Stop Left: 13
Epoch: 4 - loss: 0.2168335402 - test_loss: 0.2319437766
Early Stop Left: 12
Epoch: 5 - loss: 0.2145503027 - test_loss: 0.2275580301
Early Stop Left: 11
Epoch: 6 - loss: 0.2144655880 - test_loss: 0.2461138607
Early Stop Left: 10
Epoch: 7 - loss: 0.2136679025 - test_loss: 0.2222226236
Early Stop Left: 9
Epoch: 8 - loss: 0.2151345382 - test_loss: 0.2168030499
-------- Save Best Model! --------
Epoch: 9 - loss: 0.2149396160 - test_loss: 0.2211324256
Early Stop Left: 14
Epoch: 10 - loss: 0.2154008062 - test_loss: 0.1998694631
-------- Save Best Model! --------
Epoch: 11 - loss: 0.2167794622 - test_loss: 0.2171379208
Early Stop Left: 14
Epoch: 12 - loss: 0.2174248653 - test_loss: 0.2116057643
Early Stop Left: 13
Epoch: 13 - loss: 0.2163505160 - test_los

In [4]:
from tqdm import tqdm
from numpy import nanargmax, sqrt
from sklearn.metrics import auc, f1_score, precision_score, recall_score, precision_recall_curve, roc_curve
import numpy as np
def threshold_throttleing(test_df,throttle_type="f1",optimal_type="micro",topk=2,threshold=0.5):
    y_score=np.stack(test_df["y_score"])
    y_real=np.stack(test_df["future"])
    best_threshold=0
    if throttle_type=="roc":
        print("throttleing by roc curve")
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        threshold=dict()
        best_threshold_list=[]
        gmeans=dict()
        ix=dict()
        #pdb.set_trace()
        for i in range(BITMAP_SIZE):
            fpr[i], tpr[i], threshold[i] =roc_curve(y_real[:,i],y_score[:,i])
            roc_auc[i] = auc(fpr[i],tpr[i])
            #best:
            gmeans[i] = sqrt(tpr[i]*(1-fpr[i]))
            ix[i]=nanargmax(gmeans[i])
            best_threshold_list.append(threshold[i][ix[i]])
            #print('Dimension: i=%d, Best threshold=%f, G-Mean=%.3f' %(i, threshold[i][ix[i]], gmeans[i][ix[i]]))
        if optimal_type=="indiv":
            best_threshold=best_threshold_list
            y_pred_bin = (y_score-np.array(best_threshold) >0)*1
            test_df["predicted"]= list(y_pred_bin)#(all,[length])
        elif optimal_type=="micro":
            fpr["micro"], tpr["micro"], threshold["micro"] = roc_curve(y_real.ravel(), y_score.ravel())
            roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
            #best:
            gmeans["micro"] = sqrt(tpr["micro"]*(1-fpr["micro"]))
            ix["micro"]=nanargmax(gmeans["micro"])
            best_threshold=threshold["micro"][ix["micro"]]
            print('Best micro threshold=%f, G-Mean=%.3f' %(best_threshold, gmeans["micro"][ix["micro"]]))
            
            y_pred_bin = (y_score-best_threshold >0)*1
            test_df["predicted"]= list(y_pred_bin)#(all,[length])
            
    if throttle_type=="f1":
        print("throttleing by precision-recall curve")
        p = dict()
        r = dict()
        threshold=dict()
        best_threshold_list=[]
        fscore=dict()
        ix=dict()
        
        p["micro"], r["micro"], threshold["micro"]=precision_recall_curve(y_real.ravel(),y_score.ravel())
        fscore["micro"] = (2 * p["micro"] * r["micro"]) / (p["micro"] + r["micro"])
        ix["micro"]=nanargmax(fscore["micro"])
        best_threshold=threshold["micro"][ix["micro"]]
        print('Best micro threshold=%f, fscore=%.3f' %(best_threshold, fscore["micro"][ix["micro"]]))
        y_pred_bin = (y_score-best_threshold >0)*1
        test_df["predicted"]= list(y_pred_bin)
        
    elif throttle_type=="topk":
        print("throttleing by topk:",topk)
        pred_index = torch.tensor(y_score).topk(topk)[1].cpu().detach().numpy()
        y_pred_bin=[to_bitmap(a,BITMAP_SIZE) for a in pred_index]
        test_df["predicted"]= list(y_pred_bin)
        
    elif throttle_type =="fixed_threshold":
        print("throttleing by fixed threshold:",threshold)
        best_threshold=threshold
        y_pred_bin = (y_score-np.array(best_threshold) >0)*1
        test_df["predicted"]= list(y_pred_bin)#(all,[length])
    
    return test_df, best_threshold

def model_prediction(test_loader, test_df, model_save_path):
    print("predicting")
    prediction = []
    model.load_state_dict(torch.load(model_save_path))
    model.to(device)
    model.eval()
    for data, _ in tqdm(test_loader):
        output = sigmoid(model(data))
        prediction.extend(output.cpu().detach().numpy())
    test_df["y_score"] = prediction

    return test_df[['id', 'cycle', 'addr', 'ip', 'block_address', 'future', 'y_score']]

def evaluate(y_test,y_pred_bin):
    f1_score_res=f1_score(y_test, y_pred_bin, average='micro')
    #recall: tp / (tp + fn)
    recall_score_res=recall_score(y_test, y_pred_bin, average='micro')
    #precision: tp / (tp + fp)
    precision_score_res=precision_score(y_test, y_pred_bin, average='micro',zero_division=0)
    print("p,r,f1:",precision_score_res,recall_score_res,f1_score_res)
    return precision_score_res,recall_score_res,f1_score_res

def run_val(test_loader, test_df, app_name, model_save_path):
    res = {}

    print("Validation start")
    test_df = model_prediction(test_loader, test_df, model_save_path)
    df_res, threshold=threshold_throttleing(test_df,throttle_type="f1",optimal_type="micro")
    p,r,f1 = evaluate(np.stack(df_res["future"]), np.stack(df_res["predicted"]))
    res["app"], res["opt_th"], res["p"], res["r"], res["f1"]=[app_name],[threshold],[p],[r],[f1]

    df_res, _ =threshold_throttleing(test_df,throttle_type="fixed_threshold",threshold=0.5)
    p,r,f1 = evaluate(np.stack(df_res["future"]), np.stack(df_res["predicted"]))
    res["p_5"],  res["r_5"], res["f1_5"]=[p],[r],[f1]
    
    return res

In [10]:
test_df = torch.load(os.path.join(processed_dir, 'bc-3.df.pt'))

In [11]:
res = run_val(test_loader, test_df, 'bc-3.txt.xz', model_save_path)

Validation start
predicting


100%|██████████| 1232/1232 [00:16<00:00, 74.24it/s]


throttleing by precision-recall curve
Best micro threshold=0.172694, fscore=0.248


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["predicted"]= list(y_pred_bin)


p,r,f1: 0.15109066106576013 0.6904550293198635 0.2479278499265169
throttleing by fixed threshold: 0.5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["predicted"]= list(y_pred_bin)#(all,[length])


p,r,f1: 0.8107001094735587 0.021543385897532564 0.04197143120421303


In [7]:
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim

from data_loader import init_dataloader
from utils import select_tch

device = torch.device(f"cuda:2" if torch.cuda.is_available() else "cpu")

model = select_tch('r')
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.00005)
scheduler = StepLR(optimizer, step_size=20, gamma=0.1)

epochs = 50
early_stop = 15
loading = False
model_save_path = '/data/pengmiao/PaCKD_1/model/bc-3.teacher.lr.05.r.pth'
tsv_path = '/data/pengmiao/PaCKD_1/model/bc-3.teacher.lr.05.r.tsv'

init_dataloader('2')

run_epoch(epochs, early_stop, loading, model_save_path, train_loader, test_loader, tsv_path, model)

Epoch: 1 - loss: 0.2302720435 - test_loss: 0.2320394492
-------- Save Best Model! --------
Epoch: 2 - loss: 0.2178523771 - test_loss: 0.2297895878
-------- Save Best Model! --------
Epoch: 3 - loss: 0.2163823675 - test_loss: 0.2294684086
-------- Save Best Model! --------
Epoch: 4 - loss: 0.2163723651 - test_loss: 0.2484645673
Early Stop Left: 14
Epoch: 5 - loss: 0.2165643955 - test_loss: 0.2276227242
-------- Save Best Model! --------
Epoch: 6 - loss: 0.2171162103 - test_loss: 0.2221536248
-------- Save Best Model! --------
Epoch: 7 - loss: 0.2167922092 - test_loss: 0.2344347408
Early Stop Left: 14
Epoch: 8 - loss: 0.2165621888 - test_loss: 0.2183304573
-------- Save Best Model! --------
Epoch: 9 - loss: 0.2163453429 - test_loss: 0.2171222698
-------- Save Best Model! --------
Epoch: 10 - loss: 0.2163750105 - test_loss: 0.2148041282
-------- Save Best Model! --------
Epoch: 11 - loss: 0.2166638527 - test_loss: 0.2138872603
-------- Save Best Model! --------
Epoch: 12 - loss: 0.2174821

In [8]:
res = run_val(test_loader, test_df, 'bc-3.txt.xz', model_save_path)

Validation start
predicting


100%|██████████| 1232/1232 [00:14<00:00, 82.56it/s]


throttleing by precision-recall curve
Best micro threshold=0.143467, fscore=0.256


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["predicted"]= list(y_pred_bin)


p,r,f1: 0.1530393097630983 0.7744075420453235 0.25557215592217
throttleing by fixed threshold: 0.5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["predicted"]= list(y_pred_bin)#(all,[length])


p,r,f1: 0.5334083992199675 0.03881252920260777 0.07235991570148408


In [12]:
!nvidia-smi

Sun Jul  9 22:10:42 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 470.63.01    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A4000    On   | 00000000:03:00.0 Off |                  Off |
| 49%   66C    P2    96W / 140W |   3956MiB / 16117MiB |     88%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A4000    On   | 00000000:44:00.0 Off |                  Off |
| 41%   29C    P8    14W / 140W |   3788MiB / 16117MiB |      0%      Default |
|       