In [None]:
%load_ext autoreload
%autoreload 2 

### Check embedding error


In [None]:
# Debug cell: load one pickled graph from the word2vec embedding folder and
# inspect the attributes of a single node before and after clearing its "x".
path = "/mnt/ssd2t/mandy/Projects/few_shot_fcg/embeddings/x86_64_withVal_withPretrain_ghidra/word2vec/Advanced Micro Devices X86-64/aidra/81193e9a87778d7899a523adc7949f1a8af267d268e1dd51298165c22b890f4e.gpickle"

import networkx as nx
import pickle
import torch

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only use it on files you trust.
with open(path, "rb") as f:
    G = pickle.load(f)
    
# Key of the node to inspect in G.nodes
node = "0x4012f0L"

print(G.nodes[node])
# Replace the node's "x" attribute with an empty list. This only changes the
# in-memory graph; nothing is written to disk while the dump below stays commented out.
G.nodes[node]["x"] = []
print(G.nodes[node])

# Uncomment to write the modified graph back to `path` (overwrites the original file).
# with open(path, "wb") as f:
#     pickle.dump(G, f)

In [None]:
import torch
def check_model_weights(model_path, device):
    """Load a checkpoint and sanity-check the tensors in its ``model_state_dict``.

    Prints the name, shape and values of every entry of
    ``checkpoint["model_state_dict"]``, then prints one warning per entry whose
    elements are all zero, naming the entry so it can be located.

    Args:
        model_path: Path of the checkpoint file, passed to ``torch.load``.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        bool: ``False`` when the state dict is empty, ``True`` otherwise
        (also when all-zero entries were reported).

    Raises:
        KeyError: If the checkpoint has no ``"model_state_dict"`` entry.
    """
    # Load the checkpoint onto the requested device
    checkpoint = torch.load(model_path, map_location=device)
    state_dict = checkpoint["model_state_dict"]

    # 1. Check whether the state dict is empty
    if not state_dict:
        print("Warning: Model state dict is empty!")
        return False

    # 2. Print the name, shape and values of every weight
    for name, param in state_dict.items():
        print(f"Layer: {name} | Shape: {param.shape}")
        print(param)

    # 3. Report every tensor that contains only zeros.
    #    Zero-element tensors are skipped: torch.all() is vacuously True for
    #    them, which would otherwise produce a misleading warning.
    for name, param in state_dict.items():
        if param.numel() > 0 and torch.all(param == 0):
            print(f"Warning: Found all-zero parameter tensor: {name}")

    return True

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Backbone checkpoint to inspect (relative to the notebook's working directory)
path = "pretrained/x86_pretrained_20241122_1616/epoch_1342_best_backbone.pth"

# Last expression of the cell, so the returned flag is shown as the cell output
check_model_weights(path, device)

### Check validation dataset & test dataset difference

In [None]:
import pickle
# Pickled test and validation data stored next to the word2vec embeddings
testPath = "/mnt/ssd2t/mandy/Projects/few_shot_fcg/embeddings/x86_64_withVal_withPretrain_ghidra/word2vec/testData.pkl"
valPath = "/mnt/ssd2t/mandy/Projects/few_shot_fcg/embeddings/x86_64_withVal_withPretrain_ghidra/word2vec/valData.pkl"

# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only use it on files you trust.
# `test` and `val` are read by the cells that follow (they index element 0 of each).
with open(testPath, "rb") as f:
    test = pickle.load(f)
    
with open(valPath, "rb") as f:
    val = pickle.load(f)



In [None]:
# Size of element 0 of each loaded split: test first, then validation
# (presumably the list of graphs -- confirm against the code that wrote the pickles)
print(len(test[0]), len(val[0]), sep="\n")

In [None]:
# Mean of len(graph.x) over the graphs in element 0 of each split
testGraphs = test[0]
valGraphs = val[0]

testAvgLen = sum(len(graph.x) for graph in testGraphs) / len(testGraphs)
valAvgLen = sum(len(graph.x) for graph in valGraphs) / len(valGraphs)

print(testAvgLen)
print(valAvgLen)

### Get label dictionary data when loading data

In [None]:
from utils import load_config
from loadDataset import LoadDataset
from trainModule import TestModule
import os

# For every seed, load that seed's config, build the dataset and construct a
# TestModule from it. Nothing is evaluated here; the objects are only built
# (per the section title, presumably so the label dictionary data is produced
# while the data is loaded -- TODO confirm).
seeds = [6, 7, 10, 11, 19, 22, 31, 42, 666, 888]

for seed in seeds:
    configPath = f"/home/manying/Projects/fcgFewShot/config/config_NICT_Ghidra_x86_64_{seed}.json"
    options = load_config(configPath)

    # Point the loaded config at this machine's embedding folder
    options["paths"]["data"]["embedding_folder"] = "/home/manying/Projects/fcgFewShot/embeddings"
    dataset = LoadDataset(options)
    # Bound to its own name so the global `test` (the pickled test split loaded
    # earlier in this notebook) is not overwritten by a TestModule instance.
    testModule = TestModule(configPath, dataset, options)


### Test: train on 10-way, evaluate on 5-way
Testing on seed_6_baseline: LP 10-way 5-shot

In [None]:
from utils import load_config, save_config
import os
from loadDataset import LoadDataset
from trainModule import TestModule
# Start from the config saved with the 10-way 5-shot LabelPropagation checkpoint
configPath = "/home/mandy/Projects/few_shot_fcg/checkpoints/x86_64_withVal_withPretrain_ghidra_6_baseline/10way_5shot_LabelPropagation_alpha0.7_k20_20250315_155140/config.json"
options = load_config(configPath)
# The modified config is written next to the original one
newConfigPath = os.path.join(os.path.dirname(configPath), "config_5way.json")

# Change settings: 5 classes per test iteration, euclidean distance
options["settings"]["few_shot"]["test"]["class_per_iter"] = 5
options["settings"]["train"]["distance"] = "euclidean"
save_config(options, newConfigPath)

dataset = LoadDataset(options)

# Bound to its own name so the global `test` (the pickled test split loaded
# earlier in this notebook) is not overwritten by a TestModule instance.
# NOTE(review): TestModule is called with 2 arguments here but with 3
# (config path, dataset, options) in the per-seed loop earlier -- confirm
# which signature the current trainModule expects.
testModule = TestModule(newConfigPath, dataset)
testModule.eval()

### Compare parameter counts of different backbones

In [None]:
from models import GraphSAGELayer, GATLayer, GCNLayer, GINLayer


def count_params(model):
    """Return the total number of elements over the trainable parameters of ``model``.

    A parameter counts as trainable when its ``requires_grad`` flag is set;
    parameters without it are left out of the total.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Every backbone is built with the same input / hidden / output sizes and depth
dims = {"dim_in": 128, "dim_h": 128, "dim_o": 128, "num_layers": 3}

# One instance per backbone type; GAT additionally takes the number of heads
models = dict(
    GCN=GCNLayer(**dims),
    GraphSAGE=GraphSAGELayer(**dims),
    GAT=GATLayer(**dims, heads=8),
    GIN=GINLayer(**dims),
)

# Report the trainable-parameter count of each backbone
for name, m in models.items():
    print(f'{name}: {count_params(m):,} parameters')


### Check for FCG nodes without an "x" attribute

In [None]:
import os, pickle

# Walk <check_path>/<family>/*.gpickle and report every node that has no "x" attribute
check_path = "/home/manying/Projects/fcgFewShot/dataset/data_ghidra_fcg_openset/Intel 80386"

for familyFolder in os.listdir(check_path):
    familyPath = os.path.join(check_path, familyFolder)
    # Only sub-directories are treated as family folders
    if os.path.isdir(familyPath):
        for file in os.listdir(familyPath):
            filePath = os.path.join(familyPath, file)
            # Only pickled graphs are inspected
            if file.endswith(".gpickle"):
                with open(filePath, "rb") as f:
                    data = pickle.load(f)
                    for node in data.nodes:
                        if "x" in data.nodes[node]:
                            continue
                        print(f"Node {node} in file {filePath} does not have 'x' attribute.")

In [None]:
# Path (relative to the working directory) of a single .gpickle sample.
# NOTE(review): no cell visible in this notebook reads `file_path`.
file_path = "./dataset/data_ghidra_fcg/Intel 80386/ddostf/000f5bc23812367aecf93ff5d6c96ac644f0ae819096af6eab13eb1993b8dbe4.gpickle"

### Check ARM malware

In [None]:
import networkx as nx
import pickle
import os

# Families to look at, and the folders holding their raw graphs (dataPath)
# and their word2vec-embedded graphs (embedPath)
familyList = ["tediss", "dowgin", "mobidash", "helper", "sagent", "zergrush", "zhtrap", "rootnik", "boqx", "mirai", "gafgyt", "shixot", "feejar", "gluper", "dofloo", "dnsamp", "sidewalk", "wapron", "badpac", "ngioweb", "tekya", "monitorminor", "meterpreter"]
dataPath = "/home/manying/Projects/fcgFewShot/dataset/data_ghidra_fcg/ARM"
embedPath = "/home/manying/Projects/fcgFewShot/embeddings/arm_withVal_ghidra_42/word2vec/ARM"


def print_gpickle_graphs(folder, show_paths=False):
    """Print every ``.gpickle`` object in ``folder`` followed by its node attributes.

    Args:
        folder: Directory whose entries are listed with ``os.listdir``.
        show_paths: When true, print the path of every directory entry
            (including entries that are not ``.gpickle`` files) before it is
            checked.

    Raises:
        FileNotFoundError: If ``folder`` does not exist.
    """
    for file in os.listdir(folder):
        filePath = os.path.join(folder, file)
        if show_paths:
            print(filePath)
        if not file.endswith(".gpickle"):
            continue
        # NOTE(review): pickle.load can execute arbitrary code stored in the
        # file; only point this at files you trust.
        with open(filePath, "rb") as f:
            data = pickle.load(f)
        # The file is already closed here; printing only needs the loaded object.
        print(data)
        for node in data.nodes:
            print(data.nodes[node])


for familyFolder in familyList:
    familyPath = os.path.join(dataPath, familyFolder)
    embedFamilyPath = os.path.join(embedPath, familyFolder)
    if not os.path.isdir(familyPath):
        print(f"Family folder {familyFolder} does not exist in {dataPath}.")
        continue

    # Raw graphs: the path of every directory entry is echoed as well
    print_gpickle_graphs(familyPath, show_paths=True)

    # Embedded graphs: report a missing folder instead of failing in os.listdir
    if os.path.isdir(embedFamilyPath):
        print_gpickle_graphs(embedFamilyPath)
    else:
        print(f"Family folder {familyFolder} does not exist in {embedPath}.")

    # Stop after the first family whose raw-data folder exists
    break
