GCN + PDG + Jaccard

In [3]:
import os
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import json
import numpy as np
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def assign_node_type(node):
    node = node.strip()
    if 'if' in node:
        return 'condition'
    elif 'for' in node or 'while' in node:
        return 'loop'
    elif 'System.out.println' in node:
        return 'output'
    elif node.startswith('//') or node == '' or 'LogFrame' in node:
        return 'meaningless'
    return 'statement'

def assign_edge_type(edge):
    return 'control' if any(keyword in edge[0] for keyword in ['if', 'for', 'while']) else 'data'

def generate_pdg(code):
    G = nx.DiGraph()
    lines = code.splitlines()
    for i, line in enumerate(lines):
        line = line.strip()
        if not line:
            continue
        node_type = assign_node_type(line)
        if node_type != 'meaningless':
            G.add_node(i, label=line, type=node_type)
            if i > 0:
                prev_line = lines[i-1].strip()
                prev_node_type = assign_node_type(prev_line)
                if prev_node_type != 'meaningless':
                    G.add_edge(i-1, i, type=assign_edge_type((prev_line, line)))

    isolated_nodes = [node for node, degree in G.degree if degree == 0]
    G.remove_nodes_from(isolated_nodes)
    mapping = {node: idx for idx, node in enumerate(G.nodes())}
    return nx.relabel_nodes(G, mapping) if G.number_of_nodes() > 0 else None

def convert_nx_to_torch_data(G):
    node_features = torch.tensor(
        [[int(node_data['type'] == t) for t in ['condition', 'loop', 'output', 'statement']] for _, node_data in G.nodes(data=True)],
        dtype=torch.float
    )
    edge_index = torch.tensor(list(G.edges), dtype=torch.long).t().contiguous()
    return Data(x=node_features, edge_index=edge_index)

def create_pdg_pairs(data_file, jsonl_file):
    with open(jsonl_file, 'r') as f:
        url_to_code = {entry['idx']: entry['func'] for entry in (json.loads(line.strip()) for line in f)}

    pdg_pairs, true_labels = [], []
    with open(data_file, 'r') as f:
        for line in tqdm(f, desc="Creating PDG pairs"):
            url1, url2, label = line.strip().split('\t')
            if url1 not in url_to_code or url2 not in url_to_code:
                continue

            pdg1, pdg2 = generate_pdg(url_to_code[url1]), generate_pdg(url_to_code[url2])
            if pdg1 is None or pdg2 is None:
                continue

            pdg_pairs.append((convert_nx_to_torch_data(pdg1), convert_nx_to_torch_data(pdg2)))
            true_labels.append(int(label))  # 0, 1, 2, 3 for Type-1, Type-2, Type-3, Type-4 respectively

    return pdg_pairs, true_labels

# Save checkpoint function
def save_checkpoint(model, optimizer, epoch, f1_score, accuracy, precision, recall, metrics, output_dir):
    checkpoint_path = os.path.join(output_dir, f"model_epoch_{epoch}_f1_{f1_score:.4f}.pth")
    
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'metrics': {
            'best_f1': f1_score,
            'best_accuracy': accuracy,
            'best_precision': precision,
            'best_recall': recall,
            'per_type_metrics': metrics  
        }
    }, checkpoint_path)
    
    print(f"Checkpoint saved to {checkpoint_path}")

class GNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate=0.3):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, edge_index, batch):
        x = F.relu(self.conv1(x, edge_index))
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        x = global_mean_pool(x, batch)
        return x

def jaccard_similarity(x1, x2):
    # Chuyển đổi các embedding thành các tập hợp bằng cách làm tròn và chuyển đổi thành binary
    x1_set = (x1 > x1.mean()).float()
    x2_set = (x2 > x2.mean()).float()
    
    intersection = torch.min(x1_set, x2_set).sum(dim=1)
    union = torch.max(x1_set, x2_set).sum(dim=1)
    
    return intersection / (union + 1e-8)  # Thêm epsilon để tránh chia cho 0

class CloneDetector(nn.Module):
    def __init__(self, gnn):
        super(CloneDetector, self).__init__()
        self.gnn = gnn
        self.fc = nn.Linear(1, 4)  # 4 classes for Type-1, Type-2, Type-3, Type-4

    def forward(self, data):
        x1, edge_index1, batch1 = data[0].x, data[0].edge_index, data[0].batch
        x2, edge_index2, batch2 = data[1].x, data[1].edge_index, data[1].batch
        
        emb1 = self.gnn(x1, edge_index1, batch1)
        emb2 = self.gnn(x2, edge_index2, batch2)
        
        similarity = jaccard_similarity(emb1, emb2).unsqueeze(1)
        output = self.fc(similarity)
        return output

def train(model, train_loader, valid_loader, epochs=20, output_dir="checkpoints"):
    optimizer = Adam(model.parameters(), lr=0.01)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()
    best_f1 = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            graph_pair, labels = batch
            graph_pair = [g.to(device) for g in graph_pair]
            labels = labels.to(device)
            
            optimizer.zero_grad()
            logits = model(graph_pair)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        val_loss, val_metrics = evaluate(model, valid_loader)
        val_f1 = val_metrics['overall']['f1']
        val_accuracy = val_metrics['overall']['accuracy']
        val_precision = val_metrics['overall']['precision']
        val_recall = val_metrics['overall']['recall']
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')
        
        scheduler.step(val_f1)
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            save_checkpoint(model, optimizer, epoch, best_f1, val_accuracy, 
                val_precision, val_recall, val_metrics, output_dir)

def evaluate(model, data_loader):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    total_loss = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            graph_pair, labels = batch
            graph_pair = [g.to(device) for g in graph_pair]
            labels = labels.to(device)
            
            logits = model(graph_pair)
            loss = criterion(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds, all_labels = np.array(all_preds), np.array(all_labels)

    metrics = {'overall': {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
        'recall': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
        'f1': f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    }}

    for clone_type in range(4):
        type_preds, type_labels = (all_preds == clone_type), (all_labels == clone_type)
        metrics[f'Type-{clone_type+1}'] = {
            'accuracy': accuracy_score(type_labels, type_preds),
            'precision': precision_score(type_labels, type_preds, zero_division=0),
            'recall': recall_score(type_labels, type_preds, zero_division=0),
            'f1': f1_score(type_labels, type_preds, zero_division=0)
        }

    return total_loss / len(data_loader), metrics

# Main execution logic
input_dim = 4
hidden_dim = 64
output_dim = 128
dropout_rate = 0.3
gnn = GNN(input_dim, hidden_dim, output_dim, dropout_rate)
model = CloneDetector(gnn).to(device)

train_file, valid_file = 'test_set/train_5000.txt', 'test_set/train_5000.txt'
jsonl_file = 'test_set/data.jsonl'

train_pdg_pairs, train_labels = create_pdg_pairs(train_file, jsonl_file)
valid_pdg_pairs, valid_labels = create_pdg_pairs(valid_file, jsonl_file)

def collate_fn(batch):
    graphs, labels = zip(*batch)
    batched_graphs = [Batch.from_data_list([g[0] for g in graphs]),
                      Batch.from_data_list([g[1] for g in graphs])]
    return batched_graphs, torch.tensor(labels)

train_loader = DataLoader(list(zip(train_pdg_pairs, train_labels)), batch_size=64, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(list(zip(valid_pdg_pairs, valid_labels)), batch_size=64, collate_fn=collate_fn)

output_dir = "checkpoints_GCN_jaccard_similarity"
os.makedirs(output_dir, exist_ok=True)

train(model, train_loader, valid_loader, epochs=100, output_dir=output_dir)

Creating PDG pairs: 3000it [00:02, 1145.96it/s]
Creating PDG pairs: 3000it [00:02, 1113.56it/s]
Epoch 1/100: 100%|██████████| 23/23 [00:00<00:00, 38.38it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.86it/s]


Epoch 1/100, Train Loss: 1.5569, Val Loss: 1.3845, Val F1: 0.3347
Checkpoint saved to checkpoints_GCN_jaccard_similarity\model_epoch_0_f1_0.3347.pth


Epoch 2/100: 100%|██████████| 23/23 [00:00<00:00, 35.53it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.13it/s]


Epoch 2/100, Train Loss: 1.2991, Val Loss: 1.2125, Val F1: 0.2921


Epoch 3/100: 100%|██████████| 23/23 [00:00<00:00, 27.82it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.43it/s]


Epoch 3/100, Train Loss: 1.1839, Val Loss: 1.1513, Val F1: 0.2921


Epoch 4/100: 100%|██████████| 23/23 [00:00<00:00, 25.80it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 41.38it/s]


Epoch 4/100, Train Loss: 1.1406, Val Loss: 1.1256, Val F1: 0.2921


Epoch 5/100: 100%|██████████| 23/23 [00:00<00:00, 37.65it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.91it/s]


Epoch 5/100, Train Loss: 1.1214, Val Loss: 1.1104, Val F1: 0.2921


Epoch 6/100: 100%|██████████| 23/23 [00:00<00:00, 39.64it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.77it/s]


Epoch 6/100, Train Loss: 1.1093, Val Loss: 1.0998, Val F1: 0.2921


Epoch 7/100: 100%|██████████| 23/23 [00:00<00:00, 33.29it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.76it/s]


Epoch 7/100, Train Loss: 1.0998, Val Loss: 1.0927, Val F1: 0.2921


Epoch 8/100: 100%|██████████| 23/23 [00:00<00:00, 29.16it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 14.96it/s]


Epoch 8/100, Train Loss: 1.0949, Val Loss: 1.0884, Val F1: 0.2921


Epoch 9/100: 100%|██████████| 23/23 [00:03<00:00,  7.20it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 21.64it/s]


Epoch 9/100, Train Loss: 1.0908, Val Loss: 1.0837, Val F1: 0.2921


Epoch 10/100: 100%|██████████| 23/23 [00:01<00:00, 22.13it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.58it/s]


Epoch 10/100, Train Loss: 1.0874, Val Loss: 1.0811, Val F1: 0.2921


Epoch 11/100: 100%|██████████| 23/23 [00:02<00:00, 10.29it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.05it/s]


Epoch 11/100, Train Loss: 1.0856, Val Loss: 1.0807, Val F1: 0.2921


Epoch 12/100: 100%|██████████| 23/23 [00:01<00:00, 17.54it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.13it/s]


Epoch 12/100, Train Loss: 1.0847, Val Loss: 1.0783, Val F1: 0.2921


Epoch 13/100: 100%|██████████| 23/23 [00:00<00:00, 25.64it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.82it/s]


Epoch 13/100, Train Loss: 1.0823, Val Loss: 1.0780, Val F1: 0.2921


Epoch 14/100: 100%|██████████| 23/23 [00:00<00:00, 31.20it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.84it/s]


Epoch 14/100, Train Loss: 1.0828, Val Loss: 1.0777, Val F1: 0.2921


Epoch 15/100: 100%|██████████| 23/23 [00:00<00:00, 31.21it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.20it/s]


Epoch 15/100, Train Loss: 1.0831, Val Loss: 1.0776, Val F1: 0.2921


Epoch 16/100: 100%|██████████| 23/23 [00:00<00:00, 30.55it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.23it/s]


Epoch 16/100, Train Loss: 1.0813, Val Loss: 1.0773, Val F1: 0.2921


Epoch 17/100: 100%|██████████| 23/23 [00:00<00:00, 33.64it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.02it/s]


Epoch 17/100, Train Loss: 1.0828, Val Loss: 1.0773, Val F1: 0.2921


Epoch 18/100: 100%|██████████| 23/23 [00:00<00:00, 33.18it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.11it/s]


Epoch 18/100, Train Loss: 1.0828, Val Loss: 1.0771, Val F1: 0.2921


Epoch 19/100: 100%|██████████| 23/23 [00:00<00:00, 29.50it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.01it/s]


Epoch 19/100, Train Loss: 1.0822, Val Loss: 1.0770, Val F1: 0.2921


Epoch 20/100: 100%|██████████| 23/23 [00:01<00:00, 20.67it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.67it/s]


Epoch 20/100, Train Loss: 1.0817, Val Loss: 1.0768, Val F1: 0.2921


Epoch 21/100: 100%|██████████| 23/23 [00:00<00:00, 35.23it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 20.81it/s]


Epoch 21/100, Train Loss: 1.0811, Val Loss: 1.0766, Val F1: 0.2921


Epoch 22/100: 100%|██████████| 23/23 [00:01<00:00, 22.44it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.48it/s]


Epoch 22/100, Train Loss: 1.0811, Val Loss: 1.0765, Val F1: 0.2921


Epoch 23/100: 100%|██████████| 23/23 [00:01<00:00, 22.44it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.19it/s]


Epoch 23/100, Train Loss: 1.0811, Val Loss: 1.0765, Val F1: 0.2921


Epoch 24/100: 100%|██████████| 23/23 [00:00<00:00, 27.27it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 22.08it/s]


Epoch 24/100, Train Loss: 1.0810, Val Loss: 1.0764, Val F1: 0.2921


Epoch 25/100: 100%|██████████| 23/23 [00:01<00:00, 21.87it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 24.76it/s]


Epoch 25/100, Train Loss: 1.0808, Val Loss: 1.0764, Val F1: 0.2921


Epoch 26/100: 100%|██████████| 23/23 [00:00<00:00, 24.41it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.54it/s]


Epoch 26/100, Train Loss: 1.0814, Val Loss: 1.0764, Val F1: 0.2921


Epoch 27/100: 100%|██████████| 23/23 [00:00<00:00, 32.04it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.87it/s]


Epoch 27/100, Train Loss: 1.0814, Val Loss: 1.0764, Val F1: 0.2921


Epoch 28/100: 100%|██████████| 23/23 [00:00<00:00, 27.20it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.24it/s]


Epoch 28/100, Train Loss: 1.0820, Val Loss: 1.0764, Val F1: 0.2921


Epoch 29/100: 100%|██████████| 23/23 [00:00<00:00, 32.48it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.16it/s]


Epoch 29/100, Train Loss: 1.0809, Val Loss: 1.0764, Val F1: 0.2921


Epoch 30/100: 100%|██████████| 23/23 [00:00<00:00, 37.59it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.67it/s]


Epoch 30/100, Train Loss: 1.0819, Val Loss: 1.0763, Val F1: 0.2921


Epoch 31/100: 100%|██████████| 23/23 [00:00<00:00, 23.64it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 38.43it/s]


Epoch 31/100, Train Loss: 1.0816, Val Loss: 1.0763, Val F1: 0.2921


Epoch 32/100: 100%|██████████| 23/23 [00:00<00:00, 36.22it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.67it/s]


Epoch 32/100, Train Loss: 1.0821, Val Loss: 1.0763, Val F1: 0.2921


Epoch 33/100: 100%|██████████| 23/23 [00:00<00:00, 26.65it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 18.98it/s]


Epoch 33/100, Train Loss: 1.0821, Val Loss: 1.0763, Val F1: 0.2921


Epoch 34/100: 100%|██████████| 23/23 [00:00<00:00, 25.65it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.03it/s]


Epoch 34/100, Train Loss: 1.0814, Val Loss: 1.0763, Val F1: 0.2921


Epoch 35/100: 100%|██████████| 23/23 [00:02<00:00, 10.68it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 18.79it/s]


Epoch 35/100, Train Loss: 1.0824, Val Loss: 1.0763, Val F1: 0.2921


Epoch 36/100: 100%|██████████| 23/23 [00:00<00:00, 23.12it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.56it/s]


Epoch 36/100, Train Loss: 1.0806, Val Loss: 1.0763, Val F1: 0.2921


Epoch 37/100: 100%|██████████| 23/23 [00:00<00:00, 24.61it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.27it/s]


Epoch 37/100, Train Loss: 1.0812, Val Loss: 1.0763, Val F1: 0.2921


Epoch 38/100: 100%|██████████| 23/23 [00:00<00:00, 28.67it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.53it/s]


Epoch 38/100, Train Loss: 1.0799, Val Loss: 1.0763, Val F1: 0.2921


Epoch 39/100: 100%|██████████| 23/23 [00:00<00:00, 32.02it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.46it/s]


Epoch 39/100, Train Loss: 1.0802, Val Loss: 1.0763, Val F1: 0.2921


Epoch 40/100: 100%|██████████| 23/23 [00:01<00:00, 22.64it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 22.29it/s]


Epoch 40/100, Train Loss: 1.0812, Val Loss: 1.0763, Val F1: 0.2921


Epoch 41/100: 100%|██████████| 23/23 [00:01<00:00, 17.42it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 17.71it/s]


Epoch 41/100, Train Loss: 1.0823, Val Loss: 1.0763, Val F1: 0.2921


Epoch 42/100: 100%|██████████| 23/23 [00:00<00:00, 24.55it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.72it/s]


Epoch 42/100, Train Loss: 1.0809, Val Loss: 1.0763, Val F1: 0.2921


Epoch 43/100: 100%|██████████| 23/23 [00:01<00:00, 21.76it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 11.79it/s]


Epoch 43/100, Train Loss: 1.0806, Val Loss: 1.0763, Val F1: 0.2921


Epoch 44/100: 100%|██████████| 23/23 [00:01<00:00, 22.17it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.28it/s]


Epoch 44/100, Train Loss: 1.0825, Val Loss: 1.0763, Val F1: 0.2921


Epoch 45/100: 100%|██████████| 23/23 [00:00<00:00, 26.56it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.34it/s]


Epoch 45/100, Train Loss: 1.0813, Val Loss: 1.0763, Val F1: 0.2921


Epoch 46/100: 100%|██████████| 23/23 [00:00<00:00, 27.81it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.70it/s]


Epoch 46/100, Train Loss: 1.0824, Val Loss: 1.0763, Val F1: 0.2921


Epoch 47/100: 100%|██████████| 23/23 [00:00<00:00, 27.87it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.29it/s]


Epoch 47/100, Train Loss: 1.0819, Val Loss: 1.0763, Val F1: 0.2921


Epoch 48/100: 100%|██████████| 23/23 [00:00<00:00, 23.90it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.36it/s]


Epoch 48/100, Train Loss: 1.0806, Val Loss: 1.0763, Val F1: 0.2921


Epoch 49/100: 100%|██████████| 23/23 [00:01<00:00, 18.32it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.05it/s]


Epoch 49/100, Train Loss: 1.0826, Val Loss: 1.0763, Val F1: 0.2921


Epoch 50/100: 100%|██████████| 23/23 [00:00<00:00, 28.20it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.98it/s]


Epoch 50/100, Train Loss: 1.0815, Val Loss: 1.0763, Val F1: 0.2921


Epoch 51/100: 100%|██████████| 23/23 [00:01<00:00, 21.08it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.77it/s]


Epoch 51/100, Train Loss: 1.0809, Val Loss: 1.0763, Val F1: 0.2921


Epoch 52/100: 100%|██████████| 23/23 [00:00<00:00, 24.79it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.77it/s]


Epoch 52/100, Train Loss: 1.0811, Val Loss: 1.0763, Val F1: 0.2921


Epoch 53/100: 100%|██████████| 23/23 [00:00<00:00, 27.77it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.40it/s]


Epoch 53/100, Train Loss: 1.0817, Val Loss: 1.0763, Val F1: 0.2921


Epoch 54/100: 100%|██████████| 23/23 [00:01<00:00, 22.81it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.71it/s]


Epoch 54/100, Train Loss: 1.0816, Val Loss: 1.0763, Val F1: 0.2921


Epoch 55/100: 100%|██████████| 23/23 [00:00<00:00, 28.40it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 17.84it/s]


Epoch 55/100, Train Loss: 1.0824, Val Loss: 1.0763, Val F1: 0.2921


Epoch 56/100: 100%|██████████| 23/23 [00:01<00:00, 15.31it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 17.44it/s]


Epoch 56/100, Train Loss: 1.0795, Val Loss: 1.0763, Val F1: 0.2921


Epoch 57/100: 100%|██████████| 23/23 [00:00<00:00, 24.91it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.72it/s]


Epoch 57/100, Train Loss: 1.0823, Val Loss: 1.0763, Val F1: 0.2921


Epoch 58/100: 100%|██████████| 23/23 [00:00<00:00, 24.14it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 21.26it/s]


Epoch 58/100, Train Loss: 1.0814, Val Loss: 1.0763, Val F1: 0.2921


Epoch 59/100: 100%|██████████| 23/23 [00:00<00:00, 28.94it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.04it/s]


Epoch 59/100, Train Loss: 1.0811, Val Loss: 1.0763, Val F1: 0.2921


Epoch 60/100: 100%|██████████| 23/23 [00:01<00:00, 14.55it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.52it/s]


Epoch 60/100, Train Loss: 1.0802, Val Loss: 1.0763, Val F1: 0.2921


Epoch 61/100: 100%|██████████| 23/23 [00:01<00:00, 15.46it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.94it/s]


Epoch 61/100, Train Loss: 1.0812, Val Loss: 1.0763, Val F1: 0.2921


Epoch 62/100: 100%|██████████| 23/23 [00:00<00:00, 25.84it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.97it/s]


Epoch 62/100, Train Loss: 1.0829, Val Loss: 1.0763, Val F1: 0.2921


Epoch 63/100: 100%|██████████| 23/23 [00:00<00:00, 23.80it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 23.52it/s]


Epoch 63/100, Train Loss: 1.0810, Val Loss: 1.0763, Val F1: 0.2921


Epoch 64/100: 100%|██████████| 23/23 [00:00<00:00, 29.08it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.85it/s]


Epoch 64/100, Train Loss: 1.0807, Val Loss: 1.0763, Val F1: 0.2921


Epoch 65/100: 100%|██████████| 23/23 [00:00<00:00, 27.06it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.57it/s]


Epoch 65/100, Train Loss: 1.0820, Val Loss: 1.0762, Val F1: 0.2921


Epoch 66/100: 100%|██████████| 23/23 [00:00<00:00, 26.50it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.81it/s]


Epoch 66/100, Train Loss: 1.0808, Val Loss: 1.0762, Val F1: 0.2921


Epoch 67/100: 100%|██████████| 23/23 [00:00<00:00, 29.10it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.11it/s]


Epoch 67/100, Train Loss: 1.0807, Val Loss: 1.0762, Val F1: 0.2921


Epoch 68/100: 100%|██████████| 23/23 [00:01<00:00, 20.34it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 16.91it/s]


Epoch 68/100, Train Loss: 1.0807, Val Loss: 1.0762, Val F1: 0.2921


Epoch 69/100: 100%|██████████| 23/23 [00:00<00:00, 26.69it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.63it/s]


Epoch 69/100, Train Loss: 1.0826, Val Loss: 1.0762, Val F1: 0.2921


Epoch 70/100: 100%|██████████| 23/23 [00:00<00:00, 29.31it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.39it/s]


Epoch 70/100, Train Loss: 1.0811, Val Loss: 1.0762, Val F1: 0.2921


Epoch 71/100: 100%|██████████| 23/23 [00:00<00:00, 23.11it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.77it/s]


Epoch 71/100, Train Loss: 1.0806, Val Loss: 1.0762, Val F1: 0.2921


Epoch 72/100: 100%|██████████| 23/23 [00:00<00:00, 28.86it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.47it/s]


Epoch 72/100, Train Loss: 1.0815, Val Loss: 1.0762, Val F1: 0.2921


Epoch 73/100: 100%|██████████| 23/23 [00:00<00:00, 26.03it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.18it/s]


Epoch 73/100, Train Loss: 1.0800, Val Loss: 1.0762, Val F1: 0.2921


Epoch 74/100: 100%|██████████| 23/23 [00:00<00:00, 30.23it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.67it/s]


Epoch 74/100, Train Loss: 1.0809, Val Loss: 1.0762, Val F1: 0.2921


Epoch 75/100: 100%|██████████| 23/23 [00:00<00:00, 38.81it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.08it/s]


Epoch 75/100, Train Loss: 1.0817, Val Loss: 1.0762, Val F1: 0.2921


Epoch 76/100: 100%|██████████| 23/23 [00:00<00:00, 38.77it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.42it/s]


Epoch 76/100, Train Loss: 1.0818, Val Loss: 1.0762, Val F1: 0.2921


Epoch 77/100: 100%|██████████| 23/23 [00:00<00:00, 34.13it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.91it/s]


Epoch 77/100, Train Loss: 1.0814, Val Loss: 1.0762, Val F1: 0.2921


Epoch 78/100: 100%|██████████| 23/23 [00:01<00:00, 22.05it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 38.00it/s]


Epoch 78/100, Train Loss: 1.0808, Val Loss: 1.0762, Val F1: 0.2921


Epoch 79/100: 100%|██████████| 23/23 [00:00<00:00, 37.75it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 38.04it/s]


Epoch 79/100, Train Loss: 1.0812, Val Loss: 1.0762, Val F1: 0.2921


Epoch 80/100: 100%|██████████| 23/23 [00:01<00:00, 18.33it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 20.36it/s]


Epoch 80/100, Train Loss: 1.0811, Val Loss: 1.0762, Val F1: 0.2921


Epoch 81/100: 100%|██████████| 23/23 [00:01<00:00, 15.17it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 18.28it/s]


Epoch 81/100, Train Loss: 1.0806, Val Loss: 1.0762, Val F1: 0.2921


Epoch 82/100: 100%|██████████| 23/23 [00:01<00:00, 13.39it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 16.58it/s]


Epoch 82/100, Train Loss: 1.0817, Val Loss: 1.0762, Val F1: 0.2921


Epoch 83/100: 100%|██████████| 23/23 [00:00<00:00, 25.12it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.23it/s]


Epoch 83/100, Train Loss: 1.0798, Val Loss: 1.0762, Val F1: 0.2921


Epoch 84/100: 100%|██████████| 23/23 [00:00<00:00, 23.32it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.19it/s]


Epoch 84/100, Train Loss: 1.0821, Val Loss: 1.0762, Val F1: 0.2921


Epoch 85/100: 100%|██████████| 23/23 [00:00<00:00, 23.91it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 18.25it/s]


Epoch 85/100, Train Loss: 1.0809, Val Loss: 1.0762, Val F1: 0.2921


Epoch 86/100: 100%|██████████| 23/23 [00:00<00:00, 29.38it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.23it/s]


Epoch 86/100, Train Loss: 1.0832, Val Loss: 1.0762, Val F1: 0.2921


Epoch 87/100: 100%|██████████| 23/23 [00:00<00:00, 24.25it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 17.73it/s]


Epoch 87/100, Train Loss: 1.0819, Val Loss: 1.0762, Val F1: 0.2921


Epoch 88/100: 100%|██████████| 23/23 [00:01<00:00, 21.31it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.21it/s]


Epoch 88/100, Train Loss: 1.0817, Val Loss: 1.0762, Val F1: 0.2921


Epoch 89/100: 100%|██████████| 23/23 [00:01<00:00, 21.32it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.41it/s]


Epoch 89/100, Train Loss: 1.0820, Val Loss: 1.0762, Val F1: 0.2921


Epoch 90/100: 100%|██████████| 23/23 [00:00<00:00, 33.06it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.24it/s]


Epoch 90/100, Train Loss: 1.0822, Val Loss: 1.0762, Val F1: 0.2921


Epoch 91/100: 100%|██████████| 23/23 [00:00<00:00, 36.61it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.64it/s]


Epoch 91/100, Train Loss: 1.0805, Val Loss: 1.0762, Val F1: 0.2921


Epoch 92/100: 100%|██████████| 23/23 [00:00<00:00, 38.93it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.44it/s]


Epoch 92/100, Train Loss: 1.0799, Val Loss: 1.0762, Val F1: 0.2921


Epoch 93/100: 100%|██████████| 23/23 [00:00<00:00, 36.76it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.93it/s]


Epoch 93/100, Train Loss: 1.0820, Val Loss: 1.0762, Val F1: 0.2921


Epoch 94/100: 100%|██████████| 23/23 [00:00<00:00, 32.22it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.33it/s]


Epoch 94/100, Train Loss: 1.0812, Val Loss: 1.0762, Val F1: 0.2921


Epoch 95/100: 100%|██████████| 23/23 [00:00<00:00, 35.04it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.22it/s]


Epoch 95/100, Train Loss: 1.0804, Val Loss: 1.0762, Val F1: 0.2921


Epoch 96/100: 100%|██████████| 23/23 [00:00<00:00, 38.68it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.75it/s]


Epoch 96/100, Train Loss: 1.0815, Val Loss: 1.0762, Val F1: 0.2921


Epoch 97/100: 100%|██████████| 23/23 [00:00<00:00, 32.49it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.21it/s]


Epoch 97/100, Train Loss: 1.0815, Val Loss: 1.0762, Val F1: 0.2921


Epoch 98/100: 100%|██████████| 23/23 [00:00<00:00, 36.72it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.42it/s]


Epoch 98/100, Train Loss: 1.0810, Val Loss: 1.0762, Val F1: 0.2921


Epoch 99/100: 100%|██████████| 23/23 [00:00<00:00, 39.65it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 46.43it/s]


Epoch 99/100, Train Loss: 1.0822, Val Loss: 1.0762, Val F1: 0.2921


Epoch 100/100: 100%|██████████| 23/23 [00:00<00:00, 42.14it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 46.06it/s]

Epoch 100/100, Train Loss: 1.0821, Val Loss: 1.0762, Val F1: 0.2921





In [4]:
checkpoint = torch.load('checkpoints_GCN_jaccard_similarity/model_epoch_0_f1_0.3347.pth')
per_type_metrics = checkpoint['metrics']['per_type_metrics']

# Print overall metrics first
overall_metrics = per_type_metrics['overall']
print("Overall metrics:")
print(f"Accuracy-score: {overall_metrics['accuracy']:.4f}")
print(f"Precision-score: {overall_metrics['precision']:.4f}")
print(f"Recall-score: {overall_metrics['recall']:.4f}")
print(f"F1-score: {overall_metrics['f1']:.4f}")

# Print per-type metrics
for clone_type, metrics in per_type_metrics.items():
    if clone_type != 'overall':  # Skip overall metrics since already printed
        print(f"\n{clone_type}:")
        print(f"Accuracy-score: {metrics['accuracy']:.4f}")
        print(f"Precision-score: {metrics['precision']:.4f}")
        print(f"Recall-score: {metrics['recall']:.4f}")
        print(f"F1-score: {metrics['f1']:.4f}")



Overall metrics:
Accuracy-score: 0.4608
Precision-score: 0.2628
Recall-score: 0.4608
F1-score: 0.3347

Type-1:
Accuracy-score: 0.9884
Precision-score: 0.0000
Recall-score: 0.0000
F1-score: 0.0000

Type-2:
Accuracy-score: 0.6348
Precision-score: 0.0000
Recall-score: 0.0000
F1-score: 0.0000

Type-3:
Accuracy-score: 0.6491
Precision-score: 0.5687
Recall-score: 0.9970
F1-score: 0.7242

Type-4:
Accuracy-score: 0.6491
Precision-score: 0.0000
Recall-score: 0.0000
F1-score: 0.0000
