GCN + PDG

In [6]:
import os
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import json
import numpy as np
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def assign_node_type(node):
    node = node.strip()
    if 'if' in node:
        return 'condition'
    elif 'for' in node or 'while' in node:
        return 'loop'
    elif 'System.out.println' in node:
        return 'output'
    elif node.startswith('//') or node == '' or 'LogFrame' in node:
        return 'meaningless'
    return 'statement'

def assign_edge_type(edge):
    return 'control' if any(keyword in edge[0] for keyword in ['if', 'for', 'while']) else 'data'

def generate_pdg(code):
    G = nx.DiGraph()
    lines = code.splitlines()
    for i, line in enumerate(lines):
        line = line.strip()
        if not line:
            continue
        node_type = assign_node_type(line)
        if node_type != 'meaningless':
            G.add_node(i, label=line, type=node_type)
            if i > 0:
                prev_line = lines[i-1].strip()
                prev_node_type = assign_node_type(prev_line)
                if prev_node_type != 'meaningless':
                    G.add_edge(i-1, i, type=assign_edge_type((prev_line, line)))

    isolated_nodes = [node for node, degree in G.degree if degree == 0]
    G.remove_nodes_from(isolated_nodes)
    mapping = {node: idx for idx, node in enumerate(G.nodes())}
    return nx.relabel_nodes(G, mapping) if G.number_of_nodes() > 0 else None

def convert_nx_to_torch_data(G):
    node_features = torch.tensor(
        [[int(node_data['type'] == t) for t in ['condition', 'loop', 'output', 'statement']] for _, node_data in G.nodes(data=True)],
        dtype=torch.float
    )
    edge_index = torch.tensor(list(G.edges), dtype=torch.long).t().contiguous()
    return Data(x=node_features, edge_index=edge_index)

def create_pdg_pairs(data_file, jsonl_file):
    with open(jsonl_file, 'r') as f:
        url_to_code = {entry['idx']: entry['func'] for entry in (json.loads(line.strip()) for line in f)}

    pdg_pairs, true_labels = [], []
    with open(data_file, 'r') as f:
        for line in tqdm(f, desc="Creating PDG pairs"):
            url1, url2, label = line.strip().split('\t')
            if url1 not in url_to_code or url2 not in url_to_code:
                continue

            pdg1, pdg2 = generate_pdg(url_to_code[url1]), generate_pdg(url_to_code[url2])
            if pdg1 is None or pdg2 is None:
                continue

            pdg_pairs.append((convert_nx_to_torch_data(pdg1), convert_nx_to_torch_data(pdg2)))
            true_labels.append(int(label))  # 0, 1, 2, 3 for Type-1, Type-2, Type-3, Type-4 respectively

    return pdg_pairs, true_labels

# Save checkpoint function
def save_checkpoint(model, optimizer, epoch, f1_score, accuracy, precision, recall, metrics, output_dir):
    checkpoint_path = os.path.join(output_dir, f"model_epoch_{epoch}_f1_{f1_score:.4f}.pth")
    
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'metrics': {
            'best_f1': f1_score,
            'best_accuracy': accuracy,
            'best_precision': precision,
            'best_recall': recall,
            'per_type_metrics': metrics  
        }
    }, checkpoint_path)
    
    print(f"Checkpoint saved to {checkpoint_path}")

class GNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate=0.3):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, edge_index, batch):
        x = F.relu(self.conv1(x, edge_index))
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        x = global_mean_pool(x, batch)
        return x

class CloneDetector(nn.Module):
    def __init__(self, gnn):
        super(CloneDetector, self).__init__()
        self.gnn = gnn
        self.fc = nn.Linear(256, 4)  # 4 classes for Type-1, Type-2, Type-3, Type-4

    def forward(self, data):
        x1, edge_index1, batch1 = data[0].x, data[0].edge_index, data[0].batch
        x2, edge_index2, batch2 = data[1].x, data[1].edge_index, data[1].batch
        
        emb1 = self.gnn(x1, edge_index1, batch1)
        emb2 = self.gnn(x2, edge_index2, batch2)
        
        combined = torch.cat([emb1, emb2], dim=1)
        output = self.fc(combined)
        return output

def train(model, train_loader, valid_loader, epochs=20, output_dir="checkpoints"):
    optimizer = Adam(model.parameters(), lr=0.01)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()
    best_f1 = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            graph_pair, labels = batch
            graph_pair = [g.to(device) for g in graph_pair]
            labels = labels.to(device)
            
            optimizer.zero_grad()
            logits = model(graph_pair)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        val_loss, val_metrics = evaluate(model, valid_loader)
        val_f1 = val_metrics['overall']['f1']
        val_accuracy = val_metrics['overall']['accuracy']
        val_precision = val_metrics['overall']['precision']
        val_recall = val_metrics['overall']['recall']
        
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')
        
        scheduler.step(val_f1)
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            save_checkpoint(model, optimizer, epoch, best_f1, val_accuracy, 
                val_precision, val_recall, val_metrics, output_dir)

def evaluate(model, data_loader):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    total_loss = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            graph_pair, labels = batch
            graph_pair = [g.to(device) for g in graph_pair]
            labels = labels.to(device)
            
            logits = model(graph_pair)
            loss = criterion(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds, all_labels = np.array(all_preds), np.array(all_labels)

    metrics = {'overall': {
        'accuracy': accuracy_score(all_labels, all_preds),
        'precision': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
        'recall': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
        'f1': f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    }}

    for clone_type in range(4):
        type_preds, type_labels = (all_preds == clone_type), (all_labels == clone_type)
        metrics[f'Type-{clone_type+1}'] = {
            'accuracy': accuracy_score(type_labels, type_preds),
            'precision': precision_score(type_labels, type_preds, zero_division=0),
            'recall': recall_score(type_labels, type_preds, zero_division=0),
            'f1': f1_score(type_labels, type_preds, zero_division=0)
        }

    return total_loss / len(data_loader), metrics

# Main execution logic
input_dim = 4
hidden_dim = 64
output_dim = 128
dropout_rate = 0.3
gnn = GNN(input_dim, hidden_dim, output_dim, dropout_rate)
model = CloneDetector(gnn).to(device)

train_file, valid_file = 'test_set/train_5000.txt', 'test_set/train_5000.txt'
jsonl_file = 'test_set/data.jsonl'

train_pdg_pairs, train_labels = create_pdg_pairs(train_file, jsonl_file)
valid_pdg_pairs, valid_labels = create_pdg_pairs(valid_file, jsonl_file)

def collate_fn(batch):
    graphs, labels = zip(*batch)
    batched_graphs = [Batch.from_data_list([g[0] for g in graphs]),
                      Batch.from_data_list([g[1] for g in graphs])]
    return batched_graphs, torch.tensor(labels)

train_loader = DataLoader(list(zip(train_pdg_pairs, train_labels)), batch_size=64, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(list(zip(valid_pdg_pairs, valid_labels)), batch_size=64, collate_fn=collate_fn)

output_dir = "checkpoints_GCN_cosine_similarity"
os.makedirs(output_dir, exist_ok=True)

train(model, train_loader, valid_loader, epochs=100, output_dir=output_dir)

Creating PDG pairs: 3000it [00:02, 1415.54it/s]
Creating PDG pairs: 3000it [00:02, 1350.51it/s]
Epoch 1/100: 100%|██████████| 23/23 [00:00<00:00, 31.98it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 47.66it/s]


Epoch 1/100, Train Loss: 1.0272, Val Loss: 0.7715, Val F1: 0.6155
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_0_f1_0.6155.pth


Epoch 2/100: 100%|██████████| 23/23 [00:00<00:00, 30.30it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 60.52it/s]


Epoch 2/100, Train Loss: 0.7748, Val Loss: 0.7202, Val F1: 0.7506
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_1_f1_0.7506.pth


Epoch 3/100: 100%|██████████| 23/23 [00:00<00:00, 35.91it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 57.68it/s]


Epoch 3/100, Train Loss: 0.7285, Val Loss: 0.6993, Val F1: 0.7193


Epoch 4/100: 100%|██████████| 23/23 [00:00<00:00, 31.67it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 45.56it/s]


Epoch 4/100, Train Loss: 0.6984, Val Loss: 0.6765, Val F1: 0.7910
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_3_f1_0.7910.pth


Epoch 5/100: 100%|██████████| 23/23 [00:00<00:00, 26.67it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.98it/s]


Epoch 5/100, Train Loss: 0.6914, Val Loss: 0.6751, Val F1: 0.6978


Epoch 6/100: 100%|██████████| 23/23 [00:01<00:00, 22.22it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 41.13it/s]


Epoch 6/100, Train Loss: 0.6765, Val Loss: 0.6814, Val F1: 0.7464


Epoch 7/100: 100%|██████████| 23/23 [00:01<00:00, 21.37it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.24it/s]


Epoch 7/100, Train Loss: 0.6911, Val Loss: 0.6960, Val F1: 0.7772


Epoch 8/100: 100%|██████████| 23/23 [00:00<00:00, 29.70it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.97it/s]


Epoch 8/100, Train Loss: 0.6762, Val Loss: 0.6452, Val F1: 0.8122
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_7_f1_0.8122.pth


Epoch 9/100: 100%|██████████| 23/23 [00:00<00:00, 29.62it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 38.96it/s]


Epoch 9/100, Train Loss: 0.6601, Val Loss: 0.6331, Val F1: 0.7987


Epoch 10/100: 100%|██████████| 23/23 [00:00<00:00, 25.82it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 23.10it/s]


Epoch 10/100, Train Loss: 0.6832, Val Loss: 0.6349, Val F1: 0.8045


Epoch 11/100: 100%|██████████| 23/23 [00:03<00:00,  5.80it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.02it/s]


Epoch 11/100, Train Loss: 0.6871, Val Loss: 0.6372, Val F1: 0.8050


Epoch 12/100: 100%|██████████| 23/23 [00:01<00:00, 15.77it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.78it/s]


Epoch 12/100, Train Loss: 0.6605, Val Loss: 0.6442, Val F1: 0.7827


Epoch 13/100: 100%|██████████| 23/23 [00:02<00:00,  9.28it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 28.18it/s]


Epoch 13/100, Train Loss: 0.6545, Val Loss: 0.6384, Val F1: 0.8220
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_12_f1_0.8220.pth


Epoch 14/100: 100%|██████████| 23/23 [00:01<00:00, 14.74it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.50it/s]


Epoch 14/100, Train Loss: 0.6490, Val Loss: 0.6238, Val F1: 0.8068


Epoch 15/100: 100%|██████████| 23/23 [00:01<00:00, 21.51it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.65it/s]


Epoch 15/100, Train Loss: 0.6425, Val Loss: 0.6185, Val F1: 0.8141


Epoch 16/100: 100%|██████████| 23/23 [00:00<00:00, 23.76it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.05it/s]


Epoch 16/100, Train Loss: 0.6373, Val Loss: 0.6179, Val F1: 0.8124


Epoch 17/100: 100%|██████████| 23/23 [00:01<00:00, 20.83it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.95it/s]


Epoch 17/100, Train Loss: 0.6497, Val Loss: 0.6131, Val F1: 0.8036


Epoch 18/100: 100%|██████████| 23/23 [00:00<00:00, 23.50it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.52it/s]


Epoch 18/100, Train Loss: 0.6479, Val Loss: 0.6624, Val F1: 0.8482
Checkpoint saved to checkpoints_GCN_cosine_similarity\model_epoch_17_f1_0.8482.pth


Epoch 19/100: 100%|██████████| 23/23 [00:00<00:00, 25.42it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.13it/s]


Epoch 19/100, Train Loss: 0.6685, Val Loss: 0.6707, Val F1: 0.7927


Epoch 20/100: 100%|██████████| 23/23 [00:00<00:00, 23.94it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.15it/s]


Epoch 20/100, Train Loss: 0.6760, Val Loss: 0.6304, Val F1: 0.8025


Epoch 21/100: 100%|██████████| 23/23 [00:01<00:00, 17.64it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 38.88it/s]


Epoch 21/100, Train Loss: 0.6339, Val Loss: 0.6177, Val F1: 0.8053


Epoch 22/100: 100%|██████████| 23/23 [00:01<00:00, 16.71it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 24.66it/s]


Epoch 22/100, Train Loss: 0.6387, Val Loss: 0.6270, Val F1: 0.8188


Epoch 23/100: 100%|██████████| 23/23 [00:01<00:00, 19.35it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.46it/s]


Epoch 23/100, Train Loss: 0.6417, Val Loss: 0.6149, Val F1: 0.8144


Epoch 24/100: 100%|██████████| 23/23 [00:01<00:00, 21.44it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.83it/s]


Epoch 24/100, Train Loss: 0.6507, Val Loss: 0.6118, Val F1: 0.8090


Epoch 25/100: 100%|██████████| 23/23 [00:01<00:00, 15.68it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 23.92it/s]


Epoch 25/100, Train Loss: 0.6369, Val Loss: 0.6096, Val F1: 0.8082


Epoch 26/100: 100%|██████████| 23/23 [00:01<00:00, 17.17it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.94it/s]


Epoch 26/100, Train Loss: 0.6424, Val Loss: 0.6256, Val F1: 0.8175


Epoch 27/100: 100%|██████████| 23/23 [00:00<00:00, 25.23it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.85it/s]


Epoch 27/100, Train Loss: 0.6470, Val Loss: 0.6260, Val F1: 0.7961


Epoch 28/100: 100%|██████████| 23/23 [00:01<00:00, 20.94it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.56it/s]


Epoch 28/100, Train Loss: 0.6493, Val Loss: 0.6253, Val F1: 0.8095


Epoch 29/100: 100%|██████████| 23/23 [00:00<00:00, 26.39it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.68it/s]


Epoch 29/100, Train Loss: 0.6426, Val Loss: 0.6128, Val F1: 0.8108


Epoch 30/100: 100%|██████████| 23/23 [00:00<00:00, 25.55it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.91it/s]


Epoch 30/100, Train Loss: 0.6250, Val Loss: 0.6058, Val F1: 0.8169


Epoch 31/100: 100%|██████████| 23/23 [00:00<00:00, 23.76it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 41.26it/s]


Epoch 31/100, Train Loss: 0.6177, Val Loss: 0.6044, Val F1: 0.8152


Epoch 32/100: 100%|██████████| 23/23 [00:01<00:00, 20.66it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.74it/s]


Epoch 32/100, Train Loss: 0.6186, Val Loss: 0.6031, Val F1: 0.8124


Epoch 33/100: 100%|██████████| 23/23 [00:01<00:00, 14.31it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.35it/s]


Epoch 33/100, Train Loss: 0.6116, Val Loss: 0.6029, Val F1: 0.8177


Epoch 34/100: 100%|██████████| 23/23 [00:02<00:00,  8.46it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 20.72it/s]


Epoch 34/100, Train Loss: 0.6088, Val Loss: 0.6026, Val F1: 0.8109


Epoch 35/100: 100%|██████████| 23/23 [00:01<00:00, 15.81it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.35it/s]


Epoch 35/100, Train Loss: 0.6195, Val Loss: 0.6023, Val F1: 0.8116


Epoch 36/100: 100%|██████████| 23/23 [00:01<00:00, 20.70it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.32it/s]


Epoch 36/100, Train Loss: 0.6078, Val Loss: 0.6025, Val F1: 0.8145


Epoch 37/100: 100%|██████████| 23/23 [00:01<00:00, 21.83it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 37.44it/s]


Epoch 37/100, Train Loss: 0.6155, Val Loss: 0.6017, Val F1: 0.8184


Epoch 38/100: 100%|██████████| 23/23 [00:01<00:00, 21.45it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 23.52it/s]


Epoch 38/100, Train Loss: 0.6107, Val Loss: 0.6016, Val F1: 0.8206


Epoch 39/100: 100%|██████████| 23/23 [00:01<00:00, 13.36it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 21.95it/s]


Epoch 39/100, Train Loss: 0.6153, Val Loss: 0.6019, Val F1: 0.8145


Epoch 40/100: 100%|██████████| 23/23 [00:01<00:00, 16.87it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.64it/s]


Epoch 40/100, Train Loss: 0.6100, Val Loss: 0.6014, Val F1: 0.8221


Epoch 41/100: 100%|██████████| 23/23 [00:02<00:00,  9.86it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.33it/s]


Epoch 41/100, Train Loss: 0.6062, Val Loss: 0.6012, Val F1: 0.8169


Epoch 42/100: 100%|██████████| 23/23 [00:01<00:00, 17.90it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.06it/s]


Epoch 42/100, Train Loss: 0.6220, Val Loss: 0.6010, Val F1: 0.8176


Epoch 43/100: 100%|██████████| 23/23 [00:01<00:00, 19.85it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.35it/s]


Epoch 43/100, Train Loss: 0.6127, Val Loss: 0.6010, Val F1: 0.8191


Epoch 44/100: 100%|██████████| 23/23 [00:01<00:00, 21.06it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.58it/s]


Epoch 44/100, Train Loss: 0.6180, Val Loss: 0.6010, Val F1: 0.8184


Epoch 45/100: 100%|██████████| 23/23 [00:01<00:00, 19.25it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.83it/s]


Epoch 45/100, Train Loss: 0.6084, Val Loss: 0.6009, Val F1: 0.8184


Epoch 46/100: 100%|██████████| 23/23 [00:01<00:00, 14.55it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 33.85it/s]


Epoch 46/100, Train Loss: 0.6064, Val Loss: 0.6009, Val F1: 0.8184


Epoch 47/100: 100%|██████████| 23/23 [00:01<00:00, 22.06it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.56it/s]


Epoch 47/100, Train Loss: 0.6187, Val Loss: 0.6009, Val F1: 0.8184


Epoch 48/100: 100%|██████████| 23/23 [00:01<00:00, 17.26it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.54it/s]


Epoch 48/100, Train Loss: 0.6181, Val Loss: 0.6009, Val F1: 0.8198


Epoch 49/100: 100%|██████████| 23/23 [00:01<00:00, 22.10it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.22it/s]


Epoch 49/100, Train Loss: 0.6248, Val Loss: 0.6009, Val F1: 0.8206


Epoch 50/100: 100%|██████████| 23/23 [00:01<00:00, 19.34it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.19it/s]


Epoch 50/100, Train Loss: 0.6114, Val Loss: 0.6009, Val F1: 0.8206


Epoch 51/100: 100%|██████████| 23/23 [00:01<00:00, 16.78it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.27it/s]


Epoch 51/100, Train Loss: 0.6131, Val Loss: 0.6009, Val F1: 0.8191


Epoch 52/100: 100%|██████████| 23/23 [00:02<00:00,  9.75it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.60it/s]


Epoch 52/100, Train Loss: 0.6160, Val Loss: 0.6009, Val F1: 0.8191


Epoch 53/100: 100%|██████████| 23/23 [00:01<00:00, 14.56it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.77it/s]


Epoch 53/100, Train Loss: 0.6179, Val Loss: 0.6009, Val F1: 0.8191


Epoch 54/100: 100%|██████████| 23/23 [00:01<00:00, 18.39it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 22.04it/s]


Epoch 54/100, Train Loss: 0.6119, Val Loss: 0.6009, Val F1: 0.8191


Epoch 55/100: 100%|██████████| 23/23 [00:00<00:00, 23.42it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 17.64it/s]


Epoch 55/100, Train Loss: 0.6097, Val Loss: 0.6009, Val F1: 0.8191


Epoch 56/100: 100%|██████████| 23/23 [00:01<00:00, 14.10it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 16.24it/s]


Epoch 56/100, Train Loss: 0.6091, Val Loss: 0.6008, Val F1: 0.8191


Epoch 57/100: 100%|██████████| 23/23 [00:01<00:00, 18.61it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.55it/s]


Epoch 57/100, Train Loss: 0.6105, Val Loss: 0.6008, Val F1: 0.8191


Epoch 58/100: 100%|██████████| 23/23 [00:01<00:00, 19.25it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.90it/s]


Epoch 58/100, Train Loss: 0.6128, Val Loss: 0.6008, Val F1: 0.8191


Epoch 59/100: 100%|██████████| 23/23 [00:01<00:00, 17.73it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 35.11it/s]


Epoch 59/100, Train Loss: 0.6114, Val Loss: 0.6008, Val F1: 0.8198


Epoch 60/100: 100%|██████████| 23/23 [00:01<00:00, 21.79it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.02it/s]


Epoch 60/100, Train Loss: 0.6126, Val Loss: 0.6008, Val F1: 0.8198


Epoch 61/100: 100%|██████████| 23/23 [00:01<00:00, 20.45it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 32.98it/s]


Epoch 61/100, Train Loss: 0.6170, Val Loss: 0.6008, Val F1: 0.8191


Epoch 62/100: 100%|██████████| 23/23 [00:01<00:00, 21.61it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.23it/s]


Epoch 62/100, Train Loss: 0.6209, Val Loss: 0.6008, Val F1: 0.8191


Epoch 63/100: 100%|██████████| 23/23 [00:02<00:00, 11.31it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.98it/s]


Epoch 63/100, Train Loss: 0.6160, Val Loss: 0.6008, Val F1: 0.8198


Epoch 64/100: 100%|██████████| 23/23 [00:01<00:00, 22.21it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 31.93it/s]


Epoch 64/100, Train Loss: 0.6153, Val Loss: 0.6008, Val F1: 0.8198


Epoch 65/100: 100%|██████████| 23/23 [00:01<00:00, 20.61it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 26.54it/s]


Epoch 65/100, Train Loss: 0.6110, Val Loss: 0.6008, Val F1: 0.8198


Epoch 66/100: 100%|██████████| 23/23 [00:01<00:00, 20.35it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.64it/s]


Epoch 66/100, Train Loss: 0.6181, Val Loss: 0.6008, Val F1: 0.8198


Epoch 67/100: 100%|██████████| 23/23 [00:01<00:00, 22.29it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 29.15it/s]


Epoch 67/100, Train Loss: 0.6024, Val Loss: 0.6008, Val F1: 0.8198


Epoch 68/100: 100%|██████████| 23/23 [00:01<00:00, 22.57it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.31it/s]


Epoch 68/100, Train Loss: 0.6178, Val Loss: 0.6008, Val F1: 0.8198


Epoch 69/100: 100%|██████████| 23/23 [00:00<00:00, 29.48it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 45.42it/s]


Epoch 69/100, Train Loss: 0.6131, Val Loss: 0.6008, Val F1: 0.8198


Epoch 70/100: 100%|██████████| 23/23 [00:00<00:00, 26.65it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 41.86it/s]


Epoch 70/100, Train Loss: 0.6116, Val Loss: 0.6008, Val F1: 0.8198


Epoch 71/100: 100%|██████████| 23/23 [00:00<00:00, 26.24it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 25.62it/s]


Epoch 71/100, Train Loss: 0.6064, Val Loss: 0.6008, Val F1: 0.8198


Epoch 72/100: 100%|██████████| 23/23 [00:00<00:00, 24.81it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.57it/s]


Epoch 72/100, Train Loss: 0.6120, Val Loss: 0.6008, Val F1: 0.8198


Epoch 73/100: 100%|██████████| 23/23 [00:01<00:00, 20.84it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 22.41it/s]


Epoch 73/100, Train Loss: 0.6159, Val Loss: 0.6008, Val F1: 0.8198


Epoch 74/100: 100%|██████████| 23/23 [00:01<00:00, 13.37it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 19.73it/s]


Epoch 74/100, Train Loss: 0.6123, Val Loss: 0.6008, Val F1: 0.8198


Epoch 75/100: 100%|██████████| 23/23 [00:01<00:00, 13.92it/s]
Evaluating: 100%|██████████| 23/23 [00:01<00:00, 11.71it/s]


Epoch 75/100, Train Loss: 0.6133, Val Loss: 0.6008, Val F1: 0.8198


Epoch 76/100: 100%|██████████| 23/23 [00:01<00:00, 18.78it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.45it/s]


Epoch 76/100, Train Loss: 0.6143, Val Loss: 0.6008, Val F1: 0.8198


Epoch 77/100: 100%|██████████| 23/23 [00:01<00:00, 16.16it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 30.32it/s]


Epoch 77/100, Train Loss: 0.6130, Val Loss: 0.6008, Val F1: 0.8198


Epoch 78/100: 100%|██████████| 23/23 [00:01<00:00, 13.71it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 34.63it/s]


Epoch 78/100, Train Loss: 0.6110, Val Loss: 0.6008, Val F1: 0.8198


Epoch 79/100: 100%|██████████| 23/23 [00:01<00:00, 20.38it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 27.18it/s]


Epoch 79/100, Train Loss: 0.6105, Val Loss: 0.6008, Val F1: 0.8198


Epoch 80/100: 100%|██████████| 23/23 [00:01<00:00, 14.14it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 23.21it/s]


Epoch 80/100, Train Loss: 0.6066, Val Loss: 0.6008, Val F1: 0.8198


Epoch 81/100: 100%|██████████| 23/23 [00:01<00:00, 17.31it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 36.15it/s]


Epoch 81/100, Train Loss: 0.6091, Val Loss: 0.6008, Val F1: 0.8198


Epoch 82/100: 100%|██████████| 23/23 [00:00<00:00, 24.86it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 40.95it/s]


Epoch 82/100, Train Loss: 0.6103, Val Loss: 0.6008, Val F1: 0.8198


Epoch 83/100: 100%|██████████| 23/23 [00:00<00:00, 28.58it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 44.09it/s]


Epoch 83/100, Train Loss: 0.6108, Val Loss: 0.6008, Val F1: 0.8198


Epoch 84/100: 100%|██████████| 23/23 [00:00<00:00, 29.83it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.57it/s]


Epoch 84/100, Train Loss: 0.6111, Val Loss: 0.6008, Val F1: 0.8198


Epoch 85/100: 100%|██████████| 23/23 [00:00<00:00, 26.26it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 39.69it/s]


Epoch 85/100, Train Loss: 0.6109, Val Loss: 0.6008, Val F1: 0.8198


Epoch 86/100: 100%|██████████| 23/23 [00:00<00:00, 23.83it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 39.04it/s]


Epoch 86/100, Train Loss: 0.6131, Val Loss: 0.6008, Val F1: 0.8198


Epoch 87/100: 100%|██████████| 23/23 [00:00<00:00, 29.60it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 45.27it/s]


Epoch 87/100, Train Loss: 0.6070, Val Loss: 0.6008, Val F1: 0.8198


Epoch 88/100: 100%|██████████| 23/23 [00:00<00:00, 26.85it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 42.04it/s]


Epoch 88/100, Train Loss: 0.6136, Val Loss: 0.6008, Val F1: 0.8198


Epoch 89/100: 100%|██████████| 23/23 [00:00<00:00, 28.75it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 43.87it/s]


Epoch 89/100, Train Loss: 0.6097, Val Loss: 0.6008, Val F1: 0.8198


Epoch 90/100: 100%|██████████| 23/23 [00:00<00:00, 31.22it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 48.16it/s]


Epoch 90/100, Train Loss: 0.6046, Val Loss: 0.6008, Val F1: 0.8198


Epoch 91/100: 100%|██████████| 23/23 [00:00<00:00, 32.26it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 45.86it/s]


Epoch 91/100, Train Loss: 0.6139, Val Loss: 0.6008, Val F1: 0.8198


Epoch 92/100: 100%|██████████| 23/23 [00:00<00:00, 43.15it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 72.59it/s]


Epoch 92/100, Train Loss: 0.6172, Val Loss: 0.6008, Val F1: 0.8198


Epoch 93/100: 100%|██████████| 23/23 [00:00<00:00, 45.83it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 67.22it/s]


Epoch 93/100, Train Loss: 0.6154, Val Loss: 0.6008, Val F1: 0.8198


Epoch 94/100: 100%|██████████| 23/23 [00:00<00:00, 39.35it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 71.82it/s]


Epoch 94/100, Train Loss: 0.6131, Val Loss: 0.6008, Val F1: 0.8198


Epoch 95/100: 100%|██████████| 23/23 [00:00<00:00, 49.95it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 71.34it/s]


Epoch 95/100, Train Loss: 0.6136, Val Loss: 0.6008, Val F1: 0.8198


Epoch 96/100: 100%|██████████| 23/23 [00:00<00:00, 44.82it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 63.54it/s]


Epoch 96/100, Train Loss: 0.6105, Val Loss: 0.6008, Val F1: 0.8198


Epoch 97/100: 100%|██████████| 23/23 [00:00<00:00, 38.76it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 60.82it/s]


Epoch 97/100, Train Loss: 0.6171, Val Loss: 0.6008, Val F1: 0.8198


Epoch 98/100: 100%|██████████| 23/23 [00:00<00:00, 42.33it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 71.41it/s]


Epoch 98/100, Train Loss: 0.6155, Val Loss: 0.6008, Val F1: 0.8198


Epoch 99/100: 100%|██████████| 23/23 [00:00<00:00, 33.63it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 57.70it/s]


Epoch 99/100, Train Loss: 0.6161, Val Loss: 0.6008, Val F1: 0.8198


Epoch 100/100: 100%|██████████| 23/23 [00:00<00:00, 41.40it/s]
Evaluating: 100%|██████████| 23/23 [00:00<00:00, 68.09it/s]

Epoch 100/100, Train Loss: 0.6135, Val Loss: 0.6008, Val F1: 0.8198





In [7]:
checkpoint = torch.load('checkpoints_GCN_cosine_similarity/model_epoch_17_f1_0.8482.pth')
per_type_metrics = checkpoint['metrics']['per_type_metrics']

# Print overall metrics first
overall_metrics = per_type_metrics['overall']
print("Overall metrics:")
print(f"Accuracy-score: {overall_metrics['accuracy']:.4f}")
print(f"Precision-score: {overall_metrics['precision']:.4f}")
print(f"Recall-score: {overall_metrics['recall']:.4f}")
print(f"F1-score: {overall_metrics['f1']:.4f}")

# Print per-type metrics
for clone_type, metrics in per_type_metrics.items():
    if clone_type != 'overall':  # Skip overall metrics since already printed
        print(f"\n{clone_type}:")
        print(f"Accuracy-score: {metrics['accuracy']:.4f}")
        print(f"Precision-score: {metrics['precision']:.4f}")
        print(f"Recall-score: {metrics['recall']:.4f}")
        print(f"F1-score: {metrics['f1']:.4f}")



Overall metrics:
Accuracy-score: 0.8519
Precision-score: 0.8536
Recall-score: 0.8519
F1-score: 0.8482

Type-1:
Accuracy-score: 0.9939
Precision-score: 0.9000
Recall-score: 0.5294
F1-score: 0.6667

Type-2:
Accuracy-score: 0.9256
Precision-score: 0.8895
Recall-score: 0.6576
F1-score: 0.7562

Type-3:
Accuracy-score: 0.9024
Precision-score: 0.8589
Recall-score: 0.9439
F1-score: 0.8994

Type-4:
Accuracy-score: 0.8819
Precision-score: 0.8273
Recall-score: 0.8385
F1-score: 0.8329
