In [1]:
!pip install torch_geometric torch gdown --quiet

In [2]:
!git clone --branch baselineCe https://github.com/Graph-Classification-Noisy-Label/hackaton.git

Cloning into 'hackaton'...
remote: Enumerating objects: 81, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 81 (delta 7), reused 0 (delta 0), pack-reused 62 (from 2)[K
Receiving objects: 100% (81/81), 105.83 MiB | 33.63 MiB/s, done.
Resolving deltas: 100% (8/8), done.
Updating files: 100% (38/38), done.


In [2]:
%cd hackaton/

/home/onyxia/work/DL-Hackathon/hackaton


In [4]:
!gdown --folder https://drive.google.com/drive/folders/1Z-1JkPJ6q4C6jX4brvq1VRbJH5RPUCAk -O datasets


Retrieving folder contents
Retrieving folder 1wcUVBNQkZ04zStXkglXSgERfIvjSHJiL A
Processing file 1C8sjkO6JS0j2SyVwQ07m8PhQ-pHpuI78 test.json.gz
Processing file 12N11n8gufNA_C1ns-1IeBseBHgrSfRI1 train.json.gz
Retrieving folder 1Tj5YoYYDDXjDxxi-cywZgoDkT0b1Qbz- B
Processing file 11GBlrXMdP3HSD60w-56Tu6rbGkR-Ifww test.json.gz
Processing file 13vp-Kwef3UgAwMG-dokGwKyARym9iqtL train.json.gz
Retrieving folder 1e3B_tBMd693Iwv8x3zRR9c47l5yt_5ey C
Processing file 18XVe65ZsQ0PDLCqQa4WmneVhyfjGcXmT test.json.gz
Processing file 1z5lvG2CytbLQZt7Jmo9BopzFd0pKejEj train.json.gz
Retrieving folder 1cvM0eZwpD4gzjo44_zdodxudVBMrLza1 D
Processing file 1Gna_dHnBLX8vKaYGAAqAbw5QPerrNK1u test.json.gz
Processing file 1Pc-6LMML80-AgEoLVs2Q5hLtmR_rTEek train.json.gz
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1C8sjkO6JS0j2SyVwQ07m8PhQ-pHpuI78
From (redirected): https://drive.google.com/uc?

In [3]:
!ls -lh datasets

total 16K
drwxr-sr-x 2 onyxia users 4.0K May 25 17:03 A
drwxr-sr-x 2 onyxia users 4.0K May 25 17:03 B
drwxr-sr-x 2 onyxia users 4.0K May 25 17:04 C
drwxr-sr-x 2 onyxia users 4.0K May 25 17:04 D


In [4]:
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt
import logging
from tqdm import tqdm
from torch_geometric.loader import DataLoader
from torch.utils.data import random_split
# Load utility functions from cloned repository
from src.loadData import GraphDataset
from src.utils import set_seed
from src.models import GNN
import argparse

# Set the random seed
set_seed()


In [5]:
def add_zeros(data):
    data.x = torch.zeros(data.num_nodes, dtype=torch.long)
    return data

In [6]:
def train(data_loader, model, optimizer, criterion, device, save_checkpoints, checkpoint_path, current_epoch):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for data in tqdm(data_loader, desc="Iterating training graphs", unit="batch"):
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)

    # Save checkpoints if required
    if save_checkpoints:
        checkpoint_file = f"{checkpoint_path}_epoch_{current_epoch + 1}.pth"
        torch.save(model.state_dict(), checkpoint_file)
        print(f"Checkpoint saved at {checkpoint_file}")

    return total_loss / len(data_loader),  correct / total

In [7]:
def evaluate(data_loader, model, device, calculate_accuracy=False):
    model.eval()
    correct = 0
    total = 0
    predictions = []
    total_loss = 0
    criterion = torch.nn.CrossEntropyLoss()
    with torch.no_grad():
        for data in tqdm(data_loader, desc="Iterating eval graphs", unit="batch"):
            data = data.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            
            if calculate_accuracy:
                correct += (pred == data.y).sum().item()
                total += data.y.size(0)
                total_loss += criterion(output, data.y).item()
            else:
                predictions.extend(pred.cpu().numpy())
    if calculate_accuracy:
        accuracy = correct / total
        return  total_loss / len(data_loader),accuracy
    return predictions

In [8]:
def save_predictions(predictions, test_path):
    script_dir = os.getcwd() 
    submission_folder = os.path.join(script_dir, "submission")
    test_dir_name = os.path.basename(os.path.dirname(test_path))
    
    os.makedirs(submission_folder, exist_ok=True)
    
    output_csv_path = os.path.join(submission_folder, f"testset_{test_dir_name}.csv")
    
    test_graph_ids = list(range(len(predictions)))
    output_df = pd.DataFrame({
        "id": test_graph_ids,
        "pred": predictions
    })
    
    output_df.to_csv(output_csv_path, index=False)
    print(f"Predictions saved to {output_csv_path}")

In [9]:
def plot_training_progress(train_losses, train_accuracies, output_dir):
    epochs = range(1, len(train_losses) + 1)
    plt.figure(figsize=(12, 6))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label="Training Loss", color='blue')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss per Epoch')

    # Plot accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, label="Training Accuracy", color='green')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy per Epoch')

    # Save plots in the current directory
    os.makedirs(output_dir, exist_ok=True)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "training_progress.png"))
    plt.close()

In [10]:
def get_user_input(prompt, default=None, required=False, type_cast=str):

    while True:
        user_input = input(f"{prompt} [{default}]: ")
        
        if user_input == "" and required:
            print("This field is required. Please enter a value.")
            continue
        
        if user_input == "" and default is not None:
            return default
        
        if user_input == "" and not required:
            return None
        
        try:
            return type_cast(user_input)
        except ValueError:
            print(f"Invalid input. Please enter a valid {type_cast.__name__}.")

In [11]:
def get_arguments():
    args = {}
    args['train_path'] = get_user_input("Path to the training dataset (optional)")
    args['test_path'] = get_user_input("Path to the test dataset", required=True)
    args['num_checkpoints'] = get_user_input("Number of checkpoints to save during training", type_cast=int)
    args['device'] = get_user_input("Which GPU to use if any", default=1, type_cast=int)
    args['gnn'] = get_user_input("GNN type (gin, gin-virtual, gcn, gcn-virtual)", default='gin')
    args['res'] = get_user_input("Residuals in GNN ? (1 yes, 0 no)", default=0)
    args['drop_ratio'] = get_user_input("Dropout ratio", default=0.0, type_cast=float)
    args['num_layer'] = get_user_input("Number of GNN message passing layers", default=5, type_cast=int)
    args['emb_dim'] = get_user_input("Dimensionality of hidden units in GNNs", default=300, type_cast=int)
    args['batch_size'] = get_user_input("Input batch size for training", default=32, type_cast=int)
    args['epochs'] = get_user_input("Number of epochs to train", default=10, type_cast=int)
    args['baseline_mode'] = get_user_input("Baseline mode: 1 (CE), 2 (Noisy CE)", default=1, type_cast=int)
    args['noise_prob'] = get_user_input("Noise probability p (used if baseline_mode=2)", default=0.2, type_cast=float)
    args['pooling'] = get_user_input("type of pooling (sum, mean, max, attention, set2set)", default='mean')
    
    return argparse.Namespace(**args)

In [12]:
def populate_args(args):
    print("Arguments received:")
    for key, value in vars(args).items():
        print(f"{key}: {value}")
args = get_arguments()
populate_args(args)

Path to the training dataset (optional) [None]:  datasets/B/train.json.gz
Path to the test dataset [None]:  datasets/B/test.json.gz
Number of checkpoints to save during training [None]:  30
Which GPU to use if any [1]:  
GNN type (gin, gin-virtual, gcn, gcn-virtual) [gin]:  gcn
Residuals in GNN ? (1 yes, 0 no) [0]:  1
Dropout ratio [0.0]:  0.5
Number of GNN message passing layers [5]:  
Dimensionality of hidden units in GNNs [300]:  
Input batch size for training [32]:  
Number of epochs to train [10]:  30
Baseline mode: 1 (CE), 2 (Noisy CE) [1]:  2
Noise probability p (used if baseline_mode=2) [0.2]:  0.4
type of pooling (sum, mean, max, attention, set2set) [mean]:  


Arguments received:
train_path: datasets/B/train.json.gz
test_path: datasets/B/test.json.gz
num_checkpoints: 30
device: 1
gnn: gcn
res: 1
drop_ratio: 0.5
num_layer: 5
emb_dim: 300
batch_size: 32
epochs: 30
baseline_mode: 2
noise_prob: 0.4
pooling: mean


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class ForwardCorrectedCrossEntropyLoss(nn.Module):
    def __init__(self, num_classes, noise_rate):
        super().__init__()
        C = num_classes
        p = noise_rate
        T = torch.full((C, C), p / (C - 1))
        T.fill_diagonal_(1 - p)
        self.register_buffer('T', T)  # register buffer = tensor attaché au module


    def _build_T_matrix(self):
        C = self.num_classes
        p = self.noise_rate
        T = torch.full((C, C), p / (C - 1))
        T.fill_diagonal_(1 - p)
        return T

    def forward(self, logits, targets):
        #print("logits device:", logits.device)
        #print("self.T device before:", self.T.device)
        T = self.T.to(logits.device)
        #print("T device after:", T.device)
        corrected_logits = torch.matmul(F.softmax(logits, dim=1), T)
        loss = F.nll_loss(torch.log(corrected_logits + 1e-8), targets)
        return loss


In [14]:
script_dir = os.getcwd() 
# device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_checkpoints = args.num_checkpoints if args.num_checkpoints else 3
    
if args.gnn == 'gin':
    model = GNN(gnn_type='gin', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=False, residual = True if args.res == 1 else False, graph_pooling=args.pooling).to(device)
elif args.gnn == 'gin-virtual':
    model = GNN(gnn_type='gin-virtual', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=True, residual = True if args.res == 1 else False, graph_pooling=args.pooling).to(device)
elif args.gnn == 'gcn':
    model = GNN(gnn_type='gcn', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=False, residual = True if args.res == 1 else False, graph_pooling=args.pooling).to(device)
elif args.gnn == 'gcn-virtual':
    model = GNN(gnn_type='gcn-virtual', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=True, residual = True if args.res == 1 else False, graph_pooling=args.pooling).to(device)
else:
    raise ValueError('Invalid GNN type')
    
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# criterion = torch.nn.CrossEntropyLoss()
if args.baseline_mode == 2:
    criterion = ForwardCorrectedCrossEntropyLoss(num_classes=6, noise_rate=args.noise_prob)
else:
    criterion = torch.nn.CrossEntropyLoss()

In [15]:
test_dir_name = os.path.basename(os.path.dirname(args.test_path))
logs_folder = os.path.join(script_dir, "logs", test_dir_name)
log_file = os.path.join(logs_folder, "training.log")
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(message)s')
logging.getLogger().addHandler(logging.StreamHandler())

checkpoint_path = os.path.join(script_dir, "checkpoints", f"model_{test_dir_name}_best.pth")
checkpoints_folder = os.path.join(script_dir, "checkpoints", test_dir_name)
os.makedirs(checkpoints_folder, exist_ok=True)


In [16]:
if os.path.exists(checkpoint_path) and not args.train_path:
    model.load_state_dict(torch.load(checkpoint_path))
    print(f"Loaded best model from {checkpoint_path}")

In [17]:
if args.train_path:
    full_dataset = GraphDataset(args.train_path, transform=add_zeros)
    val_size = int(0.2 * len(full_dataset))
    train_size = len(full_dataset) - val_size

    
    generator = torch.Generator().manual_seed(12)
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size], generator=generator)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    num_epochs = args.epochs
    best_val_accuracy = 0.0   

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    if num_checkpoints > 1:
        checkpoint_intervals = [int((i + 1) * num_epochs / num_checkpoints) for i in range(num_checkpoints)]
    else:
        checkpoint_intervals = [num_epochs]

    for epoch in range(num_epochs):
        train_loss, train_acc = train(
            train_loader, model, optimizer, criterion, device,
            save_checkpoints=(epoch + 1 in checkpoint_intervals),
            checkpoint_path=os.path.join(checkpoints_folder, f"model_{test_dir_name}"),
            current_epoch=epoch
        )

        val_loss,val_acc = evaluate(val_loader, model, device, calculate_accuracy=True)

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")
        logging.info(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Best model updated and saved at {checkpoint_path}")

    plot_training_progress(train_losses, train_accuracies, os.path.join(logs_folder, "plots"))
    plot_training_progress(val_losses, val_accuracies, os.path.join(logs_folder, "plotsVal"))

Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.53batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_1.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.75batch/s]
Epoch 1/30, Loss: 1.6505, Train Acc: 0.3754, Val Acc: 0.3875


Epoch 1/30, Loss: 1.6505, Train Acc: 0.3754, Val Acc: 0.3875
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.55batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_2.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.40batch/s]
Epoch 2/30, Loss: 1.5566, Train Acc: 0.4368, Val Acc: 0.4143


Epoch 2/30, Loss: 1.5566, Train Acc: 0.4368, Val Acc: 0.4143
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:56<00:00,  2.47batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_3.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.48batch/s]
Epoch 3/30, Loss: 1.5161, Train Acc: 0.4616, Val Acc: 0.4500


Epoch 3/30, Loss: 1.5161, Train Acc: 0.4616, Val Acc: 0.4500
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.54batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_4.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  3.13batch/s]
Epoch 4/30, Loss: 1.5027, Train Acc: 0.4703, Val Acc: 0.4304


Epoch 4/30, Loss: 1.5027, Train Acc: 0.4703, Val Acc: 0.4304


Iterating training graphs: 100%|██████████| 140/140 [00:58<00:00,  2.39batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_5.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:08<00:00,  3.97batch/s]
Epoch 5/30, Loss: 1.4824, Train Acc: 0.4862, Val Acc: 0.4366


Epoch 5/30, Loss: 1.4824, Train Acc: 0.4862, Val Acc: 0.4366


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.50batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_6.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.47batch/s]
Epoch 6/30, Loss: 1.4764, Train Acc: 0.4906, Val Acc: 0.3536


Epoch 6/30, Loss: 1.4764, Train Acc: 0.4906, Val Acc: 0.3536


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.51batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_7.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.68batch/s]
Epoch 7/30, Loss: 1.4565, Train Acc: 0.5020, Val Acc: 0.4696


Epoch 7/30, Loss: 1.4565, Train Acc: 0.5020, Val Acc: 0.4696
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:56<00:00,  2.47batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_8.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.55batch/s]
Epoch 8/30, Loss: 1.4529, Train Acc: 0.5045, Val Acc: 0.4616


Epoch 8/30, Loss: 1.4529, Train Acc: 0.5045, Val Acc: 0.4616


Iterating training graphs: 100%|██████████| 140/140 [00:56<00:00,  2.46batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_9.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.60batch/s]
Epoch 9/30, Loss: 1.4425, Train Acc: 0.5134, Val Acc: 0.4179


Epoch 9/30, Loss: 1.4425, Train Acc: 0.5134, Val Acc: 0.4179


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.51batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_10.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  3.18batch/s]
Epoch 10/30, Loss: 1.4437, Train Acc: 0.5089, Val Acc: 0.3652


Epoch 10/30, Loss: 1.4437, Train Acc: 0.5089, Val Acc: 0.3652


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.54batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_11.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.42batch/s]
Epoch 11/30, Loss: 1.4307, Train Acc: 0.5170, Val Acc: 0.4893


Epoch 11/30, Loss: 1.4307, Train Acc: 0.5170, Val Acc: 0.4893
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.58batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_12.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  3.11batch/s]
Epoch 12/30, Loss: 1.4187, Train Acc: 0.5263, Val Acc: 0.4161


Epoch 12/30, Loss: 1.4187, Train Acc: 0.5263, Val Acc: 0.4161


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.51batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_13.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  3.15batch/s]
Epoch 13/30, Loss: 1.4145, Train Acc: 0.5306, Val Acc: 0.4741


Epoch 13/30, Loss: 1.4145, Train Acc: 0.5306, Val Acc: 0.4741


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.59batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_14.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.65batch/s]
Epoch 14/30, Loss: 1.3984, Train Acc: 0.5408, Val Acc: 0.5179


Epoch 14/30, Loss: 1.3984, Train Acc: 0.5408, Val Acc: 0.5179
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:56<00:00,  2.48batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_15.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.57batch/s]
Epoch 15/30, Loss: 1.4027, Train Acc: 0.5339, Val Acc: 0.4804


Epoch 15/30, Loss: 1.4027, Train Acc: 0.5339, Val Acc: 0.4804


Iterating training graphs: 100%|██████████| 140/140 [00:58<00:00,  2.40batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_16.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.47batch/s]
Epoch 16/30, Loss: 1.3995, Train Acc: 0.5397, Val Acc: 0.4420


Epoch 16/30, Loss: 1.3995, Train Acc: 0.5397, Val Acc: 0.4420


Iterating training graphs: 100%|██████████| 140/140 [00:57<00:00,  2.43batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_17.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.63batch/s]
Epoch 17/30, Loss: 1.3834, Train Acc: 0.5455, Val Acc: 0.4920


Epoch 17/30, Loss: 1.3834, Train Acc: 0.5455, Val Acc: 0.4920


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.58batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_18.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.56batch/s]
Epoch 18/30, Loss: 1.3879, Train Acc: 0.5433, Val Acc: 0.5339


Epoch 18/30, Loss: 1.3879, Train Acc: 0.5433, Val Acc: 0.5339
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.56batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_19.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.85batch/s]
Epoch 19/30, Loss: 1.3707, Train Acc: 0.5542, Val Acc: 0.5143


Epoch 19/30, Loss: 1.3707, Train Acc: 0.5542, Val Acc: 0.5143


Iterating training graphs: 100%|██████████| 140/140 [00:53<00:00,  2.60batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_20.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.32batch/s]
Epoch 20/30, Loss: 1.3682, Train Acc: 0.5551, Val Acc: 0.5259


Epoch 20/30, Loss: 1.3682, Train Acc: 0.5551, Val Acc: 0.5259


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.57batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_21.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.62batch/s]
Epoch 21/30, Loss: 1.3734, Train Acc: 0.5507, Val Acc: 0.5304


Epoch 21/30, Loss: 1.3734, Train Acc: 0.5507, Val Acc: 0.5304


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.52batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_22.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.24batch/s]
Epoch 22/30, Loss: 1.3701, Train Acc: 0.5507, Val Acc: 0.4920


Epoch 22/30, Loss: 1.3701, Train Acc: 0.5507, Val Acc: 0.4920


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.58batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_23.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.71batch/s]
Epoch 23/30, Loss: 1.3633, Train Acc: 0.5578, Val Acc: 0.5179


Epoch 23/30, Loss: 1.3633, Train Acc: 0.5578, Val Acc: 0.5179


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.52batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_24.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.59batch/s]
Epoch 24/30, Loss: 1.3541, Train Acc: 0.5629, Val Acc: 0.4768


Epoch 24/30, Loss: 1.3541, Train Acc: 0.5629, Val Acc: 0.4768


Iterating training graphs: 100%|██████████| 140/140 [00:53<00:00,  2.61batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_25.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  2.93batch/s]
Epoch 25/30, Loss: 1.3516, Train Acc: 0.5645, Val Acc: 0.4223


Epoch 25/30, Loss: 1.3516, Train Acc: 0.5645, Val Acc: 0.4223


Iterating training graphs: 100%|██████████| 140/140 [00:55<00:00,  2.54batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_26.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.49batch/s]
Epoch 26/30, Loss: 1.3429, Train Acc: 0.5696, Val Acc: 0.4045


Epoch 26/30, Loss: 1.3429, Train Acc: 0.5696, Val Acc: 0.4045


Iterating training graphs: 100%|██████████| 140/140 [00:53<00:00,  2.60batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_27.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:11<00:00,  3.00batch/s]
Epoch 27/30, Loss: 1.3468, Train Acc: 0.5670, Val Acc: 0.5232


Epoch 27/30, Loss: 1.3468, Train Acc: 0.5670, Val Acc: 0.5232


Iterating training graphs: 100%|██████████| 140/140 [00:58<00:00,  2.39batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_28.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.57batch/s]
Epoch 28/30, Loss: 1.3410, Train Acc: 0.5690, Val Acc: 0.5357


Epoch 28/30, Loss: 1.3410, Train Acc: 0.5690, Val Acc: 0.5357
Best model updated and saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/model_B_best.pth


Iterating training graphs: 100%|██████████| 140/140 [00:53<00:00,  2.61batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_29.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:10<00:00,  3.49batch/s]
Epoch 29/30, Loss: 1.3308, Train Acc: 0.5781, Val Acc: 0.5339


Epoch 29/30, Loss: 1.3308, Train Acc: 0.5781, Val Acc: 0.5339


Iterating training graphs: 100%|██████████| 140/140 [00:54<00:00,  2.56batch/s]


Checkpoint saved at /home/onyxia/work/DL-Hackathon/hackaton/checkpoints/B/model_B_epoch_30.pth


Iterating eval graphs: 100%|██████████| 35/35 [00:09<00:00,  3.53batch/s]
Epoch 30/30, Loss: 1.3318, Train Acc: 0.5766, Val Acc: 0.5348


Epoch 30/30, Loss: 1.3318, Train Acc: 0.5766, Val Acc: 0.5348


In [19]:
import gc
del train_dataset
del train_loader
del full_dataset
del val_dataset
del val_loader
gc.collect()

12500

In [20]:
test_dataset = GraphDataset(args.test_path, transform=add_zeros)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    

In [21]:
model.load_state_dict(torch.load(checkpoint_path))
predictions = evaluate(test_loader, model, device, calculate_accuracy=False)
save_predictions(predictions, args.test_path)

Iterating eval graphs: 100%|██████████| 74/74 [00:16<00:00,  4.62batch/s]

Predictions saved to /home/onyxia/work/DL-Hackathon/hackaton/submission/testset_A.csv



