In [1]:
!pip install torch_geometric torch gdown --quiet

In [2]:
!git clone --branch baselineCe https://github.com/Graph-Classification-Noisy-Label/hackaton.git

Cloning into 'hackaton'...
remote: Enumerating objects: 69, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 69 (delta 1), reused 0 (delta 0), pack-reused 62 (from 2)[K
Receiving objects: 100% (69/69), 105.83 MiB | 46.69 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [2]:
%cd hackaton/

/home/onyxia/work/DL-Hackathon/hackaton


In [4]:
!gdown --folder https://drive.google.com/drive/folders/1Z-1JkPJ6q4C6jX4brvq1VRbJH5RPUCAk -O datasets


Retrieving folder contents
Retrieving folder 1wcUVBNQkZ04zStXkglXSgERfIvjSHJiL A
Processing file 1C8sjkO6JS0j2SyVwQ07m8PhQ-pHpuI78 test.json.gz
Processing file 12N11n8gufNA_C1ns-1IeBseBHgrSfRI1 train.json.gz
Retrieving folder 1Tj5YoYYDDXjDxxi-cywZgoDkT0b1Qbz- B
Processing file 11GBlrXMdP3HSD60w-56Tu6rbGkR-Ifww test.json.gz
Processing file 13vp-Kwef3UgAwMG-dokGwKyARym9iqtL train.json.gz
Retrieving folder 1e3B_tBMd693Iwv8x3zRR9c47l5yt_5ey C
Processing file 18XVe65ZsQ0PDLCqQa4WmneVhyfjGcXmT test.json.gz
Processing file 1z5lvG2CytbLQZt7Jmo9BopzFd0pKejEj train.json.gz
Retrieving folder 1cvM0eZwpD4gzjo44_zdodxudVBMrLza1 D
Processing file 1Gna_dHnBLX8vKaYGAAqAbw5QPerrNK1u test.json.gz
Processing file 1Pc-6LMML80-AgEoLVs2Q5hLtmR_rTEek train.json.gz
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1C8sjkO6JS0j2SyVwQ07m8PhQ-pHpuI78
From (redirected): https://dr

In [3]:
!ls -lh datasets

total 16K
drwxr-sr-x 2 onyxia users 4.0K May 28 07:46 A
drwxr-sr-x 2 onyxia users 4.0K May 28 07:46 B
drwxr-sr-x 2 onyxia users 4.0K May 28 07:46 C
drwxr-sr-x 2 onyxia users 4.0K May 28 07:47 D


In [4]:
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt
import logging
from tqdm import tqdm
from torch_geometric.loader import DataLoader
from torch.utils.data import random_split
# Load utility functions from cloned repository
from src.loadData import GraphDataset
from src.utils import set_seed
from src.models import GNN
import argparse

# Set the random seed
set_seed(42)


In [5]:
def add_zeros(data):
    data.x = torch.zeros(data.num_nodes, dtype=torch.long)
    return data

In [6]:
def train(data_loader, model, optimizer, criterion, device, save_checkpoints, checkpoint_path, current_epoch):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for data in tqdm(data_loader, desc="Iterating training graphs", unit="batch"):
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)

    # Save checkpoints if required
    if save_checkpoints:
        checkpoint_file = f"{checkpoint_path}_epoch_{current_epoch + 1}.pth"
        torch.save(model.state_dict(), checkpoint_file)
        print(f"Checkpoint saved at {checkpoint_file}")

    return total_loss / len(data_loader),  correct / total

In [7]:
def evaluate(data_loader, model, device, calculate_accuracy=False):
    model.eval()
    correct = 0
    total = 0
    predictions = []
    total_loss = 0
    criterion = torch.nn.CrossEntropyLoss()
    with torch.no_grad():
        for data in tqdm(data_loader, desc="Iterating eval graphs", unit="batch"):
            data = data.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            
            if calculate_accuracy:
                correct += (pred == data.y).sum().item()
                total += data.y.size(0)
                total_loss += criterion(output, data.y).item()
            else:
                predictions.extend(pred.cpu().numpy())
    if calculate_accuracy:
        accuracy = correct / total
        return  total_loss / len(data_loader),accuracy
    return predictions

In [8]:
def save_predictions(predictions, test_path):
    script_dir = os.getcwd() 
    submission_folder = os.path.join(script_dir, "submission")
    test_dir_name = os.path.basename(os.path.dirname(test_path))
    
    os.makedirs(submission_folder, exist_ok=True)
    
    output_csv_path = os.path.join(submission_folder, f"testset_{test_dir_name}.csv")
    
    test_graph_ids = list(range(len(predictions)))
    output_df = pd.DataFrame({
        "id": test_graph_ids,
        "pred": predictions
    })
    
    output_df.to_csv(output_csv_path, index=False)
    print(f"Predictions saved to {output_csv_path}")

In [9]:
def plot_training_progress(train_losses, train_accuracies, output_dir):
    epochs = range(1, len(train_losses) + 1)
    plt.figure(figsize=(12, 6))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label="Training Loss", color='blue')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss per Epoch')

    # Plot accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, label="Training Accuracy", color='green')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy per Epoch')

    # Save plots in the current directory
    os.makedirs(output_dir, exist_ok=True)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "training_progress.png"))
    plt.close()

In [10]:
def get_user_input(prompt, default=None, required=False, type_cast=str):

    while True:
        user_input = input(f"{prompt} [{default}]: ")
        
        if user_input == "" and required:
            print("This field is required. Please enter a value.")
            continue
        
        if user_input == "" and default is not None:
            return default
        
        if user_input == "" and not required:
            return None
        
        try:
            return type_cast(user_input)
        except ValueError:
            print(f"Invalid input. Please enter a valid {type_cast.__name__}.")

In [11]:
def get_arguments():
    args = {}
    args['train_path'] = get_user_input("Path to the training dataset (optional)")
    args['test_path'] = get_user_input("Path to the test dataset", required=True)
    args['num_checkpoints'] = get_user_input("Number of checkpoints to save during training", type_cast=int)
    args['device'] = get_user_input("Which GPU to use if any", default=1, type_cast=int)
    args['gnn'] = get_user_input("GNN type (gin, gin-virtual, gcn, gcn-virtual)", default='gin')
    args['drop_ratio'] = get_user_input("Dropout ratio", default=0.0, type_cast=float)
    args['num_layer'] = get_user_input("Number of GNN message passing layers", default=5, type_cast=int)
    args['emb_dim'] = get_user_input("Dimensionality of hidden units in GNNs", default=300, type_cast=int)
    args['batch_size'] = get_user_input("Input batch size for training", default=32, type_cast=int)
    args['epochs'] = get_user_input("Number of epochs to train", default=10, type_cast=int)
    args['baseline_mode'] = get_user_input("Baseline mode: 1 (CE), 2 (Noisy CE)", default=1, type_cast=int)
    args['noise_prob'] = get_user_input("Noise probability p (used if baseline_mode=2)", default=0.2, type_cast=float)

    
    return argparse.Namespace(**args)


In [27]:
def populate_args(args):
    print("Arguments received:")
    for key, value in vars(args).items():
        print(f"{key}: {value}")
args = get_arguments()
populate_args(args)

Path to the training dataset (optional) [None]:  datasets/B/train.json.gz
Path to the test dataset [None]:  datasets/B/test.json.gz
Number of checkpoints to save during training [None]:  20
Which GPU to use if any [1]:  
GNN type (gin, gin-virtual, gcn, gcn-virtual) [gin]:  gcn
Dropout ratio [0.0]:  0.5
Number of GNN message passing layers [5]:  
Dimensionality of hidden units in GNNs [300]:  
Input batch size for training [32]:  
Number of epochs to train [10]:  20
Baseline mode: 1 (CE), 2 (Noisy CE) [1]:  
Noise probability p (used if baseline_mode=2) [0.2]:  0.4


Arguments received:
train_path: datasets/B/train.json.gz
test_path: datasets/B/test.json.gz
num_checkpoints: 20
device: 1
gnn: gcn
drop_ratio: 0.5
num_layer: 5
emb_dim: 300
batch_size: 32
epochs: 20
baseline_mode: 1
noise_prob: 0.4


In [28]:
class NoisyCrossEntropyLoss(torch.nn.Module):
    def __init__(self, p_noisy):
        super().__init__()
        self.p = p_noisy
        self.ce = torch.nn.CrossEntropyLoss(reduction='none')

    def forward(self, logits, targets):
        losses = self.ce(logits, targets)
        weights = (1 - self.p) + self.p * (1 - torch.nn.functional.one_hot(targets, num_classes=logits.size(1)).float().sum(dim=1))
        return (losses * weights).mean()

In [29]:
import numpy as np

def estimate_loss_distribution(model, dataloader, criterion, device):
    model.eval()
    losses = []

    with torch.no_grad():
        for data in dataloader:
            data = data.to(device)
            outputs = model(data)
            loss = criterion(outputs, data.y)
            loss = loss.detach().cpu().numpy()
            losses.extend(loss)

    return np.array(losses)

def divide_data_by_loss(losses, threshold):
    """Divise les indices entre données propres (low loss) et bruitées (high loss)."""
    loss_mean = np.mean(losses)
    return np.where(losses < threshold * loss_mean)[0], np.where(losses >= threshold * loss_mean)[0]

In [30]:
def train_dividemix(train_loader, model1, model2, optimizer1, optimizer2, criterion, device, epoch, threshold=1.0):
    model1.train()
    model2.train()

    # Étape 1: Estimation des pertes individuelles
    losses1 = estimate_loss_distribution(model1, train_loader, criterion, device)
    losses2 = estimate_loss_distribution(model2, train_loader, criterion, device)

    idx_clean_1, _ = divide_data_by_loss(losses2, threshold)
    idx_clean_2, _ = divide_data_by_loss(losses1, threshold)

    total_loss1 = 0.0
    total_loss2 = 0.0
    num_batches = 0

    for i, data in enumerate(train_loader):
        data = data.to(device)
        optimizer1.zero_grad()
        optimizer2.zero_grad()

        # Création des masques clean pour le batch courant
        batch_indices = torch.arange(data.y.shape[0])
        mask_clean1 = torch.tensor([idx in idx_clean_2 for idx in batch_indices], device=device)
        mask_clean2 = torch.tensor([idx in idx_clean_1 for idx in batch_indices], device=device)

        # Modèle 1 entraîné sur les données propres identifiées par le modèle 2
        output1 = model1(data)
        if mask_clean2.any():
            loss1 = criterion(output1[mask_clean2], data.y[mask_clean2]).mean()
            loss1.backward()
            optimizer1.step()
            total_loss1 += loss1.item()
        else:
            total_loss1 += 0

        # Modèle 2 entraîné sur les données propres identifiées par le modèle 1
        output2 = model2(data)
        if mask_clean1.any():
            loss2 = criterion(output2[mask_clean1], data.y[mask_clean1]).mean()
            loss2.backward()
            optimizer2.step()
            total_loss2 += loss2.item()
        else:
            total_loss2 += 0

        num_batches += 1

    avg_loss1 = total_loss1 / num_batches
    avg_loss2 = total_loss2 / num_batches

    return avg_loss1, avg_loss2

In [31]:
script_dir = os.getcwd() 
# device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_checkpoints = args.num_checkpoints if args.num_checkpoints else 3
    
if args.gnn == 'gin':
    model = GNN(gnn_type='gin', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=False).to(device)
elif args.gnn == 'gin-virtual':
    model = GNN(gnn_type='gin', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=True).to(device)
elif args.gnn == 'gcn':
    model = GNN(gnn_type='gcn', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=False).to(device)
elif args.gnn == 'gcn-virtual':
    model = GNN(gnn_type='gcn', num_class=6, num_layer=args.num_layer, emb_dim=args.emb_dim, drop_ratio=args.drop_ratio, virtual_node=True).to(device)
else:
    raise ValueError('Invalid GNN type')
    
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# criterion = torch.nn.CrossEntropyLoss()
if args.baseline_mode == 2:
    criterion = NoisyCrossEntropyLoss(args.noise_prob)
else:
    criterion = torch.nn.CrossEntropyLoss()

In [32]:
from copy import deepcopy

model1 = deepcopy(model)
model2 = deepcopy(model)

optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.001)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.001)

# Ici, utilise une loss élément par élément
criterion_dividemix = torch.nn.CrossEntropyLoss(reduction='none')

In [33]:
test_dir_name = os.path.basename(os.path.dirname(args.test_path))
logs_folder = os.path.join(script_dir, "logs", test_dir_name)
log_file = os.path.join(logs_folder, "training.log")
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(filename=log_file, level=logging.INFO, format='%(asctime)s - %(message)s')
logging.getLogger().addHandler(logging.StreamHandler())

checkpoint_path = os.path.join(script_dir, "checkpoints", f"model_{test_dir_name}_best.pth")
checkpoints_folder = os.path.join(script_dir, "checkpoints", test_dir_name)
os.makedirs(checkpoints_folder, exist_ok=True)


In [34]:
if os.path.exists(checkpoint_path) and not args.train_path:
    model.load_state_dict(torch.load(checkpoint_path))
    print(f"Loaded best model from {checkpoint_path}")

In [35]:
if args.train_path:
    full_dataset = GraphDataset(args.train_path, transform=add_zeros)
    val_size = int(0.2 * len(full_dataset))
    train_size = len(full_dataset) - val_size

    
    generator = torch.Generator().manual_seed(12)
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size], generator=generator)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)

    num_epochs = args.epochs
    best_val_accuracy = 0.0   


    train_losses1 = []
    train_losses2 = []
    val_losses1 = []
    val_losses2 = []
    
    train_accuracies1 = []
    train_accuracies2 = []
    val_accuracies1 = []
    val_accuracies2 = []
    
    best_val_accuracy = 0.0
    best_model_name = ""

    if num_checkpoints > 1:
        checkpoint_intervals = [int((i + 1) * num_epochs / num_checkpoints) for i in range(num_checkpoints)]
    else:
        checkpoint_intervals = [num_epochs]

    for epoch in range(num_epochs):
        train_loss1, train_loss2 = train_dividemix(
            train_loader, model1, model2,
            optimizer1, optimizer2,
            criterion_dividemix, device, epoch, threshold=1.0
        )
    
        val_loss1, val_acc1 = evaluate(val_loader, model1, device, calculate_accuracy=True)
        val_loss2, val_acc2 = evaluate(val_loader, model2, device, calculate_accuracy=True)
    
        print(f"[Epoch {epoch + 1}]")
        print(f"  Model 1 -> Train Loss: {train_loss1:.4f}, Val Loss: {val_loss1:.4f}, Val Acc: {val_acc1:.4f}")
        print(f"  Model 2 -> Train Loss: {train_loss2:.4f}, Val Loss: {val_loss2:.4f}, Val Acc: {val_acc2:.4f}")
    
        save_checkpoints = (epoch + 1 in checkpoint_intervals)
    
        if save_checkpoints:
            torch.save(model1.state_dict(), os.path.join(checkpoints_folder, f"model1_epoch{epoch+1}.pth"))
            torch.save(model2.state_dict(), os.path.join(checkpoints_folder, f"model2_epoch{epoch+1}.pth"))
            print(f"Checkpoints saved for both models at epoch {epoch + 1}")
    
    
        # Stockage des métriques
        train_losses1.append(train_loss1)
        train_losses2.append(train_loss2)
        val_losses1.append(val_loss1)
        val_losses2.append(val_loss2)
    
        val_accuracies1.append(val_acc1)
        val_accuracies2.append(val_acc2)
    
        train_accuracies1.append(None)  # Si besoin de train_acc, implémenter calcul (cf plus bas)
        train_accuracies2.append(None)
    
        # Sauvegarde du meilleur modèle
        if val_acc1 > best_val_accuracy:
            best_val_accuracy = val_acc1
            torch.save(model1.state_dict(), os.path.join(checkpoints_folder, "best_model1.pth"))
            best_model_name = "model1"
            print("Best model updated: model1")
    
        if val_acc2 > best_val_accuracy:
            best_val_accuracy = val_acc2
            torch.save(model2.state_dict(), os.path.join(checkpoints_folder, "best_model2.pth"))
            best_model_name = "model2"
            print("Best model updated: model2")


    # Création des dossiers si besoin
    os.makedirs(os.path.join(logs_folder, "plots_model1"), exist_ok=True)
    os.makedirs(os.path.join(logs_folder, "plots_model2"), exist_ok=True)
    
    # Courbes pour model1
    plot_training_progress(train_losses1, train_accuracies1, os.path.join(logs_folder, "plots_model1"))
    plot_training_progress(val_losses1, val_accuracies1, os.path.join(logs_folder, "plots_model1Val"))
    
    # Courbes pour model2
    plot_training_progress(train_losses2, train_accuracies2, os.path.join(logs_folder, "plots_model2"))
    plot_training_progress(val_losses2, val_accuracies2, os.path.join(logs_folder, "plots_model2Val"))


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.38batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.15batch/s]


[Epoch 1]
  Model 1 -> Train Loss: 1.7506, Val Loss: 1.7349, Val Acc: 0.2946
  Model 2 -> Train Loss: 1.7438, Val Loss: 1.7356, Val Acc: 0.2509
Checkpoints saved for both models at epoch 1
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.50batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.73batch/s]


[Epoch 2]
  Model 1 -> Train Loss: 1.7449, Val Loss: 1.7567, Val Acc: 0.2616
  Model 2 -> Train Loss: 1.7224, Val Loss: 1.7147, Val Acc: 0.2848
Checkpoints saved for both models at epoch 2


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.88batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.28batch/s]


[Epoch 3]
  Model 1 -> Train Loss: 1.7117, Val Loss: 1.6851, Val Acc: 0.3384
  Model 2 -> Train Loss: 1.7045, Val Loss: 1.7071, Val Acc: 0.3187
Checkpoints saved for both models at epoch 3
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.53batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.80batch/s]


[Epoch 4]
  Model 1 -> Train Loss: 1.6622, Val Loss: 1.6410, Val Acc: 0.3991
  Model 2 -> Train Loss: 1.6814, Val Loss: 1.6558, Val Acc: 0.3857
Checkpoints saved for both models at epoch 4
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.52batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.73batch/s]


[Epoch 5]
  Model 1 -> Train Loss: 1.6387, Val Loss: 1.6104, Val Acc: 0.4018
  Model 2 -> Train Loss: 1.6460, Val Loss: 1.6213, Val Acc: 0.3946
Checkpoints saved for both models at epoch 5
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.87batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.75batch/s]


[Epoch 6]
  Model 1 -> Train Loss: 1.6158, Val Loss: 1.6051, Val Acc: 0.3955
  Model 2 -> Train Loss: 1.6038, Val Loss: 1.5934, Val Acc: 0.4071
Checkpoints saved for both models at epoch 6
Best model updated: model2


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.50batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.77batch/s]


[Epoch 7]
  Model 1 -> Train Loss: 1.5865, Val Loss: 1.5558, Val Acc: 0.4393
  Model 2 -> Train Loss: 1.5874, Val Loss: 1.5745, Val Acc: 0.4214
Checkpoints saved for both models at epoch 7
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.56batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.95batch/s]


[Epoch 8]
  Model 1 -> Train Loss: 1.5589, Val Loss: 1.5665, Val Acc: 0.4196
  Model 2 -> Train Loss: 1.5937, Val Loss: 1.5680, Val Acc: 0.4429
Checkpoints saved for both models at epoch 8
Best model updated: model2


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.01batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.74batch/s]


[Epoch 9]
  Model 1 -> Train Loss: 1.5815, Val Loss: 1.5587, Val Acc: 0.4545
  Model 2 -> Train Loss: 1.5726, Val Loss: 1.5562, Val Acc: 0.4446
Checkpoints saved for both models at epoch 9
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.81batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.33batch/s]


[Epoch 10]
  Model 1 -> Train Loss: 1.5527, Val Loss: 1.5518, Val Acc: 0.4464
  Model 2 -> Train Loss: 1.5576, Val Loss: 1.5614, Val Acc: 0.4188
Checkpoints saved for both models at epoch 10


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.17batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.99batch/s]


[Epoch 11]
  Model 1 -> Train Loss: 1.5630, Val Loss: 1.5867, Val Acc: 0.4009
  Model 2 -> Train Loss: 1.5627, Val Loss: 1.5737, Val Acc: 0.4232
Checkpoints saved for both models at epoch 11


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.67batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.08batch/s]


[Epoch 12]
  Model 1 -> Train Loss: 1.5223, Val Loss: 1.5233, Val Acc: 0.4723
  Model 2 -> Train Loss: 1.5542, Val Loss: 1.5444, Val Acc: 0.4554
Checkpoints saved for both models at epoch 12
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.51batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.81batch/s]


[Epoch 13]
  Model 1 -> Train Loss: 1.5338, Val Loss: 1.5444, Val Acc: 0.4268
  Model 2 -> Train Loss: 1.5454, Val Loss: 1.5343, Val Acc: 0.4402
Checkpoints saved for both models at epoch 13


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.27batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.65batch/s]


[Epoch 14]
  Model 1 -> Train Loss: 1.5575, Val Loss: 1.5359, Val Acc: 0.4634
  Model 2 -> Train Loss: 1.5232, Val Loss: 1.5513, Val Acc: 0.4420
Checkpoints saved for both models at epoch 14


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.39batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.69batch/s]


[Epoch 15]
  Model 1 -> Train Loss: 1.5273, Val Loss: 1.5407, Val Acc: 0.4473
  Model 2 -> Train Loss: 1.5276, Val Loss: 1.5458, Val Acc: 0.4580
Checkpoints saved for both models at epoch 15


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.17batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.80batch/s]


[Epoch 16]
  Model 1 -> Train Loss: 1.5475, Val Loss: 1.5456, Val Acc: 0.4214
  Model 2 -> Train Loss: 1.5233, Val Loss: 1.5830, Val Acc: 0.4188
Checkpoints saved for both models at epoch 16


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.16batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.26batch/s]


[Epoch 17]
  Model 1 -> Train Loss: 1.5052, Val Loss: 1.5301, Val Acc: 0.4527
  Model 2 -> Train Loss: 1.5061, Val Loss: 1.5173, Val Acc: 0.4634
Checkpoints saved for both models at epoch 17


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.44batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.59batch/s]


[Epoch 18]
  Model 1 -> Train Loss: 1.5280, Val Loss: 1.5152, Val Acc: 0.4821
  Model 2 -> Train Loss: 1.5306, Val Loss: 1.5153, Val Acc: 0.4777
Checkpoints saved for both models at epoch 18
Best model updated: model1


Iterating eval graphs: 100%|██████████| 35/35 [00:06<00:00,  5.23batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.77batch/s]


[Epoch 19]
  Model 1 -> Train Loss: 1.4968, Val Loss: 1.4972, Val Acc: 0.4821
  Model 2 -> Train Loss: 1.4993, Val Loss: 1.5403, Val Acc: 0.4946
Checkpoints saved for both models at epoch 19
Best model updated: model2


Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.43batch/s]
Iterating eval graphs: 100%|██████████| 35/35 [00:07<00:00,  4.79batch/s]


[Epoch 20]
  Model 1 -> Train Loss: 1.4914, Val Loss: 1.5129, Val Acc: 0.4830
  Model 2 -> Train Loss: 1.4863, Val Loss: 1.5129, Val Acc: 0.4938
Checkpoints saved for both models at epoch 20


In [36]:
import gc
del train_dataset
del train_loader
del full_dataset
del val_dataset
del val_loader
gc.collect()

23773

In [37]:
test_dataset = GraphDataset(args.test_path, transform=add_zeros)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

In [38]:
#model1.load_state_dict(torch.load(checkpoint_path, map_location='cuda:0'))
predictions = evaluate(test_loader, model1, device, calculate_accuracy=False)
save_predictions(predictions, args.test_path)

Iterating eval graphs: 100%|██████████| 49/49 [00:10<00:00,  4.71batch/s]

Predictions saved to /home/onyxia/work/DL-Hackathon/hackaton/submission/testset_B.csv





In [39]:
import tarfile
import os

def gzip_folder(folder_path, output_file):
    """
    Compresses an entire folder into a single .tar.gz file.

    Args:
        folder_path (str): Path to the folder to compress.
        output_file (str): Path to the output .gz file.
    """
    with tarfile.open(output_file, "w:gz") as tar:
        tar.add(folder_path, arcname=os.path.basename(folder_path))
    print(f"Folder '{folder_path}' has been compressed into '{output_file}'")

# Example usage
folder_path = "/home/onyxia/work/DL-Hackathon/hackaton/submission"            # Path to the folder you want to compress
output_file = "/home/onyxia/work/DL-Hackathon/hackaton/submission.gz"        # Output .gz file name
gzip_folder(folder_path, output_file)

Folder '/home/onyxia/work/DL-Hackathon/hackaton/submission' has been compressed into '/home/onyxia/work/DL-Hackathon/hackaton/submission.gz'
