## GIN Grid Search Combination

### MUTAG

In [None]:
### MUTAG with best val acc and best val loss
# For every fold, select the hyper-parameter combination with the highest
# validation accuracy (ties broken by the lower validation loss at the epoch of
# that maximum) and collect the test accuracy recorded for the selected run.
import os
from itertools import product

import pandas as pd
import numpy as np

# Define the path to the parent directory containing all the subdirectories
parent_dir = "GIN/MUTAG/"

layer_grid = [2, 3, 5]

bs_grid = [32, 64]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

test_acc_list = []

for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    # Start from +inf so that any real loss can win a tie on validation accuracy.
    best_val_loss = float('inf')
    # Reset per fold so a fold without usable results cannot silently report
    # the winner of the previous fold.
    best_combination = None
    # product() visits the grid in the same order as the equivalent nested loops.
    for layer, drop, lr, bs, dim_hidden in product(layer_grid, drop_grid, lr_grid, bs_grid, dim_hidden_grid):
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer, drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        if df.empty:
            # Header-only results file: nothing to compare.
            continue
        # NOTE(review): assumes the last column is validation accuracy and the
        # fourth-from-last is validation loss (inferred from the variable names) - confirm.
        val_acc_col = df.iloc[:, -1]
        val_acc = val_acc_col.max()
        # idxmax() returns an index *label*, so the loss is looked up by label
        # (.loc) instead of feeding that label to the positional .iloc.
        best_row = val_acc_col.idxmax()
        val_loss = df.iloc[:, -4].loc[best_row]
        improved = val_acc > best_val_acc
        tie_with_lower_loss = val_acc == best_val_acc and val_loss < best_val_loss
        if improved or tie_with_lower_loss:
            best_val_acc = val_acc
            best_val_loss = val_loss
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            # NOTE(review): second-to-last row, second column is presumably the
            # final test accuracy of the run - confirm against the training script.
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_acc:{}, best test acc:{}'.format(fold, best_combination, best_val_acc,best_test_acc))

test_acc_mean = np.mean(test_acc_list)
# Population std divided by sqrt(number of folds): this is a standard error of
# the mean, even though the label printed below calls it a standard deviation.
test_acc_std = np.std(test_acc_list)/np.sqrt(len(test_acc_list))

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

In [None]:
### MUTAG with best val acc and average test acc
# Per fold: find the highest validation accuracy over the grid and average the
# test accuracy of every combination that reaches it.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN/MUTAG/"

layer_grid = [2, 3, 5]

bs_grid = [32, 64]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    test_acc_sum = 0
    test_acc_count = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold,layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation accuracy - confirm.
        val_acc = df.iloc[:, -1].max()
        if val_acc == best_val_acc:
            # Another combination ties the current best: add it to the running average.
            test_acc = df.iloc[-2, 1]
            best_test_acc = max(best_test_acc, test_acc)
            test_acc_sum += test_acc
            test_acc_count += 1
        elif val_acc > best_val_acc:
            # Strictly better validation accuracy: restart the running average.
            best_val_acc = val_acc
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            best_test_acc = test_acc_sum = df.iloc[-2, 1]
            test_acc_count = 1

    fold_avg = test_acc_sum / test_acc_count
    test_acc_list.append(fold_avg)
    print('average test acc for fold {}: {}'.format(fold, fold_avg))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Average test accuracy mean: {test_acc_mean}")
print(f"Average test accuracy standard deviation: {test_acc_std}")
                                    

In [1]:
### GIN with 200 patience early stopping
# Per fold: select the combination whose results file reaches the lowest value
# in its last column and keep the test accuracy recorded for that run.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN_200/MUTAG/"

layer_grid = [2, 3, 5]

bs_grid = [32, 64]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_loss = float('inf')
    best_test_acc = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation loss here - confirm
        # against the layout written by the GIN_200 training runs.
        val_loss = df.iloc[:, -1].min()
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_combination = dict(layer=layer, drop=drop, lr=lr, bs=bs, dim_hidden=dim_hidden)
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_loss:{}, best test acc:{}'.format(fold, best_combination, best_val_loss,best_test_acc))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

Best combination for fold 1: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.0698954612016677, best test acc:0.631578947368421
Best combination for fold 2: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.147932082414627, best test acc:0.8421052631578947
Best combination for fold 3: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.1160806119441986, best test acc:0.8947368421052632
Best combination for fold 4: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 64}, bet_val_loss:0.0324104949831962, best test acc:0.7368421052631579
Best combination for fold 5: {'layer': 3, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.03721634298563, best test acc:0.7368421052631579
Best combination for fold 6: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.0010020037880167, best test acc:0.8421052631578947
Best combination for fold 7: {'l

### PTC

In [None]:
### PTC with best val acc and average test acc
# Per fold: find the highest validation accuracy over the grid and average the
# test accuracy of every combination that reaches it.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN/PTC_MR/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    test_acc_sum = 0
    test_acc_count = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold,layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation accuracy - confirm.
        val_acc = df.iloc[:, -1].max()
        if val_acc == best_val_acc:
            # Another combination ties the current best: add it to the running average.
            test_acc = df.iloc[-2, 1]
            best_test_acc = max(best_test_acc, test_acc)
            test_acc_sum += test_acc
            test_acc_count += 1
        elif val_acc > best_val_acc:
            # Strictly better validation accuracy: restart the running average.
            best_val_acc = val_acc
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            best_test_acc = test_acc_sum = df.iloc[-2, 1]
            test_acc_count = 1

    fold_avg = test_acc_sum / test_acc_count
    test_acc_list.append(fold_avg)
    print('average test acc for fold {}: {}'.format(fold, fold_avg))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Average test accuracy mean: {test_acc_mean}")
print(f"Average test accuracy standard deviation: {test_acc_std}")
                                    

In [None]:
### PTC with best val acc and best val loss
# For every fold, select the hyper-parameter combination with the highest
# validation accuracy (ties broken by the lower validation loss at the epoch of
# that maximum) and collect the test accuracy recorded for the selected run.
import os
from itertools import product

import pandas as pd
import numpy as np

# Define the path to the parent directory containing all the subdirectories
parent_dir = "GIN/PTC_MR/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

test_acc_list = []

for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    # Start from +inf so that any real loss can win a tie on validation accuracy.
    best_val_loss = float('inf')
    # Reset per fold so a fold without usable results cannot silently report
    # the winner of the previous fold.
    best_combination = None
    # product() visits the grid in the same order as the equivalent nested loops.
    for layer, drop, lr, bs, dim_hidden in product(layer_grid, drop_grid, lr_grid, bs_grid, dim_hidden_grid):
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer, drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        if df.empty:
            # Header-only results file: nothing to compare.
            continue
        # NOTE(review): assumes the last column is validation accuracy and the
        # fourth-from-last is validation loss (inferred from the variable names) - confirm.
        val_acc_col = df.iloc[:, -1]
        val_acc = val_acc_col.max()
        # idxmax() returns an index *label*, so the loss is looked up by label
        # (.loc) instead of feeding that label to the positional .iloc.
        best_row = val_acc_col.idxmax()
        val_loss = df.iloc[:, -4].loc[best_row]
        improved = val_acc > best_val_acc
        tie_with_lower_loss = val_acc == best_val_acc and val_loss < best_val_loss
        if improved or tie_with_lower_loss:
            best_val_acc = val_acc
            best_val_loss = val_loss
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            # NOTE(review): second-to-last row, second column is presumably the
            # final test accuracy of the run - confirm against the training script.
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_acc:{}, best test acc:{}'.format(fold, best_combination, best_val_acc,best_test_acc))

test_acc_mean = np.mean(test_acc_list)
# Population std divided by sqrt(number of folds): this is a standard error of
# the mean, even though the label printed below calls it a standard deviation.
test_acc_std = np.std(test_acc_list)/np.sqrt(len(test_acc_list))

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

In [7]:
### PTC with 200 patience early stopping
# Per fold: select the combination whose results file reaches the lowest value
# in its last column and keep the test accuracy recorded for that run.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN_200/PTC_MR/"

layer_grid = [2, 3, 5]

# NOTE(review): the output saved with this cell lists 'bs': 64 for some folds,
# which is not part of this grid - the grid may have been edited after the last
# run; confirm which batch sizes actually exist on disk.
bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_loss = float('inf')
    best_test_acc = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation loss here - confirm
        # against the layout written by the GIN_200 training runs.
        val_loss = df.iloc[:, -1].min()
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_combination = dict(layer=layer, drop=drop, lr=lr, bs=bs, dim_hidden=dim_hidden)
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_loss:{}, best test acc:{}'.format(fold, best_combination, best_val_loss,best_test_acc))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

Best combination for fold 1: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 32, 'dim_hidden': 64}, bet_val_loss:0.5767342448234558, best test acc:0.5428571428571428
Best combination for fold 2: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 64}, bet_val_loss:0.5576958060264587, best test acc:0.6285714285714286
Best combination for fold 3: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.6192448139190674, best test acc:0.6857142857142857
Best combination for fold 4: {'layer': 3, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.5750819444656372, best test acc:0.6
Best combination for fold 5: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 32, 'dim_hidden': 64}, bet_val_loss:0.5505779385566711, best test acc:0.5294117647058824
Best combination for fold 6: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.5603137016296387, best test acc:0.5588235294117647
Best combination for fold 7: {'layer': 5, '

### PROTEINS

In [None]:
### PROTEINS with best val acc and best val loss
# For every fold, select the hyper-parameter combination with the highest
# validation accuracy (ties broken by the lower validation loss at the epoch of
# that maximum) and collect the test accuracy recorded for the selected run.
import os
from itertools import product

import pandas as pd
import numpy as np

# Define the path to the parent directory containing all the subdirectories.
# Uses the same "GIN/PROTEINS/" directory as the average-test-accuracy cell for
# this dataset (the previous "GIN/PROTEINSs/" spelling looked like a typo).
parent_dir = "GIN/PROTEINS/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

test_acc_list = []

for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    # Start from +inf so that any real loss can win a tie on validation accuracy.
    best_val_loss = float('inf')
    # Reset per fold so a fold without usable results cannot silently report
    # the winner of the previous fold.
    best_combination = None
    # product() visits the grid in the same order as the equivalent nested loops.
    for layer, drop, lr, bs, dim_hidden in product(layer_grid, drop_grid, lr_grid, bs_grid, dim_hidden_grid):
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer, drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        if df.empty:
            # Header-only results file: nothing to compare.
            continue
        # NOTE(review): assumes the last column is validation accuracy and the
        # fourth-from-last is validation loss (inferred from the variable names) - confirm.
        val_acc_col = df.iloc[:, -1]
        val_acc = val_acc_col.max()
        # idxmax() returns an index *label*, so the loss is looked up by label
        # (.loc) instead of feeding that label to the positional .iloc.
        best_row = val_acc_col.idxmax()
        val_loss = df.iloc[:, -4].loc[best_row]
        improved = val_acc > best_val_acc
        tie_with_lower_loss = val_acc == best_val_acc and val_loss < best_val_loss
        if improved or tie_with_lower_loss:
            best_val_acc = val_acc
            best_val_loss = val_loss
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            # NOTE(review): second-to-last row, second column is presumably the
            # final test accuracy of the run - confirm against the training script.
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_acc:{}, best test acc:{}'.format(fold, best_combination, best_val_acc,best_test_acc))

test_acc_mean = np.mean(test_acc_list)
# Population std divided by sqrt(number of folds): this is a standard error of
# the mean, even though the label printed below calls it a standard deviation.
test_acc_std = np.std(test_acc_list)/np.sqrt(len(test_acc_list))

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

In [None]:
### PROTEINS with best val acc and average test acc
# Per fold: find the highest validation accuracy over the grid and average the
# test accuracy of every combination that reaches it.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN/PROTEINS/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    test_acc_sum = 0
    test_acc_count = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold,layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation accuracy - confirm.
        val_acc = df.iloc[:, -1].max()
        if val_acc == best_val_acc:
            # Another combination ties the current best: add it to the running average.
            test_acc = df.iloc[-2, 1]
            best_test_acc = max(best_test_acc, test_acc)
            test_acc_sum += test_acc
            test_acc_count += 1
        elif val_acc > best_val_acc:
            # Strictly better validation accuracy: restart the running average.
            best_val_acc = val_acc
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            best_test_acc = test_acc_sum = df.iloc[-2, 1]
            test_acc_count = 1

    fold_avg = test_acc_sum / test_acc_count
    test_acc_list.append(fold_avg)
    print('average test acc for fold {}: {}'.format(fold, fold_avg))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Average test accuracy mean: {test_acc_mean}")
print(f"Average test accuracy standard deviation: {test_acc_std}")
                                    

In [5]:
### PROTEINS with 200 early stopping
# Per fold: select the combination whose results file reaches the lowest value
# in its last column and keep the test accuracy recorded for that run.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN_200/PROTEINS/"

layer_grid = [2, 3, 5]

bs_grid = [32, 64]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_loss = float('inf')
    best_test_acc = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation loss here - confirm
        # against the layout written by the GIN_200 training runs.
        val_loss = df.iloc[:, -1].min()
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_combination = dict(layer=layer, drop=drop, lr=lr, bs=bs, dim_hidden=dim_hidden)
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_loss:{}, best test acc:{}'.format(fold, best_combination, best_val_loss,best_test_acc))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

Best combination for fold 1: {'layer': 2, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 64}, bet_val_loss:0.4658931994438171, best test acc:0.7232142857142857
Best combination for fold 2: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 64}, bet_val_loss:0.5064217162132263, best test acc:0.7232142857142857
Best combination for fold 3: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.3813396239280701, best test acc:0.7142857142857143
Best combination for fold 4: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4970497012138367, best test acc:0.7567567567567568
Best combination for fold 5: {'layer': 3, 'drop': 0.5, 'lr': 0.01, 'bs': 64, 'dim_hidden': 32}, bet_val_loss:0.4796243023872375, best test acc:0.7567567567567568
Best combination for fold 6: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4898679375648498, best test acc:0.8018018018018018
Best combination for fold 7:

### NCI1

In [4]:
### NCI1 with best val acc and best val loss
# For every fold, select the hyper-parameter combination with the highest
# validation accuracy (ties broken by the lower validation loss at the epoch of
# that maximum) and collect the test accuracy recorded for the selected run.
import os
from itertools import product

import pandas as pd
import numpy as np

# Define the path to the parent directory containing all the subdirectories
parent_dir = "GIN/NCI1/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

test_acc_list = []

for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    # Start from +inf so that any real loss can win a tie on validation accuracy.
    best_val_loss = float('inf')
    # Reset per fold so a fold without usable results cannot silently report
    # the winner of the previous fold.
    best_combination = None
    # product() visits the grid in the same order as the equivalent nested loops.
    for layer, drop, lr, bs, dim_hidden in product(layer_grid, drop_grid, lr_grid, bs_grid, dim_hidden_grid):
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer, drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        if df.empty:
            # Header-only results file: nothing to compare.
            continue
        # NOTE(review): assumes the last column is validation accuracy and the
        # fourth-from-last is validation loss (inferred from the variable names) - confirm.
        val_acc_col = df.iloc[:, -1]
        val_acc = val_acc_col.max()
        # idxmax() returns an index *label*, so the loss is looked up by label
        # (.loc) instead of feeding that label to the positional .iloc.
        best_row = val_acc_col.idxmax()
        val_loss = df.iloc[:, -4].loc[best_row]
        improved = val_acc > best_val_acc
        tie_with_lower_loss = val_acc == best_val_acc and val_loss < best_val_loss
        if improved or tie_with_lower_loss:
            best_val_acc = val_acc
            best_val_loss = val_loss
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            # NOTE(review): second-to-last row, second column is presumably the
            # final test accuracy of the run - confirm against the training script.
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_acc:{}, best test acc:{}'.format(fold, best_combination, best_val_acc,best_test_acc))

test_acc_mean = np.mean(test_acc_list)
# Population std divided by sqrt(number of folds): this is a standard error of
# the mean, even though the label printed below calls it a standard deviation.
test_acc_std = np.std(test_acc_list)/np.sqrt(len(test_acc_list))

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

Best combination for fold 1: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_acc:0.7594594594594595, best test acc:0.7956204379562044
Best combination for fold 2: {'layer': 5, 'drop': 0.5, 'lr': 0.01, 'bs': 128, 'dim_hidden': 32}, bet_val_acc:0.8517520215633423, best test acc:0.8102189781021898
Best combination for fold 3: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_acc:0.8436657681940701, best test acc:0.7834549878345499
Best combination for fold 4: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 128, 'dim_hidden': 32}, bet_val_acc:0.8355795148247979, best test acc:0.7980535279805353
Best combination for fold 5: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_acc:0.8189189189189189, best test acc:0.8272506082725061
Best combination for fold 6: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_acc:0.8243243243243243, best test acc:0.754257907542579
Best combination for fold 7: {'la

In [4]:
### NCI1 with best val acc and average test acc
# Per fold: find the highest validation accuracy over the grid and average the
# test accuracy of every combination that reaches it.
import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN/NCI1/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_acc = 0
    best_test_acc = 0
    test_acc_sum = 0
    test_acc_count = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold,layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation accuracy - confirm.
        val_acc = df.iloc[:, -1].max()
        if val_acc == best_val_acc:
            # Another combination ties the current best: add it to the running average.
            test_acc = df.iloc[-2, 1]
            best_test_acc = max(best_test_acc, test_acc)
            test_acc_sum += test_acc
            test_acc_count += 1
        elif val_acc > best_val_acc:
            # Strictly better validation accuracy: restart the running average.
            best_val_acc = val_acc
            best_combination = {'layer': layer, 'drop': drop, 'lr': lr, 'bs': bs, 'dim_hidden': dim_hidden}
            best_test_acc = test_acc_sum = df.iloc[-2, 1]
            test_acc_count = 1

    fold_avg = test_acc_sum / test_acc_count
    test_acc_list.append(fold_avg)
    print('average test acc for fold {}: {}'.format(fold, fold_avg))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Average test accuracy mean: {test_acc_mean}")
print(f"Average test accuracy standard deviation: {test_acc_std}")
                                    

average test acc for fold 1: 0.7956204379562044
average test acc for fold 2: 0.8102189781021898
average test acc for fold 3: 0.7834549878345499
average test acc for fold 4: 0.7919708029197081
average test acc for fold 5: 0.8248175182481752
average test acc for fold 6: 0.754257907542579
average test acc for fold 7: 0.7956204379562044
average test acc for fold 8: 0.7712895377128953
average test acc for fold 9: 0.8175182481751825
average test acc for fold 10: 0.8223844282238443
Average test accuracy mean: 0.7967153284671532
Average test accuracy standard deviation: 0.006865561833180736


In [5]:
### NCI1 GIN with 200 early stopping
# Per fold: select the combination whose results file reaches the lowest value
# in its last column and keep the test accuracy recorded for that run.

import os
import pandas as pd
import numpy as np

# Root folder holding one sub-folder per fold
parent_dir = "GIN_200/NCI1/"

layer_grid = [2, 3, 5]

bs_grid = [32, 128]
drop_grid = [0.0, 0.5]
lr_grid = [0.01]
dim_hidden_grid = [32, 64]

# Flattened grid, enumerated in the same order as five nested loops would be.
configs = [
    (layer, drop, lr, bs, dim_hidden)
    for layer in layer_grid
    for drop in drop_grid
    for lr in lr_grid
    for bs in bs_grid
    for dim_hidden in dim_hidden_grid
]

test_acc_list = []
for fold in range(1, 11):
    best_val_loss = float('inf')
    best_test_acc = 0
    for layer, drop, lr, bs, dim_hidden in configs:
        path = parent_dir + 'fold_{}/{}_{}_{}_{}_{}/results.csv'.format(fold, layer,drop, lr, bs, dim_hidden)
        df = pd.read_csv(path)
        # NOTE(review): the last column is treated as validation loss here - confirm
        # against the layout written by the GIN_200 training runs.
        val_loss = df.iloc[:, -1].min()
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_combination = dict(layer=layer, drop=drop, lr=lr, bs=bs, dim_hidden=dim_hidden)
            best_test_acc = df.iloc[-2, 1]
    test_acc_list.append(best_test_acc)
    print('Best combination for fold {}: {}, bet_val_loss:{}, best test acc:{}'.format(fold, best_combination, best_val_loss,best_test_acc))

fold_scores = np.asarray(test_acc_list)
test_acc_mean = fold_scores.mean()
# Population std scaled by 1/sqrt(10), i.e. a standard error over the 10 folds.
test_acc_std = fold_scores.std() / np.sqrt(10)

print(f"Test accuracy mean: {test_acc_mean}")
print(f"Test accuracy standard deviation: {test_acc_std}")
                                    

Best combination for fold 1: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4341772904564048, best test acc:0.7956204379562044
Best combination for fold 2: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4769335809115795, best test acc:0.8102189781021898
Best combination for fold 3: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4505386722443226, best test acc:0.8004866180048662
Best combination for fold 4: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4757267511310939, best test acc:0.7980535279805353
Best combination for fold 5: {'layer': 5, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.4953068932543602, best test acc:0.7737226277372263
Best combination for fold 6: {'layer': 3, 'drop': 0.0, 'lr': 0.01, 'bs': 32, 'dim_hidden': 32}, bet_val_loss:0.558102876513308, best test acc:0.8004866180048662
Best combination for fold 7: 