In [1]:
from functools import partial
from os.path import exists
import random
import os
import torch
import pickle
import numpy as np
import pandas as pd
import torch.nn as nn
from sklearn import metrics
import scipy.sparse as scpy
from torch.nn import Linear
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch_geometric.data as data
import torch_geometric.data.data as Batch
import torch.utils.data as data_utils
import torch_geometric.transforms as T
from torch.nn.init import xavier_uniform
from sklearn.metrics import roc_curve, auc
import torch_geometric.datasets as datasets
from torch_geometric.loader import DataLoader
from sklearn.preprocessing import MinMaxScaler
import torch_geometric.transforms as transforms
from sklearn.preprocessing import label_binarize
from torch_geometric.utils.convert import to_networkx
from torch_geometric.data import InMemoryDataset, Data
from torch.nn.utils import clip_grad_norm_, clip_grad_value_
from torch_geometric.nn import  GATv2Conv, GraphNorm,  SAGEConv, global_mean_pool, global_max_pool
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler


In [2]:
def split_sp(path,seed=1,split=0.9,parcela=0):
    np.random.seed(seed)
    random.seed(seed)
    parcelas = os.listdir(path)
    n_sample = round(len(parcelas)*split)
    train_ind = np.random.choice(np.linspace(1,len(parcelas),len(parcelas)),n_sample,replace=False).astype(int)
    test_ind = np.delete(np.linspace(1,len(parcelas),len(parcelas)),train_ind-1).astype(int)
    test_graphs = []
    train_graphs = []
    
    parcela = parcela 
    for itrain in train_ind:
        filename = f"{path}/parcela{itrain}"
        archivos = os.listdir(filename)
        ngrafos=int(len(archivos)/4)
        parcela+=1
        for i in range(ngrafos):
            i+=1
            edges = pd.read_csv(f"{filename}/el{i}.csv").iloc[:,1:]
            attributes = pd.read_csv(f"{filename}/z{i}.csv").iloc[:,1]
            label = pd.read_csv(f"{filename}/y{i}.csv").iloc[:,1]
            weights = pd.read_csv(f"{filename}/ea{i}.csv").iloc[:,1]
            weights = torch.tensor(weights.to_numpy(),dtype=torch.float)
            edge_idx = torch.tensor(edges.to_numpy().transpose(), dtype=torch.long)
            edge_idx -= 1
            attrs = torch.tensor(attributes.to_numpy(), dtype=torch.float)
            np_lab = label.to_numpy()
            y = torch.tensor(np_lab, dtype=torch.long)-1
            y = y[0]
            graph = Data(x=attrs, edge_index=edge_idx,  y=y, edge_attr = weights,parcela=parcela)
            train_graphs.append(graph)     
    for itest in test_ind:
        filename = f"{path}/parcela{itest}"
        archivos = os.listdir(filename)
        ngrafos=int(len(archivos)/4)
        parcela+=1
        for i in range(ngrafos):
            i+=1
            edges = pd.read_csv(f"{filename}/el{i}.csv").iloc[:,1:]
            attributes = pd.read_csv(f"{filename}/z{i}.csv").iloc[:,1]
            label = pd.read_csv(f"{filename}/y{i}.csv").iloc[:,1]
            weights = pd.read_csv(f"{filename}/ea{i}.csv").iloc[:,1]
            weights = torch.tensor(weights.to_numpy(),dtype=torch.float)
            edge_idx = torch.tensor(edges.to_numpy().transpose(), dtype=torch.long)
            edge_idx -= 1
            attrs = torch.tensor(attributes.to_numpy(), dtype=torch.float)
            np_lab = label.to_numpy()
            y = torch.tensor(np_lab, dtype=torch.long)-1
            y = y[0]
            graph = Data(x=attrs, edge_index=edge_idx,  y=y, edge_attr = weights,parcela=parcela)
            test_graphs.append(graph)
    return [test_graphs,train_graphs]

In [3]:
datos1 = split_sp(path='./datos/grafos/train/sp1',parcela=0)
parcela=(len(datos1[0])+len(datos1[1]))
datos2 = split_sp(path='./datos/grafos/train/sp2',parcela=parcela)
parcela=parcela+(len(datos2[0])+len(datos2[1]))
datos3 = split_sp(path='./datos/grafos/train/sp3',parcela=parcela)

KeyboardInterrupt: 

In [8]:
test = []
test.extend(datos1[0])
test.extend(datos2[0])
test.extend(datos3[0])

train = []
train.extend(datos1[1])
train.extend(datos2[1])
train.extend(datos3[1])

with open("./datos/train_graphs1_90.pkl",'wb') as f:
    pickle.dump(train,f)
with open("./datos/test_graphs1_90.pkl",'wb') as f:
    pickle.dump(test,f)

In [2]:
class GraphDataset(InMemoryDataset):
    def __init__(self, path):
        self.path = path
        self.data_list = []
        with open(path,'rb') as f:
            self.data_list.extend(pickle.load(f))

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]


In [3]:
train_dataset = GraphDataset("./datos/train_graphs1_90.pkl")
test_dataset = GraphDataset("./datos/test_graphs1_90.pkl")

In [4]:

# funcion para inicializar pesos de la red
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_uniform_(m.weight.data, nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight.data, 1)
        nn.init.constant_(m.bias.data, 0)
        
dp=0.2


class GAT(torch.nn.Module):
    def __init__(self, hid = 64, 
                 in_head = 16, 
                 out_features = 4,
                 s_fc1 = 2048,
                 s_fc2 = 1024):
        super(GAT, self).__init__()
        
        self.hid = hid
        self.in_head = in_head
        self.in_features = 1
        self.out_features = out_features
        self.s_fc1 = s_fc1
        self.s_fc2 = s_fc2
        
        self.conv1 =  GATv2Conv(self.in_features, self.out_features,edge_dim=1,heads=self.in_head,concat=True)
        self.conv2 =  SAGEConv(self.out_features*self.in_head, self.hid,normalize=False)
        self.norm1=GraphNorm(self.out_features*self.in_head)
        self.fc1 = nn.Linear(self.hid*2,self.s_fc1)
        self.fc2 = nn.Linear(self.s_fc1,self.s_fc2)
        self.fc3 = nn.Linear(self.s_fc2,3)
        self.conv1.apply(init_weights)
        self.conv2.apply(init_weights)
        self.fc1.apply(init_weights)
        self.fc2.apply(init_weights)
        self.fc3.apply(init_weights)


    def forward(self, data):
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        edge_attr = torch.unsqueeze(edge_attr,1)
        x = torch.unsqueeze(x,-1)
        
        x = self.conv1(x,edge_index,edge_attr)
        x = F.relu(x)
        x = self.norm1(x,batch)
        x = F.dropout(x, p=dp, training=self.training)
        
        x = self.conv2(x,edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=dp, training=self.training)
        
        x1 = global_max_pool(x,batch)
        x2 = global_mean_pool(x,batch)
        x = torch.cat((x1,x2),1)
        
        x = self.fc1(x)
        x = F.dropout(x, p=dp, training=self.training)
        x = F.relu(x)
        
        x = self.fc2(x)
        x = F.dropout(x, p=dp, training=self.training)
        x = F.relu(x)
        
        x = self.fc3(x)
        
        return x








In [37]:
config = {
    "hid": tune.choice([4, 8, 16,32, 64,88,128]),
    "in_head": tune.choice([2, 4, 8, 16,20, 24]),
    "out_features": tune.choice([2, 4, 6, 8,12]),
    "s_fc1": tune.choice([256, 512, 1024, 2048,3096]),
    "s_fc2": tune.choice([256, 512, 1024, 2048,3096]),
    "lr": tune.loguniform(1e-5, 1e-1),
    "wd": tune.loguniform(5e-4, 1e-6)
}





device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"
    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
print(device)

cuda:0


In [40]:
def train_graphs(config):

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
            
    model = GAT(config['hid'],
            config['in_head'],
            config['out_features'],
            config['s_fc1'],
            config['s_fc2'])
    
    model.to(device)
    print(model.train())


    optimizer = torch.optim.AdamW(model.parameters(), lr=config['lr'], weight_decay=config['wd'])
    
    criterion = torch.nn.CrossEntropyLoss(torch.tensor([1.1,1.05,1.0]).to(device))


    trainset = GraphDataset("/home/martin/Master/TFM/grafitos/datos/train_graphs1_90.pkl")
    testset = GraphDataset("/home/martin/Master/TFM/grafitos/datos/test_graphs1_90.pkl")

    trainloader = DataLoader(
        trainset,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        drop_last=True)
    
    valloader = DataLoader(
        testset,
        batch_size=int(200),
        shuffle=True,
        drop_last=True)
    
    model.train()
    for epoch in range(3):  # loop over the dataset multiple times
        running_loss = 0.0
        epoch_steps = 0
        lss=0
        count=0
        for i, data in enumerate(trainloader):

            batch=data.to(device)
            out = model(batch)
            loss = criterion(out, batch.y)
            running_loss+=loss.item()
            loss.backward()
            if(i!=0 and i%5==0):
                torch.nn.utils.clip_grad_norm_(model.parameters(),2)
                optimizer.step()
                optimizer.zero_grad()
                
            count+=1
            

        running_loss/=count
        epoch_steps += 1
    

        print('[Epoch %4d/%4d] Loss: % 2.2e' % (epoch + 1, 50, running_loss))

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader):
            with torch.no_grad():
                model.eval()
                data = data.to(device)

                outputs = model(data)
                _, predicted = torch.max(outputs.data, 1)
                total += data.y.size(0)
                correct += (predicted == data.y).sum().item()

                loss = criterion(outputs, data.y)
                val_loss += loss.cpu().numpy()
                val_steps += 1
        print('[Epoch %4d/%4d] Test Loss: % 2.2e' % (epoch + 1, 50, val_loss / val_steps))
        print('[Epoch %4d/%4d] Test Accuracy: % 2.2e' % (epoch + 1, 50, correct / total))

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")

In [32]:
config = {
    "hid": 64,
    "in_head":16,
    "out_features" : 4,
    "s_fc1":  2048,
    "s_fc2": 1024,
    "lr":0.0001,
    "wd":5e-5,
    'batch_size': 80
}
a=train_graphs(config=config)

GAT(
  (conv1): GATv2Conv(1, 4, heads=16)
  (conv2): SAGEConv(64, 64)
  (norm1): GraphNorm(64)
  (fc1): Linear(in_features=128, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=3, bias=True)
)
[Epoch    1/  50] Loss:  1.03e+00


    return _run_code(code, main_globals, None,
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
    self.io_loop.start()
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
    self._run_once()
  File "/home/martin/miniconda3/envs/local1/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
  

[Epoch    1/  50] Test Loss:  9.41e-01
[Epoch    1/  50] Test Accuracy:  4.97e-01
[Epoch    2/  50] Loss:  9.14e-01
[Epoch    2/  50] Test Loss:  8.78e-01
[Epoch    2/  50] Test Accuracy:  5.70e-01
[Epoch    3/  50] Loss:  8.74e-01
[Epoch    3/  50] Test Loss:  8.55e-01
[Epoch    3/  50] Test Accuracy:  5.86e-01
[Epoch    4/  50] Loss:  8.53e-01
[Epoch    4/  50] Test Loss:  8.40e-01
[Epoch    4/  50] Test Accuracy:  5.94e-01
[Epoch    5/  50] Loss:  8.39e-01
[Epoch    5/  50] Test Loss:  8.53e-01
[Epoch    5/  50] Test Accuracy:  5.85e-01
[Epoch    6/  50] Loss:  8.29e-01
[Epoch    6/  50] Test Loss:  8.09e-01
[Epoch    6/  50] Test Accuracy:  6.19e-01
[Epoch    7/  50] Loss:  8.24e-01
[Epoch    7/  50] Test Loss:  8.05e-01
[Epoch    7/  50] Test Accuracy:  6.20e-01
[Epoch    8/  50] Loss:  8.14e-01
[Epoch    8/  50] Test Loss:  8.09e-01
[Epoch    8/  50] Test Accuracy:  6.16e-01
[Epoch    9/  50] Loss:  8.09e-01
[Epoch    9/  50] Test Loss:  8.08e-01
[Epoch    9/  50] Test Accuracy: 

In [7]:
def test_accuracy(model, device="cpu"):


    testloader = DataLoader(
        test_dataset, batch_size=4, shuffle=False, drop_last=True)

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            data.to(device)
            
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == data.y).sum().item()

    return correct / total

In [44]:

config = {
    "hid": tune.choice([4, 8, 16,32, 64,88,128]),
    "in_head": tune.choice([2, 4, 8, 16,20, 24]),
    "out_features": tune.choice([2, 4, 6, 8,12,16]),
    "s_fc1": tune.choice([256, 512, 1024, 2048,3096]),
    "s_fc2": tune.choice([256, 512, 1024, 2048,3096]),
    "lr": tune.loguniform(1e-5, 1e-1),
    "wd": tune.loguniform(5e-4, 1e-6),
    "batch_size": tune.uniform(20,1000)
}

gpus_per_trial = 0
result = tune.run(train_graphs,
    resources_per_trial={"cpu":4, "gpu": gpus_per_trial},
    config=config,
    num_samples=1,
    scheduler=ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=2,
        grace_period=1,
        reduction_factor=2),
    progress_reporter=CLIReporter(
        parameter_columns=["hid","in_head","out_features","s_fc1","s_fc2","lr","wd"],
        metric_columns=["loss", "accuracy", "training_iteration"]))

2022-05-11 18:05:23,939	INFO trial_runner.py:803 -- starting train_graphs_2a3b0_00000


== Status ==
Current time: 2022-05-11 18:05:24 (running for 00:00:00.19)
Memory usage on this node: 10.6/15.5 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 2.000: None | Iter 1.000: None
Resources requested: 4.0/12 CPUs, 0/1 GPUs, 0.0/7.55 GiB heap, 0.0/3.77 GiB objects (0.0/1.0 accelerator_type:GTX)
Result logdir: /home/martin/ray_results/train_graphs_2022-05-11_18-05-23
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+--------------------+-------+-----------+----------------+---------+---------+------------+-------------+
| Trial name               | status   | loc                |   hid |   in_head |   out_features |   s_fc1 |   s_fc2 |         lr |          wd |
|--------------------------+----------+--------------------+-------+-----------+----------------+---------+---------+------------+-------------|
| train_graphs_2a3b0_00000 | RUNNING  | 172.23.73.92:10961 |     8 |         8 |              8 |     512 |     256 | 3.1047e-05 | 0.000104569 |
+--

== Status ==
Current time: 2022-05-11 18:05:57 (running for 00:00:33.27)
Memory usage on this node: 11.5/15.5 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 2.000: None | Iter 1.000: -1.0721577175201908
Resources requested: 4.0/12 CPUs, 0/1 GPUs, 0.0/7.55 GiB heap, 0.0/3.77 GiB objects (0.0/1.0 accelerator_type:GTX)
Result logdir: /home/martin/ray_results/train_graphs_2022-05-11_18-05-23
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+--------------------+-------+-----------+----------------+---------+---------+------------+-------------+---------+------------+----------------------+
| Trial name               | status   | loc                |   hid |   in_head |   out_features |   s_fc1 |   s_fc2 |         lr |          wd |    loss |   accuracy |   training_iteration |
|--------------------------+----------+--------------------+-------+-----------+----------------+---------+---------+------------+-------------+---------+------------+-------------------

2022-05-11 18:06:03,486	INFO tune.py:701 -- Total run time: 39.69 seconds (39.55 seconds for the tuning loop).


Result for train_graphs_2a3b0_00000:
  accuracy: 0.40774193548387094
  date: 2022-05-11_18-06-03
  done: true
  experiment_id: bfcafcaac0e749cf901b7a52757269f4
  hostname: martin-Prestige-14-A10SC
  iterations_since_restore: 2
  loss: 1.0607271655913322
  node_ip: 172.23.73.92
  pid: 10961
  time_since_restore: 37.30875873565674
  time_this_iter_s: 11.267946720123291
  time_total_s: 37.30875873565674
  timestamp: 1652285163
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 2a3b0_00000
  warmup_time: 0.0022125244140625
  
== Status ==
Current time: 2022-05-11 18:06:03 (running for 00:00:39.57)
Memory usage on this node: 11.5/15.5 GiB
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 2.000: -1.0607271655913322 | Iter 1.000: -1.0721577175201908
Resources requested: 0/12 CPUs, 0/1 GPUs, 0.0/7.55 GiB heap, 0.0/3.77 GiB objects (0.0/1.0 accelerator_type:GTX)
Result logdir: /home/martin/ray_results/train_graphs_2022-05-11_18-05-23
Number of trials: 1/1 (1 TERMINATED)
+---------

In [39]:
print(result)

NameError: name 'result' is not defined

In [None]:
def main(num_samples=10, max_num_epochs=50, gpus_per_trial=1):
    #data_dir = os.path.abspath("./data")
    #load_data(data_dir)
    config = {
        "hid": tune.choice([4, 8, 16,32, 64,88,128]),
        "in_head": tune.choice([2, 4, 8, 16,20, 24]),
        "in_features": 1,
        "out_features": tune.choice([2, 4, 6, 8,12,16,32]),
        "s_fc1": tune.choice([256, 512, 1024, 2048,3096]),
        "s_fc2": tune.choice([256, 512, 1024, 2048,3096]),
        "lr": tune.loguniform(1e-5, 1e-1),
        "wd": tune.loguniform(5e-4, 1e-6)
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(
        parameter_columns=["hid","in_head","in_features","out_features","s_fc1","s_fc2","lr","wd"],
        metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_graphs),
        resources_per_trial={"cpu": 4, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model = model(best_trial.config["hid"],
                               best_trial.config["in_head"],
                               best_trial.config["in_features"],
                               best_trial.config["out_features"],
                               best_trial.config["s_fc1"],
                               best_trial.config["s_fc2"],
                               best_trial.config["lr"],
                               best_trial.config["wd"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))


if __name__ == "__main__":
    # You can change the number of GPUs per trial here:
    main(num_samples=10, max_num_epochs=2, gpus_per_trial=0)