In [7]:
import torch

import pandas             as pd
import numpy              as np
import pylab              as pl

from sklearn.metrics import f1_score

from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [8]:
params = {'legend.fontsize': '20',
          'figure.figsize': (10, 10),
         'axes.labelsize': '20',
         'axes.titlesize':'20',
         'xtick.labelsize':'20',
         'ytick.labelsize':'20'}
pl.rcParams.update(params)

In [9]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 64)
        self.conv2 = GCNConv(64, data.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

In [10]:
classes = ['Peak','Filament','Sheet','Void']

df = pd.read_pickle('./data/TCW_topological_dataset.pkl')
idx = np.arange(len(df))
df.keys(), len(idx)

(Index(['ID', 'X', 'Y', 'Z', 'MASS', 'ENVIRONMENT', 'ID_DELAUNAY_CONNECTIONS',
        'N_DELAUNAY_CONNECTIONS', 'DIS_DELAUNAY_CONNECTIONS',
        'AVDIS_DELAUNAY_CONNECTIONS', 'ID_DELAUNAY_FIRSTNEIGH_CONNECTIONS',
        'N_DELAUNAY_FIRSTNEIGH_CONNECTIONS', 'ID_BSK_CONNECTIONS',
        'N_BSK_CONNECTIONS', 'DIS_BSK_CONNECTIONS', 'AVDIS_BSK_CONNECTIONS',
        'ID_BSK_FIRSTNEIGH_CONNECTIONS', 'N_BSK_FIRSTNEIGH_CONNECTIONS'],
       dtype='object'),
 17963)

## Delaunay Features  || Delaunay Edges -> TWebEnv

In [11]:
predictors = np.array(df[['N_DELAUNAY_CONNECTIONS','AVDIS_DELAUNAY_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

cuda


  edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 39.59 GiB total capacity; 19.09 MiB already allocated; 10.19 MiB free; 22.00 MiB reserved in total by PyTorch)

## Bsk Features  || Bsk Edges -> TWebEnv

In [None]:
predictors = np.array(df[['N_BSK_CONNECTIONS','AVDIS_BSK_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

## Delaunay + Bsk Features  || Delaunay Edges -> TWebEnv

In [None]:
predictors = np.array(df[['N_DELAUNAY_CONNECTIONS','AVDIS_DELAUNAY_CONNECTIONS',
                          'N_BSK_CONNECTIONS','AVDIS_BSK_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")

pl.tight_layout()
pl.show()

## Delaunay + Bsk Features  || Bsk Edges -> TWebEnv

In [None]:
predictors = np.array(df[['N_DELAUNAY_CONNECTIONS','AVDIS_DELAUNAY_CONNECTIONS','N_BSK_CONNECTIONS','AVDIS_BSK_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

##  Bsk Features  || Delaunay Edges -> TWebEnv

In [None]:
predictors = np.array(df[['N_BSK_CONNECTIONS','AVDIS_BSK_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

##  Delaunay Features  || Bsk Edges -> TWebEnv

In [None]:
predictors = np.array(df[['N_DELAUNAY_CONNECTIONS','AVDIS_DELAUNAY_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features
A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_BSK_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions
target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target
data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)))
ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

## First Neighbors in second layer

In [None]:
class GCN_fn(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(data.num_node_features, 128)
        self.conv2 = GCNConv(128, data.num_classes)

    def forward(self, data):
        x, edge_index, edge_index_fn = data.x, data.edge_index, data.edge_index_fn

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index_fn)        
        
        return F.log_softmax(x, dim=1)    

In [None]:
predictors = np.array(df[['N_DELAUNAY_CONNECTIONS','AVDIS_DELAUNAY_CONNECTIONS','N_BSK_CONNECTIONS','AVDIS_BSK_CONNECTIONS']], dtype=float)
x = torch.tensor(predictors, dtype=torch.float)  #features

A = np.concatenate([ np.zeros(len(c))+i for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_DELAUNAY_CONNECTIONS']) ]).ravel()
edge_index = torch.tensor([A,B], dtype=torch.long) # Conecctions

A = np.concatenate([ np.zeros(len(c))+B[i] for i, c in  enumerate(df['ID_DELAUNAY_FIRSTNEIGH_CONNECTIONS']) ]).ravel()
# A = np.concatenate([ np.zeros(len(c))+ i for i, c in  enumerate(df['ID_DELAUNAY_FIRSTNEIGH_CONNECTIONS']) ]).ravel()
B = np.concatenate([ c for i, c in  enumerate(df['ID_DELAUNAY_FIRSTNEIGH_CONNECTIONS']) ]).ravel()
edge_index_fn = torch.tensor([A,B], dtype=torch.long) # Conecctions


target = np.array([ c for c in  df['ENVIRONMENT'] ])
y = torch.tensor(target, dtype=torch.long)  #target

data = Data(x=x, edge_index=edge_index, y=y, num_classes= len(np.unique(y)), edge_index_fn=edge_index_fn )

ii_train = idx <=len(idx)*0.7
train_mask = ii_train
test_mask  = ~ii_train
data.train_mask = torch.tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.tensor(test_mask, dtype=torch.bool)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'
model = GCN_fn().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
lloss = []
lf1   = []

model.train()
for epoch in range(500):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    lloss.append(loss.cpu().data)
    f1 = f1_score(data.y[data.train_mask].cpu().detach().numpy(), out[data.train_mask].cpu().argmax(dim=1).detach().numpy(), average='weighted')
    lf1.append(f1)
    loss.backward()
    optimizer.step()
    
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(data.test_mask.sum())
print(f'Accuracy: {acc:.4f}')    

f1 = f1_score(data.y[data.test_mask].cpu().data, pred[data.test_mask].cpu(), average='weighted')
print(f'f1_score: {f1:.4f}')

fig = pl.figure(figsize=(14,7))
pl.subplot(1,2,1)
_ = pl.plot(lloss)
pl.xlabel('epoch')
pl.ylabel('loss')
pl.subplot(1,2,2)
_ = pl.plot(lf1)
pl.xlabel('epoch')
pl.ylabel('f1_score')

classes = ['Peak','Filament','Sheet','Void']
#------------ Confusion Matrix
cm = confusion_matrix(np.array(data.y[data.test_mask].cpu().data), np.array(pred[data.test_mask].cpu().data))
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

fig= pl.figure(figsize=(6,6))
ax = fig.add_subplot(1,1,1)
im = ax.imshow(cm, interpolation='nearest', cmap=pl.cm.Blues)
ax.figure.colorbar(im, ax=ax, pad=0.01, shrink=0.79)
ax.set(xticks=np.arange(cm.shape[1]), yticks=np.arange(cm.shape[0]), xticklabels=classes, yticklabels=classes)
ax.set_xlabel("Environment Predicted",size=20)
ax.set_ylabel("Environment True",size=20)
# ax.set_ylim(4-0.5, -0.5)

pl.setp(ax.get_xticklabels(), rotation=15, size=12)
pl.setp(ax.get_yticklabels(), rotation=45, size=12)

fmt = '.2f'
thresh = cm.max()/2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),ha="center", va="center",size=20 , color="white" if cm[i, j] > thresh else "black")
        
pl.show()

In [None]:
B