In [23]:
lam=1e-4    # Regularisation parameter


In [25]:
import os
import torch
import networkx as nx
from sklearn.metrics import f1_score
import torch_geometric
import random
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
import torch.nn.functional as F
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from torch_geometric.nn import GCNConv
import numpy as np
import math

In [2]:
def train_and_test(model,data,lmbda):       # training and testing GCN models
    data.to(device)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    criterion = torch.nn.CrossEntropyLoss()

    def train():
        model.train()
        optimizer.zero_grad()  # Clear gradients.
        reg = sum(p.pow(2.0).sum()
                  for p in model.parameters())
        out = model(data.x, data.edge_index)  # Perform a single forward pass.
        loss = criterion(out[data.train_mask], data.y[data.train_mask])+lmbda*reg  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        return loss

    def test():
        model.eval()
        out = model(data.x, data.edge_index)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
        test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
        mac_f1=f1_score(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu(), average='macro');
        return test_acc,mac_f1


    for epoch in range(1, 801):
        loss = train()

    test_acc,mac_f1 = test()
    return test_acc,mac_f1

In [3]:
def train_and_test_val(model,data,lmbda):
    data.to(device)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    criterion = torch.nn.CrossEntropyLoss()

    def train():
        model.train()
        optimizer.zero_grad()  # Clear gradients.
        reg = sum(p.pow(2.0).sum()
                  for p in model.parameters())
        out = model(data.x, data.edge_index)  # Perform a single forward pass.
        loss = criterion(out[data.train_mask], data.y[data.train_mask])+lmbda*reg  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
        return loss

    def val():
        model.eval()
        out = model(data.x, data.edge_index)
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        val1=((True^data.train_mask)*(True^data.test_mask))
        val_correct = pred[val1] == data.y[val1]  # Check against ground-truth labels.
        val_acc = int(val_correct.sum()) / int(val1.sum())  # Derive ratio of correct predictions.
        return val_acc
    
    def test():
      model.eval()
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      mac_f1=f1_score(data.y[data.test_mask].cpu(), pred[data.test_mask].cpu(), average='macro');
      return test_acc,mac_f1

    for epoch in range(1, 801):
        loss = train()

    val_acc = val()
    test_acc,mac_f1=test()

    #print(f'Test Accuracy: {test_acc:.4f}')
    return val_acc,test_acc,mac_f1

In [4]:

class SGCN(torch.nn.Module):
    def __init__(self,hidden_channels=16,seed=13):
        super().__init__()
        torch.manual_seed(seed)
        self.conv1 = GCNConv(dataset.num_features,10)
        self.conv2=GCNConv(10,dataset.num_classes)
    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x=F.relu(x)
        x = self.conv2(x, edge_index)
        return x


In [6]:
def create_noisy(data,noise_level):   # Function to add SLN noise to data
    data1=data.clone()

    rand_num=torch.rand(len(data.y))
    for k in range(no_of_classes):
        for j in range(no_of_classes-1):
            pos_to_flip=(((rand_num<(j+1)*noise_level/(no_of_classes-1)).to(device))*(((j)*noise_level/(no_of_classes-1)<=rand_num).to(device))*data.train_mask)*(data.y==k)
            data1.y[pos_to_flip]=(data1.y[pos_to_flip]+(j+1))%no_of_classes
    return(data1)

def create_noisy_ccn(data,noise_level):
    data1=data.clone()
    rand_num=torch.rand(len(data.y))
    pos_to_flip=(rand_num<=noise_level).to(device)*data1.train_mask
    data1.y[pos_to_flip]=(data.y[pos_to_flip]+1)%no_of_classes
    del data
    torch.cuda.empty_cache()
    return(data1)



In [7]:
dataset = Planetoid(root='data/py311/Planetoid', name='Citeseer', split="random", num_train_per_class=250,num_val=50,num_test=1000,transform=NormalizeFeatures())
data = dataset[0]  # Get the first graph object.
datap=data.clone()
no_of_classes=int((data.y.max()+1).detach())
datap.to(device)



Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [9]:
def majvote(n,p):
    val=np.float128(0);
    p=np.float128(p)
    for i in range(math.ceil(n/2),n+1):
        val=val+np.float128(math.comb(n,i))*(p**i)*((1-p)**(n-i))
    return np.float32(val)

def q(p,max_degree):
    q1=[]
    for n in range(1,int(max_degree)+1):
        q1.append(majvote(n,p))
    return q1
def veto_power(n,p):
    return(1-((1-p)**n))

def r(p,max_degree):
    r1=[]
    for n in range(1,int(max_degree)+1):
        r1.append(veto_power(n,p))
    return r1
def sequential_mod(n,p):
    k=no_of_classes
    return (((k-1)/k)*(1-(1-(k*p)/(k-1))**(n)))

def s_mod(p,max_degree):
    r1=[]
    for n in range(1,int(max_degree)+1):
        r1.append(sequential_mod(n,p))
    return r1
def s(p,max_degree):  # for sequential flipping
    s1=[p]
    temp=p;
    for n in range(2,int(max_degree)+1):
        temp=p+temp*(1-2*p)
        s1.append(temp)
    return s1

def rho_calculate(data,type='majority_vote',):
    g_nx=torch_geometric.utils.convert.to_networkx(data,to_undirected='True',remove_self_loops='True')
    max_degree=np.max(np.array(g_nx.degree)[:,1])/2
    unique, counts = np.unique(np.array(g_nx.degree)[:,1], return_counts=True)
    unique=unique/2
    no_nodes=counts.sum()
    x = np.linspace(0,1,5000)
    y = expected_noise(x,unique,counts,max_degree,no_nodes,type)
    noise_lev=[0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5]
    x_intersect=np.zeros(len(noise_lev))
    for j,i in enumerate(noise_lev):
        x_intersect[j]=(x[np.argmin(np.abs(y - i))])
    return x_intersect
def expected_noise(p,unique,counts,max_degree,no_nodes,type='majority_vote'):
    if type=='majority_vote':
        qn=q(p,max_degree)
    elif type=='veto_power':
        qn=r(p,max_degree)
    elif type=='sequential':
        qn=s_mod(p,max_degree)
    deg_dist=np.zeros(int(max_degree))
    for i,j in enumerate(unique):
        deg_dist[int(j)-1]=counts[i]/no_nodes
    exp_noise=np.dot(deg_dist,np.array(qn))
    return exp_noise

In [12]:



g_nx=torch_geometric.utils.convert.to_networkx(data,to_undirected='True',remove_self_loops='True')
max_degree=np.max(np.array(g_nx.degree)[:,1])/2
degree_list=(np.array(g_nx.degree)[:,1])/2

In [18]:
#estimating rho_values
rho_mv=rho_calculate(data,'majority_vote')
rho_veto=rho_calculate(data, 'veto_power')
rho_seq=rho_calculate(data, 'sequential')
                       


In [19]:
g_nx=torch_geometric.utils.convert.to_networkx(data,to_undirected='True',remove_self_loops='True')
max_degree=np.max(np.array(g_nx.degree)[:,1])/2
unique, counts = np.unique(np.array(g_nx.degree)[:,1], return_counts=True)
unique=unique/2
no_nodes=counts.sum()

In [27]:
noise_levels=[0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5]
iter=10
for nl in noise_levels:
    print("Noise level =",nl)
    pos=int(nl/0.05)-1
    test_acc_sln=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        dataset = Planetoid(root='data/py311/Planetoid', name='Citeseer', split="random", num_train_per_class=250,num_val=50,num_test=1000,transform=NormalizeFeatures())
        datap = (dataset[0]).to(device)
        data_bn=create_noisy(datap,nl)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_sln[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("sln noise:",np.mean(test_acc_sln),"+-",np.std(test_acc_sln))
    
    qn=q(rho_mv[pos],max_degree)
    nl_majvote=torch.zeros(len(data.y))
    for i in range(len(degree_list)):
        nl_majvote[i]=float(qn[int(degree_list[i])-1])
    nl_majvote.to(device)
    test_acc_mv=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        data_bn=create_noisy(datap,nl_majvote)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_mv[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("majority vote noise:",np.mean(test_acc_mv),"+-",np.std(test_acc_mv))

    qn=r(rho_veto[pos],max_degree)
    nl_veto=torch.zeros(len(data.y))
    for i in range(len(degree_list)):
        nl_veto[i]=qn[int(degree_list[i])-1]
    nl_veto.to(device)
    test_acc_veto=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        data_bn=create_noisy(datap,nl_veto)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_veto[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("veto noise:",np.mean(test_acc_veto),"+-",np.std(test_acc_veto))

    qn=s_mod(rho_seq[pos],max_degree)
    nl_seq=torch.zeros(len(data.y))
    for i in range(len(degree_list)):
        nl_seq[i]=qn[int(degree_list[i])-1]
    nl_seq.to(device)
    test_acc_seq=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        data_bn=create_noisy(datap,nl_seq)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_seq[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("sequential  noise:",np.mean(test_acc_seq),"+-",np.std(test_acc_seq))

    
    

Noise level = 0.05
sln noise: 0.7536 +- 0.012603174203350533
majority vote noise: 0.7526 +- 0.005868560300448489
veto noise: 0.7536 +- 0.005851495535331123
sequential  noise: 0.7522 +- 0.006720119046564584
Noise level = 0.1
sln noise: 0.7381 +- 0.012770669520428452
majority vote noise: 0.7412 +- 0.006867313885355764
veto noise: 0.7327999999999999 +- 0.003789459064299287
sequential  noise: 0.7341 +- 0.004205948168962623
Noise level = 0.15
sln noise: 0.7264 +- 0.013062924634246357
majority vote noise: 0.7414000000000001 +- 0.0068000000000000066
veto noise: 0.7336 +- 0.004800000000000005
sequential  noise: 0.7334 +- 0.004247352116319064
Noise level = 0.2


KeyboardInterrupt: 

In [None]:

noise_levels=[0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5]
iter=10
for nl in noise_levels:
    print("Noise level =",nl)
    pos=int(nl/0.05)-1
    test_acc_sln=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        dataset = Planetoid(root='data/py311/Planetoid', name='Citeseer', split="random", num_train_per_class=250,num_val=50,num_test=1000,transform=NormalizeFeatures())
        datap = (dataset[0]).to(device)
        data_bn=create_noisy_ccn(datap,nl)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_sln[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("ccn noise:",np.mean(test_acc_sln),"+-",np.std(test_acc_sln))
    
    qn=q(rho_mv[pos],max_degree)
    nl_majvote=torch.zeros(len(data.y))
    for i in range(len(degree_list)):
        nl_majvote[i]=float(qn[int(degree_list[i])-1])
    nl_majvote.to(device)
    test_acc_mv=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        data_bn=create_noisy_ccn(datap,nl_majvote)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_mv[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("majority vote noise:",np.mean(test_acc_mv),"+-",np.std(test_acc_mv))

    qn=r(rho_veto[pos],max_degree)
    nl_veto=torch.zeros(len(data.y))
    for i in range(len(degree_list)):
        nl_veto[i]=qn[int(degree_list[i])-1]
    nl_veto.to(device)
    test_acc_veto=np.zeros(iter)
    for i in range(iter):
        torch.manual_seed(i+2)
        data_bn=create_noisy_ccn(datap,nl_veto)  
        model_sgcn=SGCN(hidden_channels=16,seed=i+2)     # calculating accuracy on clean data set for comaprision
        test_acc_veto[i],mac=train_and_test(model_sgcn,data_bn,lam)
        del model_sgcn
        torch.cuda.empty_cache()
    print("veto noise:",np.mean(test_acc_veto),"+-",np.std(test_acc_veto))


    
    

Noise level = 0.05
ccn noise: 0.7544000000000001 +- 0.011867602959317448
majority vote noise: 0.7525999999999999 +- 0.006406246951218792
veto noise: 0.7515000000000001 +- 0.005869412236331681
Noise level = 0.1
ccn noise: 0.7442999999999999 +- 0.010109896141899785
majority vote noise: 0.7407999999999999 +- 0.00804735981549229
veto noise: 0.7304 +- 0.008879189152169254
Noise level = 0.15
ccn noise: 0.7262000000000001 +- 0.014647866738880456
majority vote noise: 0.7405999999999999 +- 0.008151073548925941
veto noise: 0.7307 +- 0.008989438247187648
Noise level = 0.2
ccn noise: 0.7043 +- 0.015199013125857872
majority vote noise: 0.7044999999999999 +- 0.006545991139621263
veto noise: 0.6658999999999999 +- 0.014679577650600163
Noise level = 0.25
ccn noise: 0.6761000000000001 +- 0.016176835290006485
majority vote noise: 0.6753 +- 0.016377118183612142
veto noise: 0.6178 +- 0.01893568060567142
Noise level = 0.3
ccn noise: 0.6302000000000001 +- 0.01643045951883271
majority vote noise: 0.6756 +- 0.