# $L_2$ Improvement Cora 2-class

Um dieses Experiment selbst auszuführen, führe die Zellen von oben nach unten aus

In [None]:
from torch_geometric.datasets import Planetoid
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import numpy as np
import copy
import time 
import matplotlib.pyplot as plt
import pickle # Lokales Speichern von Objekten
import keyboard

from GNM_Toolbox import *

# Load the Cora dataset. load_dataset is not defined in this notebook; presumably it comes
# from the GNM_Toolbox star import above — confirm.
dataset = load_dataset('Cora')

# Some Helper Functions

In [None]:
# Given a target_list (a_0, a_1, a_2, ...)
# and an out_list ((b_0, x), (b_1, x), (b_2, x), (b_3, x), ...),
# find a list l of indices such that for every i < len(target_list):
# abs(target_list[i] - out_list[l[i]][0]) is minimal.
def find_each_nearest(target_list, out_list):
    """Return, for every target, the index of the nearest entry in ``out_list``.

    Both sequences are expected to be sorted in ascending order (``out_list``
    by the first component of its entries). A single forward sweep is used, so
    the returned indices are non-decreasing. If two neighbouring entries are
    equally close to a target, the later one is chosen.

    Args:
        target_list: Sorted sequence of numeric target values.
        out_list: Sorted sequence of indexable entries whose element ``[0]``
            is the value compared against the targets.

    Returns:
        A list with ``len(target_list)`` indices into ``out_list``; an empty
        list if ``target_list`` is empty.

    Raises:
        ValueError: If ``out_list`` is empty while targets are present, since
            no valid index exists in that case.
    """
    if len(target_list) == 0:
        return []
    n_out = len(out_list)
    if n_out == 0:
        raise ValueError('out_list must not be empty when targets are given')

    result = []
    j = 0
    for target in target_list:
        # Advance while the next candidate is at least as close as the current one.
        # The bound check comes first so a one-element out_list never reads out_list[1].
        while (j + 1 < n_out
               and abs(target - out_list[j][0]) >= abs(target - out_list[j + 1][0])):
            j += 1
        result.append(j)
    return result
            
def get_best_values_indices(targets, lambdas):
    """Sort ``lambdas`` in place by first component and match each target to its nearest entry.

    Args:
        targets: Sorted sequence of target values (passed on to find_each_nearest).
        lambdas: List of indexable entries; it is reordered in place, so the
            returned indices refer to the list as it is after this call.

    Returns:
        The index list produced by find_each_nearest(targets, lambdas).
    """
    def leading_value(entry):
        return entry[0]

    lambdas.sort(key=leading_value)
    nearest_indices = find_each_nearest(targets, lambdas)
    return nearest_indices

def h0(x):
    """Map each row of ``x`` to a scalar score based on the row sum.

    With s = sum of a row (along dim 1), the score is
    exp(s / 13 - 4) - (s - 15) / 15.

    Args:
        x: Tensor that is summed along dimension 1.

    Returns:
        Tensor with one score per row of ``x``.
    """
    exp_scale, exp_shift, lin_offset, lin_scale = 13, 4, 15, 15
    row_sums = torch.sum(x, dim=1)
    exponential_part = torch.exp(row_sums / exp_scale - exp_shift)
    linear_part = (row_sums - lin_offset) / lin_scale
    return exponential_part - linear_part
    
def pi0(X, y):
    """Return a per-row probability sigmoid(-log(35) + h0(X) + offset[y]).

    The class-dependent offset is 0 for class 0 and 1.6 for class 1.

    The constant tensors are created with the dtype and device of ``X`` so the
    matrix product also works when the data has been moved to an accelerator
    or uses a non-default floating dtype.

    Args:
        X: Floating-point feature tensor; rows are scored by h0.
        y: Integer label tensor with values in {0, 1}, one per row of ``X``.
            Assumed to live on the same device as ``X`` — confirm at call site.

    Returns:
        Tensor of probabilities, one per row of ``X``.
    """
    y_onehot = F.one_hot(y, 2).type(X.dtype)
    intercept = -torch.log(torch.tensor(35., dtype=X.dtype, device=X.device))
    slope = 1
    class_offsets = torch.tensor([0, 1.6], dtype=X.dtype, device=X.device)
    return torch.sigmoid(intercept + slope * h0(X) + class_offsets @ y_onehot.T)

def h1(x):
    """Map each row of ``x`` to a scalar score based on the row sum.

    With s = sum of a row (along dim 1), the score is
    exp(s / 18 - 4) - (s - 5) / 6.

    Args:
        x: Tensor that is summed along dimension 1.

    Returns:
        Tensor with one score per row of ``x``.
    """
    exp_scale, exp_shift, lin_offset, lin_scale = 18, 4, 5, 6
    row_sums = torch.sum(x, dim=1)
    exponential_part = torch.exp(row_sums / exp_scale - exp_shift)
    linear_part = (row_sums - lin_offset) / lin_scale
    return exponential_part - linear_part
    
def pi1(X, y):
    """Return a per-row probability sigmoid(-log(25) + 2 * h1(X) + offset[y]).

    The class-dependent offset is 2 for class 0 and 4 for class 1.

    The one-hot labels are cast to the dtype of ``X`` and the constant tensors
    are created with the dtype and device of ``X`` (as pi0 does for the
    labels). This keeps the matrix product in floating point instead of
    int64 and lets it run when the data has been moved to an accelerator.

    Args:
        X: Floating-point feature tensor; rows are scored by h1.
        y: Integer label tensor with values in {0, 1}, one per row of ``X``.
            Assumed to live on the same device as ``X`` — confirm at call site.

    Returns:
        Tensor of probabilities, one per row of ``X``.
    """
    y_onehot = F.one_hot(y, 2).type(X.dtype)
    intercept = -torch.log(torch.tensor(25., dtype=X.dtype, device=X.device))
    slope = 2.
    class_offsets = torch.tensor([2., 4.], dtype=X.dtype, device=X.device)
    return torch.sigmoid(intercept + slope * h1(X) + class_offsets @ y_onehot.T)

def create_mask_from_pi(data, pi):
    """Draw a random boolean mask with per-entry inclusion probability ``pi(data.x, data.y)``.

    Args:
        data: Object exposing the attributes ``x`` and ``y`` that are passed to ``pi``.
        pi: Callable ``pi(x, y)`` returning a tensor of probabilities in [0, 1];
            one Bernoulli draw is made per entry along its first dimension.

    Returns:
        A CPU boolean tensor that is True where the Bernoulli draw was 1.
    """
    p = pi(data.x, data.y)
    # Convert explicitly: NumPy cannot consume tensors that live on an
    # accelerator or that are attached to an autograd graph.
    probabilities = p.detach().cpu().numpy()
    draws = np.random.binomial(size = probabilities.shape[0], n = 1, p = probabilities)
    return torch.from_numpy(draws == 1)

def split_known_mask_into_val_and_train_mask(known, ratio=0.8):
    """Randomly split the True entries of ``known`` into a training and a validation mask.

    Every True entry is assigned independently: with probability ``ratio`` it
    goes to the training mask, otherwise to the validation mask. Entries that
    are not True in ``known`` stay False in both masks.

    Args:
        known: Boolean mask tensor.
        ratio: Probability that a known entry is assigned to the training mask.

    Returns:
        Tuple ``(val_mask, train_mask)`` of boolean tensors shaped like ``known``.
        Note the order: the validation mask comes first.
    """
    train_mask = torch.zeros_like(known, dtype=torch.bool)
    val_mask = torch.zeros_like(known, dtype=torch.bool)
    for position, flag in enumerate(known):
        if not bool(flag == True):
            continue
        # One Bernoulli draw per known entry decides which mask receives it.
        picked_for_training = np.random.binomial(1, ratio) == 1
        destination = train_mask if picked_for_training else val_mask
        destination[position] = True
    return val_mask, train_mask

def calculate_lambda(train_mask, y):
    """Return the ratio (#class-1 labels) / (#class-0 labels) among ``y[train_mask]``.

    Labels other than 0 and 1 are ignored.

    Args:
        train_mask: Mask used to index ``y``.
        y: Label tensor.

    Returns:
        The ratio as a Python float.

    Raises:
        ZeroDivisionError: If no selected label equals 0.
    """
    selected = y[train_mask]
    class0_count = int((selected == 0).sum())
    class1_count = int((selected == 1).sum())
    return class1_count / class0_count
        
def insert_into_list(l, item, t):
    # l list, i item to insert, target
    def diff(a, b):
        return abs(a-b)
    N = len(l)
    if N == 0:
        l.insert(0, item)
        return
    d = diff(t, item[0])
    d_0 = diff(t, l[0][0])
    if d <= d_0:
        l.insert(0, item)
        return
    for i in range(N-1):
        d_0 = diff(t, l[i][0])
        d_1 = diff(t, l[i+1][0])
        if d_0 <= d and d <= d_1:
            l.insert(i+1, item)
            return
    l.append(item)

def disturb_y(y, probability): # y must have 2 classes
    """Return one-hot labels in which each label was resampled with the given probability.

    For every entry a uniform draw from [0, 1) is compared with ``probability``.
    If the draw is smaller, the label is replaced by a fresh label chosen by a
    second uniform draw (0 if that draw exceeds 0.5, otherwise 1); otherwise
    the original label is kept.

    Args:
        y: Integer label tensor with the two classes 0 and 1.
        probability: Chance that a label is replaced by a random one.

    Returns:
        ``F.one_hot`` encoding (2 classes) of the disturbed labels.
    """
    noisy_labels = torch.zeros_like(y)
    for position, label in enumerate(y):
        if np.random.rand(1) < probability:
            if np.random.rand(1) > 0.5:
                noisy_labels[position] = 0
            else:
                noisy_labels[position] = 1
        else:
            noisy_labels[position] = label

    return F.one_hot(noisy_labels, 2)

# root mean squared error
def RMSE(pi_est, pi_true):
    """Return the square root of the mean squared error between estimate and truth.

    ``pi_est`` is viewed with the shape of ``pi_true`` before comparing.
    """
    estimate = pi_est.view(pi_true.shape)
    mean_squared = F.mse_loss(pi_true, estimate)
    return torch.sqrt(mean_squared)

# mean squared error
def MSE(pi_est, pi_true):
    """Return the mean squared error between estimate and truth.

    If ``pi_est`` has the same number of elements as ``pi_true`` but a
    different shape (e.g. ``(N, 1)`` vs ``(N,)``), it is reshaped to the shape
    of ``pi_true`` first. Without this, the two tensors would be broadcast
    against each other and the result would be an average over all pairs
    instead of over matching entries.

    Args:
        pi_est: Estimated values.
        pi_true: Reference values.

    Returns:
        Scalar tensor with the mean squared error.
    """
    if pi_est.shape != pi_true.shape and pi_est.numel() == pi_true.numel():
        pi_est = pi_est.reshape(pi_true.shape)
    return F.mse_loss(pi_true, pi_est)

# weighted mean squared error
def WMSE(pi_est, pi_true, weight):
    """Return the mean of ``weight * (pi_est - pi_true) ** 2``.

    If ``pi_est`` or a tensor-valued ``weight`` has the same number of
    elements as ``pi_true`` but a different shape (e.g. ``(N, 1)`` vs
    ``(N,)``), it is reshaped to the shape of ``pi_true`` first, so entries
    are compared one-to-one instead of being broadcast against each other.

    Args:
        pi_est: Estimated values.
        pi_true: Reference values.
        weight: Per-entry weights (tensor) or a scalar factor.

    Returns:
        Scalar tensor with the weighted mean squared error.
    """
    if pi_est.shape != pi_true.shape and pi_est.numel() == pi_true.numel():
        pi_est = pi_est.reshape(pi_true.shape)
    if (torch.is_tensor(weight)
            and weight.shape != pi_true.shape
            and weight.numel() == pi_true.numel()):
        weight = weight.reshape(pi_true.shape)
    return torch.mean(weight * (pi_est - pi_true) ** 2)


# mean root error
def MRE(pi_est, pi_true):
    """Return the mean of the square roots of the absolute errors.

    ``pi_est`` is viewed with the shape of ``pi_true`` before comparing.
    """
    gap = pi_est.view(pi_true.shape) - pi_true
    rooted_gap = torch.pow(abs(gap), 0.5)
    return torch.mean(rooted_gap)

# computes weights for the weighted mean squared error
def calc_inverse_weights(pi_real):
    """Return ``sqrt(1 / count)`` for the 1%-wide histogram bins of ``pi_real``.

    Each value p is counted in bin ``int(p * 100)``. Bins that received no
    value get the weight ``sqrt(999999999)`` instead of infinity.

    NOTE(review): the result has ``len(pi_real)`` entries but is indexed by
    bin number, not by sample position, and a value with
    ``int(p * 100) >= len(pi_real)`` raises IndexError. Confirm whether
    per-sample weights were intended before relying on this with WMSE.

    Args:
        pi_real: 1-D tensor of probabilities.

    Returns:
        Float tensor of length ``len(pi_real)`` holding the per-bin weights.
    """
    bins = torch.zeros(len(pi_real)).type(torch.float)
    # Count how many values fall into each bin.
    for p in pi_real:
        bins[int(p*100)] += 1
    bins = 1/bins
    # Empty bins produce 1/0 = inf; replace it with a large finite number.
    bins[torch.isinf(bins)] = 999999999
    return torch.sqrt(bins)

def eval_pi(pi_true, pi_est, diff=0.01):
    """Return the fraction of estimates whose absolute error is below ``diff``.

    ``pi_true`` is viewed with the shape of ``pi_est`` before comparing. The
    count is divided by ``pi_true.shape[0]``.

    Args:
        pi_true: Reference values.
        pi_est: Estimated values.
        diff: Strict upper bound on the absolute error for an estimate to count.

    Returns:
        Tensor holding the fraction of close estimates.
    """
    deviation = abs(pi_true.view(pi_est.shape) - pi_est)
    close_count = 0
    for entry in deviation:
        close_count = close_count + (entry < diff)
    return close_count / float(pi_true.shape[0])

def plot_data(all_models, colors, names):
    """Draw a jittered scatter plot of the values stored in ``all_models``.

    Every key of ``all_models`` gets an integer x position (1, 2, ...). The
    groups stored under a key are spread evenly between -0.4 and +0.4 around
    that position, and each group is drawn with alternative_boxplot in its own
    colour.

    Args:
        all_models: Dict mapping a key to a sequence of groups; each group is an
            iterable of numeric values. The number of groups is read from the
            first key.
        colors: One matplotlib colour per group.
        names: Tick labels for the x axis. Ticks are placed at the group
            positions of the first key only.
    """
    M = len(all_models)

    # Horizontal offsets of the groups around each integer x position.
    number_boxes = len(all_models[list(all_models.keys())[0]])
    minDist = -0.4
    maxDist = 0.4
    if number_boxes == 1:
        # A single group is centred; the spacing formula would divide by zero here.
        dist = [0.0]
    else:
        dist = [minDist + (maxDist-minDist)/(number_boxes-1) * i for i in range(number_boxes)]

    # Set up the figure and track the overall value range for the y limits.
    plt.figure(figsize=(18, 8))
    ax = plt.axes()
    minimum = 1
    maximum = 0
    for i, key in enumerate(all_models, start=1):
        for models, color, d in zip(all_models[key], colors, dist):
            acc = list(models)
            # Find min and max
            lowest, highest = min(acc), max(acc)
            if minimum > lowest:
                minimum = lowest
            if maximum < highest:
                maximum = highest
            alternative_boxplot(acc, i+d, 0.4/len(dist), color)

    # Dashed separators between neighbouring keys, then x-axis ticks and labels.
    seperation_lines = [i + 0.5 for i in range(1, M)]
    plt.vlines(seperation_lines, 0, 1, colors='grey', linestyles='dashed')
    ax.set_xticks([1 + d for d in dist])
    ax.set_xticklabels(names)
    plt.ylim(minimum - 0.01, maximum + 0.01)
    plt.xlim(0.4, M + 0.6)

    # Draw one temporary line per colour and hide it again below.
    # NOTE(review): these look like legend handles, but no legend is created — confirm intent.
    helps = list()
    for color in colors:
        h, = plt.plot([1, 1], color=color)
        helps.append(h)
    ax.grid(True, axis='y')
    ax.set_xlabel(r'p', fontsize=20)
    ax.set_ylabel(r'$L_2$', fontsize=20)
    for h in helps:
        h.set_visible(False)

def alternative_boxplot(data, x_position, x_width, color):
    """Scatter ``data`` as 'x' markers with random horizontal jitter around ``x_position``.

    Also prints ``<count above mean>:<count below mean>`` for the given values.

    Args:
        data: List of floats to plot on the y axis.
        x_position: Centre of the jitter band on the x axis.
        x_width: Total width of the jitter band.
        color: Matplotlib colour for the markers.
    """
    average = np.mean(data)
    above = sum(1 for value in data if value > average)
    below = sum(1 for value in data if value < average)
    print('{}:{}'.format(above, below))

    # Uniform jitter in [x_position - x_width/2, x_position + x_width/2).
    jitter = np.random.rand(len(data))
    left_edge = x_position - x_width/2.
    plt.plot(left_edge + jitter * x_width, np.array(data), 'x', color = color)

## Data Setup

In [None]:
# Set up data: take the first graph of the dataset and move it to the GPU if one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = dataset[0].to(device)
data.num_classes = 2
# Binarize the labels: original class 3 becomes class 1, every other class becomes class 0.
# NOTE(review): the previous comment described the opposite mapping (class 3 -> 0, rest -> 1);
# the code below is what actually runs — confirm which mapping is intended.
y = torch.zeros_like(data.y)
y[data.y == 3] = 1
data.y = y

In [None]:
# Draw 80 random masks of known nodes from pi1 and split each into validation and training parts.
# Each stored tuple is (None, train_mask, val_mask); the first slot is an unused placeholder here.
masks1 = list()
for i in range(80):
    mask1 = create_mask_from_pi(data, pi1)
    val_mask1, train_mask1 = split_known_mask_into_val_and_train_mask(mask1)
    masks1.append((None, train_mask1, val_mask1))

## Analyse

In [None]:
# Select the mask set used for the evaluation and compute the probabilities given by pi1
# for every node.
choosen_masks = masks1
pi_true = pi1(data.x, data.y)

In [None]:
# Evaluation: for every mask and every label-noise level, train IT_per_mask models
# and record the last entry of the returned loss list.
IT_per_mask = 2
NB_masks = len(choosen_masks)
t_0 = time.time()
# Noise levels are passed to disturb_y as the replacement probability. disturb_y compares
# them with draws from [0, 1), so 1.1 means every label is resampled.
noise_levels = [0, 0.1, 0.2, 0.4, 0.8, 1.1]
all_models = dict()
# One result list per noise level (the closing bracket was missing here).
smn_models = [list() for _ in range(len(noise_levels))]
    
for j, mask_tupel in enumerate(choosen_masks):
    _, train_mask, val_mask = mask_tupel

    # Train IT_per_mask models for each mask
    for k in range(IT_per_mask):
        for m, noise_level in enumerate(noise_levels):
            # Progress report: current step index, total number of steps, start time.
            print_status(j * IT_per_mask * len(noise_levels) + k * len(noise_levels) + m, NB_masks * IT_per_mask * len(noise_levels), t_0)
            y_noise = disturb_y(data.y, noise_level)
            # train_modelr is not defined in this notebook (presumably from GNM_Toolbox — confirm);
            # it receives the combined train+val mask and the noisy one-hot labels.
            pi_est, loss_list, acc_r = train_modelr(data.x, data.y, data.num_classes, train_mask+val_mask, y_noise, NB_IT=30, expectation='fast')

            smn_models[m].append(loss_list[-1])
        # All results are stored under the single key 1, so the plot has one x position.
        all_models[1] = (smn_models)

In [None]:
# Plot the recorded values: one colour per noise level, tick labels giving the noise levels.
# NOTE(review): the last label is 1 while the corresponding noise level above is 1.1.
plot_data(all_models, 
          colors = [(0.3 + i * 0.0875, 0.8 - i * 0.1, 0.2 - i * 0.01) for i in range(8)], 
          names = [0, 0.1, 0.2, 0.4, 0.8, 1])