# Setup

In [None]:
# --- Standard library ---
import sys
import os

# --- Scientific computing ---
import numpy as np
import scipy.stats as stats

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc

# --- Network science ---
import networkx as nx

# --- Data handling and visualization ---
import pandas as pd

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

# --- Project source code ---
sys.path.append("../src/")

from classifiers.features import get_degrees, get_configuration_probabilities_feature
from classifiers.features import get_distances, get_configuration_distances_feature, get_distance_ratios_feature
# from classifiers.features import get_labels, format_feature_matrix
from classifiers.logreg import *
from data.observations import get_preprocessed_data

In [2]:
# --- Experiment selection (arguments forwarded to get_preprocessed_data) ---
SYSTEM = "drosophila"
LAYERS = [1, 2]
REP = 1

# Candidate theta values; wrapped ten per row for readability.
THETAS = [
    0.05, 0.07, 0.10, 0.12, 0.15, 0.17, 0.20, 0.22, 0.25, 0.27,
    0.30, 0.32, 0.35, 0.37, 0.40, 0.42, 0.45, 0.47, 0.50, 0.53,
    0.55, 0.57, 0.60, 0.62, 0.65, 0.68, 0.70, 0.72, 0.75, 0.78,
    0.80, 0.82, 0.85, 0.88, 0.90, 0.92, 0.95,
]

# Theta used by the cells below. Other regimes that can be swapped in:
#   0.50 -> before threshold
#   0.65 -> approaching threshold from the left
#   0.85 -> after threshold
THETA = 0.68  # approaching threshold from the right

# My code

In [3]:
# Fetch the preprocessed data for the configured system, layers, theta and
# repetition. ROOT presumably names the directory holding the preprocessed
# files — confirm against get_preprocessed_data.
cache = get_preprocessed_data(SYSTEM, LAYERS, THETA, REP, ROOT="../data/input/preprocessed/")
# The result of align_centers() is stored on `cache` before renormalize() is
# called, so the two statements must stay in this order.
# NOTE(review): presumably align_centers() moves each component's center of
# mass to the origin and renormalize() rescales each component so that its
# vector norms sum to 1 (consistent with the GCC statistics printed further
# down) — confirm against the implementation of these methods.
cache.embeddings = cache.align_centers()
cache.embeddings = cache.renormalize()

In [4]:
# Connected components of the first remnant graph, largest first, so that
# index 0 is the giant connected component (GCC).
components = list(nx.connected_components(cache.remnants[0]))
components.sort(key=len, reverse=True)

In [5]:
# Per-component summary of the embedding stored in cache.embeddings[0].
# The lists are parallel to `components`, so index 0 describes the largest
# component (the list was sorted by decreasing size when it was built).
centers = []    # mean of the member vectors (center of mass)
norms = []      # mean of the member vectors' norms
sum_norms = []  # sum of the member vectors' norms
sizes = []      # number of nodes in the component
for component in components:
    vectors = [cache.embeddings[0][node] for node in component]
    # Each norm is computed once and the same list feeds both the mean and the
    # sum (previously every norm was evaluated twice per component).
    vector_norms = [np.linalg.norm(vector) for vector in vectors]

    center_of_mass = np.mean(vectors, axis=0)
    average_norm = np.mean(vector_norms)
    sum_norm = np.sum(vector_norms)

    sizes.append(len(component))
    centers.append(center_of_mass)
    norms.append(average_norm)
    sum_norms.append(sum_norm)

In [6]:
# Report the statistics of the largest component (index 0).
# sep="" is required: with print's default separator a single space is
# inserted between the three arguments, which showed up as a stray leading
# space in front of the second and third headings.
print(
    f"GCC Average Norm:\n\t {norms[0]}\n",
    f"GCC Sum of Norms:\n\t {sum_norms[0]}\n",
    f"GCC Center of mass (first two coordinates):\n\t {centers[0][:2]}\n",
    sep="",
)

GCC Average Norm:
	 0.00013635122741106898
 GCC Sum of Norms:
	 0.9999999403953552
 GCC Center of mass (first two coordinates):
	 [-1.2159174e-12  8.1285933e-13]



# Filippo's code

In [7]:
# Call the loader again with the same arguments, rebinding `cache`, so this
# section does not reuse the object the previous section worked on.
cache = get_preprocessed_data(SYSTEM, LAYERS, THETA, REP, ROOT="../data/input/preprocessed/")
# Same two-step preparation as in the previous section; the result of
# align_centers() is stored on `cache` before renormalize() is called.
# NOTE(review): what each method does to the embeddings is not visible in this
# notebook — confirm against their implementation.
cache.embeddings = cache.align_centers()
cache.embeddings = cache.renormalize()

In [8]:
# Reference procedure: for every connected component, shift its vectors so the
# component's center of mass is at the origin, then divide them by the sum of
# their norms. All assignments go into a copy taken from cache.embeddings[0].
G = cache.remnants[0]
n2v_emb_rescaled = cache.embeddings[0].copy()

components = sorted(nx.connected_components(G), key=len, reverse=True)

nr_comp = 0  # ends up equal to the number of components processed
for nr_comp, members in enumerate(components, start=1):
    # Center of mass, taken from the vectors before any of them is shifted.
    member_vectors = [n2v_emb_rescaled[node] for node in members]
    cm = np.add.reduce(member_vectors) / float(len(member_vectors))

    # Shift each member and accumulate the norm of the shifted vector.
    norm = 0.0
    for node in members:
        n2v_emb_rescaled[node] = n2v_emb_rescaled[node] - cm
        norm += np.linalg.norm(n2v_emb_rescaled[node])

    # Rescale only when the accumulated norm is strictly positive; otherwise
    # the shifted vectors are left as they are.
    if norm > 0.0:
        for node in members:
            n2v_emb_rescaled[node] = n2v_emb_rescaled[node] / norm


In [9]:
# Per-component summary of the rescaled embedding `n2v_emb_rescaled`.
# The lists are parallel to `components`, so index 0 describes the largest
# component (the list was sorted by decreasing size when it was built).
centers = []    # mean of the member vectors (center of mass)
norms = []      # mean of the member vectors' norms
sum_norms = []  # sum of the member vectors' norms
sizes = []      # number of nodes in the component
for component in components:
    vectors = [n2v_emb_rescaled[node] for node in component]
    # Each norm is computed once and the same list feeds both the mean and the
    # sum (previously every norm was evaluated twice per component).
    vector_norms = [np.linalg.norm(vector) for vector in vectors]

    center_of_mass = np.mean(vectors, axis=0)
    average_norm = np.mean(vector_norms)
    sum_norm = np.sum(vector_norms)

    sizes.append(len(component))
    centers.append(center_of_mass)
    norms.append(average_norm)
    sum_norms.append(sum_norm)

In [10]:
# Report the statistics of the largest component (index 0).
# sep="" is required: with print's default separator a single space is
# inserted between the three arguments, which showed up as a stray leading
# space in front of the second and third headings.
print(
    f"GCC Average Norm:\n\t {norms[0]}\n",
    f"GCC Sum of Norms:\n\t {sum_norms[0]}\n",
    f"GCC Center of mass (first two coordinates):\n\t {centers[0][:2]}\n",
    sep="",
)

GCC Average Norm:
	 0.00013635125651489943
 GCC Sum of Norms:
	 1.0000001192092896
 GCC Center of mass (first two coordinates):
	 [-1.6345390e-13 -5.9959186e-14]

