In [None]:
import pandas as pd
import networkx as nx
import os
import seaborn as sns
import matplotlib.pyplot as plt
import statistics
import math
import numpy as np
from tqdm import tqdm
import matplotlib.colors as mcolors
%matplotlib inline

____
# CUT LAYERS

In [None]:
# Build one graph per edge-list CSV found in `path`, keyed by the integer id
# parsed from the file name.
graphs = {}
dataset = "cifar100"
model = "ResNet110v2"
# specify folder path where files of an execution of cnn2multilayer are stored
# (must be set before running: os.listdir("") raises FileNotFoundError)
path = ""
# maximum number of edge-list files to load
max_graphs = 88
for f in tqdm(os.listdir(path)):
    # only CSV edge lists of the selected model are read, up to max_graphs of them
    if f.endswith(".csv") and model in f and len(graphs) < max_graphs:
        print(f)
        # os.path.join works whether or not `path` ends with a separator
        df = pd.read_csv(os.path.join(path, f))
        # the "edge" column holds "(source, target)"; strip the parentheses as
        # literal characters (regex=False, since "(" and ")" are regex metacharacters)
        df["edge"] = (
            df["edge"]
            .str.replace("(", "", regex=False)
            .str.replace(")", "", regex=False)
        )
        df[["source", "target"]] = df["edge"].str.split(", ", expand=True)
        df = df.drop(["edge"], axis=1)
        # the graph key is the second "_"-separated token of the file name,
        # without its ".csv" suffix, converted to int
        layer_id = int(f.split("_")[1].replace(".csv", ""))
        graphs[layer_id] = nx.from_pandas_edgelist(
            df, source="source", target="target", edge_attr=["mean", "0.5"]
        )

In [None]:
# Load the reference graph stored as "<dataset>-<model>.gexf" under `path`.
gexf_file = path + dataset + "-" + model + ".gexf"
base_graph = nx.read_gexf(gexf_file)

# Normalize the names of the convolutional layers: afterwards every node has a
# "layer" attribute plus an "old_layer" attribute holding the name it came with.
keeps_layer_name = dataset in ["mnist", "cifar10", "cifar100"]
for node_id, attrs in base_graph.nodes(data=True):
    if keeps_layer_name:
        # for these datasets the stored "layer_name" is used unchanged
        base_graph.nodes[node_id]["old_layer"] = attrs["layer_name"]
        base_graph.nodes[node_id]["layer"] = attrs["layer_name"]
        continue

    # other datasets: names look like "block<B>_conv<C>..." and are mapped to a
    # single running index "conv2d_<N>"
    layer = attrs["layer"]
    name_parts = layer.split("_")
    block_id = int(name_parts[0].replace("block", ""))
    conv_id = int(name_parts[1].replace("conv", ""))
    # blocks 1-2 are counted as two convolutions each, later blocks as three
    # (presumably a VGG-like layout -- TODO confirm against the model)
    real_id = (
        (block_id - 1) * 2 + conv_id
        if block_id <= 2
        else (block_id - 3) * 3 + 4 + conv_id
    )
    base_graph.nodes[node_id]["old_layer"] = layer
    base_graph.nodes[node_id]["layer"] = f"conv2d_{real_id}"

## DESCRIPTIVE ANALYSIS

In [None]:
# Structural summary of the graph stored under key 1 of `graphs`.
sample_graph = graphs[1]

print('Number of nodes:', len(sample_graph.nodes))
print('Number of arcs:', len(sample_graph.edges))
print('Density:', nx.density(sample_graph))
print('Clustering:', nx.average_clustering(sample_graph))
print('Is connected?:', nx.is_connected(sample_graph.to_undirected()))

# size of the k-core for a few values of k
for core_order in (3, 4, 5, 6, 9, 12):
    core_size = len(nx.k_core(sample_graph, k=core_order).nodes)
    print(f'{core_order}-core nodes:', core_size)

# histogram of the per-node clustering coefficients; histplot replaces the
# deprecated sns.distplot(..., kde=False) (bin edges may differ slightly
# because the default binning rule is not the same)
sns.histplot(list(nx.clustering(sample_graph).values()), kde=False)

In [None]:
# One dataframe of arcs per graph: rows are edges, columns are the "mean" and
# "0.5" edge attributes. `df` is left bound to the frame of the last graph.
data = {}
for graph_id, layer_graph in graphs.items():
    mean_by_edge = nx.get_edge_attributes(layer_graph, 'mean')
    median_by_edge = nx.get_edge_attributes(layer_graph, '0.5')
    # a one-row frame (edges as columns) transposed into one row per edge
    df = pd.DataFrame(mean_by_edge, index=[0]).T.rename(columns={0: 'mean'})
    df['0.5'] = median_by_edge.values()
    data[graph_id] = df

In [None]:
# Summary statistics of both edge attributes.
# NOTE(review): `df` is whatever frame was assigned last (in the arcs dataframe
# loop when cells run in order), i.e. a single graph, not all of them.
mean_summary = df["mean"].describe()
median_summary = df["0.5"].describe()
mean_summary, median_summary

##  COOL LAYERS FROM MULTILAYER NETWORK

In [None]:
# Score every node of the base graph from its weighted degree in the layer
# graphs: sum over layers, mean over layers, median over layers, or entropy of
# the per-layer degree shares.
#
# `weight_type` is used both as the edge attribute passed to graph.degree() and
# as the aggregation mode.
# NOTE(review): the graphs are loaded with the edge attributes "mean" and "0.5"
# only, so with weight_type == "sum" there is presumably no matching edge weight
# and the degree falls back to networkx's default -- confirm this is intended.
weight_type = "mean"
threshold_type = "mean"
entropy = False

# weighted degree of each node in every layer graph that contains it; a node of
# a layer graph that is missing from base_graph raises KeyError, as before
degrees_per_layer = {k: [] for k in list(base_graph.nodes)}
for label, graph in graphs.items():
    for k, v in dict(graph.degree(weight=weight_type)).items():
        degrees_per_layer[k].append(v)

# total weighted degree per node (0 for nodes that appear in no layer graph)
nodes = {k: sum(v) for k, v in degrees_per_layer.items()}

if entropy:
    # Shannon entropy (base 2) of how a node's total degree is spread over the
    # layers. Layers without the node, a zero total and non-positive shares all
    # contribute 0; these are the cases the former bare `except:` hid.
    for node, total_degree in list(nodes.items()):
        node_entropy = 0
        if total_degree != 0:
            for layer_degree in degrees_per_layer[node]:
                ratio = layer_degree / total_degree
                if ratio <= 0:
                    continue
                node_entropy += ratio * math.log(ratio, 2)
        nodes[node] = -node_entropy
elif weight_type == "mean":
    # mean over all layer graphs; layers that lack the node count as 0
    nodes = {k: v / len(graphs) for k, v in nodes.items()}
elif weight_type == "0.5":
    # median of the degrees observed for the node, 0 if it was never observed
    # (the previous code called len()/median() on the scalar sum and raised)
    nodes = {
        k: statistics.median(v) if len(v) > 0 else 0
        for k, v in degrees_per_layer.items()
    }
# weight_type == "sum" (or any other value): keep the totals unchanged


In [None]:
# distinct layer names found in the base graph
# NOTE(review): this reads "layer_name", not the normalized "layer" attribute
# set when base_graph is loaded -- confirm which one is wanted for datasets
# other than mnist/cifar10/cifar100.
total_layers = list(set([x[1]["layer_name"] for x in base_graph.nodes(data=True)]))
all_layers = set(total_layers)

# the reference value does not depend on the threshold factor: compute it once
median_score = statistics.median(list(nodes.values()))

# lol contains for each threshold the layer to remove and other information
lol = {}
for i, th in enumerate(np.arange(.05, 8., 0.05)):
    # threshold = multiple `th` of the median node score
    threshold = median_score * th
    cool_nodes = []
    cool_layers = []
    count_layers = {}
    for k, v in nodes.items():
        if v >= threshold:
            layer = base_graph.nodes[k]["layer_name"]
            cool_nodes.append(k)
            cool_layers.append(layer)
            # number of nodes at or above the threshold per layer (first hit counts as 1)
            count_layers[layer] = count_layers.get(layer, 0) + 1

    # layers without any node at or above the threshold; sorted so that the
    # result does not depend on set iteration order
    removed_layers = sorted(all_layers.difference(cool_layers))
    lol[i] = {"th": round(th, 2),
              "n_convs": len(removed_layers),
              "convs": removed_layers,
              "dataset": dataset,
              "model": model
            }

In [None]:
# Keep only the thresholds at which the number of removable layers differs from
# the previous threshold (the first threshold is never kept), then save them.
change_points = [
    lol[pos]
    for pos in range(1, len(lol))
    if lol[pos]["n_convs"] != lol[pos - 1]["n_convs"]
]
# re-key the kept entries 0, 1, 2, ... before building the frame
dis = pd.DataFrame.from_dict(dict(enumerate(change_points)), orient="index")
dis.to_csv(f"cut-{dataset}-{model}.csv", index=False)