<a href="https://colab.research.google.com/github/mariemtouihri/GRAM-Metric/blob/main/Diversity_Index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1 - Import libraries and data

In [None]:
import numpy as np
import networkx as nx
from scipy.stats import spearmanr, kendalltau
import matplotlib.pyplot as plt
import math

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# and written through the regular filesystem (only works inside Colab).
from google.colab import drive
drive.mount('/content/drive')


In [None]:
"""
  Load all matrices from my drive in a dictionary

"""

# Read the adjacency matrices of graph G{j} for every noise level and keep
# them in a dictionary keyed by the noise percentage (0, 10, ..., 100).
j = 1  # To select G1 and so on
noises = list(range(11))
loaded_data_dict = {}
for step in noises:
    noise = step * 10  # convert the step index into a percentage
    file_path = f'/content/drive/MyDrive/Colab_Notebooks/matrices_noise_G{j}_{noise}%.npy'
    loaded_data = np.load(file_path)
    loaded_data_dict[noise] = loaded_data

print(f"Matrices G{j} are loaded successfully!")

In [None]:
# Length of the first axis of the data loaded for the 0% noise level
# (presumably the number of matrices in that file); shown as the cell output.
len(loaded_data_dict[0])

### Define needed functions

In [None]:
def shannon_diversity_index(graph):
    """Return the summed Shannon diversity of all nodes' outgoing edge weights.

    For each node, the weights of its outgoing edges are normalised into
    proportions ``p`` and the node contributes ``-sum(p * ln(p))``. The
    per-node values are added up over the whole graph.

    Args:
        graph: A directed graph exposing ``nodes()`` and
            ``out_edges(node, data=True)`` (e.g. ``networkx.DiGraph``) whose
            edges carry a numeric ``'weight'`` attribute.

    Returns:
        float: The total diversity index; ``0.0`` when no node has outgoing
        edges with a non-zero total weight.

    Raises:
        KeyError: If an outgoing edge has no ``'weight'`` attribute.
    """
    diversity_index = 0.0

    for node in graph.nodes():
        # Get the outgoing edge weights of the node
        outgoing_weights = [
            data['weight'] for _, _, data in graph.out_edges(node, data=True)
        ]
        total_weight = sum(outgoing_weights)

        # A node without outgoing edges, or whose weights sum to zero, has no
        # defined weight distribution; skip it instead of dividing by zero.
        if not total_weight:
            continue

        # Relative proportion of each outgoing edge weight
        proportions = (weight / total_weight for weight in outgoing_weights)

        # Non-positive proportions are left out because log is undefined there.
        diversity_index -= sum(p * math.log(p) for p in proportions if p > 0)

    return diversity_index

In [None]:
def diversity(matrices):
    """Compute the Shannon diversity index of each adjacency matrix.

    Every matrix is converted to a ``networkx.DiGraph`` and scored with
    ``shannon_diversity_index``.

    Args:
        matrices: Sequence of adjacency matrices accepted by ``nx.DiGraph``.

    Returns:
        list: One diversity index per matrix, in input order.
    """
    return [
        shannon_diversity_index(nx.DiGraph(adjacency))
        for adjacency in matrices
    ]

In [None]:
def calculate_correlations(original_values, modified_values):
    """Correlate two equally long value series with three coefficients.

    The coefficients are computed once over the full series. Repeating the
    same computation per element and averaging the identical results, as an
    earlier version did, yields the same numbers at n times the cost.

    Args:
        original_values: Sequence of numeric values (the reference series).
        modified_values: Sequence of numeric values of the same length.

    Returns:
        tuple: ``(pearson, spearman, kendall_tau)`` correlation coefficients.
        All three are NaN when the input is empty.
    """
    # With no data there is nothing to correlate; report NaN explicitly
    # instead of relying on the mean of an empty list.
    if len(original_values) == 0:
        nan = float('nan')
        return nan, nan, nan

    # Pearson correlation: off-diagonal entry of the 2x2 correlation matrix
    pearson_value = np.corrcoef(original_values, modified_values)[0, 1]

    # Spearman rank correlation (the p-value is not needed)
    spearman_value, _ = spearmanr(original_values, modified_values)

    # Kendall Tau rank correlation (the p-value is not needed)
    ktau_value, _ = kendalltau(original_values, modified_values)

    return pearson_value, spearman_value, ktau_value

### Start calculations and save them to files

In [None]:
# Diversity values of every matrix, keyed by the same noise percentage
# used in loaded_data_dict.
diversity_dict = {
    percent: diversity(matrices)
    for percent, matrices in loaded_data_dict.items()
}


In [None]:
# Save the diversity values to an .npy file: one entry per key of
# diversity_dict, in the dictionary's insertion order.
file_path = f'/content/drive/MyDrive/Colab_Notebooks/diversity/diversity_values_matrices_G{j}.npy'

dict_values = list(diversity_dict.values())
dict_array = np.array(dict_values)
np.save(file_path, dict_array)

In [None]:
# Each series is seeded with 1 for the 0% entry (presumably the baseline
# correlated with itself); the loop then adds one value per 10% step.
pearson_values, spearman_values, ktau_values = [1], [1], [1]

for rate in range(10, 110, 10):
    scores = calculate_correlations(diversity_dict[0], diversity_dict[rate])
    # scores is (pearson, spearman, kendall tau), matching the series order
    for series, score in zip((pearson_values, spearman_values, ktau_values), scores):
        series.append(score)


In [None]:
# Write the three correlation series of graph G{j} to a JSON file
import json

corr_dict = {
    'pearson_values': pearson_values,
    'spearman_values': spearman_values,
    'ktau_values': ktau_values,
}

# Destination of the JSON file on Drive
file_path = f'/content/drive/MyDrive/Colab_Notebooks/diversity/diversity_correlations_matrices_G{j}.json'

# Serialize the dictionary into that file
with open(file_path, 'w') as json_file:
    json.dump(corr_dict, json_file)

print(f"Results saved to {file_path}")


In [None]:
# Plot the three correlation series against the modification percentage
modification_rates = [f"{rate}%" for rate in range(0, 110, 10)]
curves = (
    (pearson_values, "Pearson"),
    (spearman_values, "Spearman"),
    (ktau_values, "Kendall Tau"),
)

plt.figure(figsize=(8, 6))

# One line per coefficient, drawn in the same order as the legend entries
for values, label in curves:
    plt.plot(modification_rates, values, marker='o', label=label)

plt.xlabel("Modification Percentage")
plt.ylabel("Correlation")
plt.title("Correlation of Diversity Index (Original vs Modified)")
plt.legend()
plt.grid(True)
plt.show()