# Graph Construction: NetworkX

In [33]:
import glob
import os
import pickle
import re
from datetime import datetime

import networkx as nx
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse import lil_matrix
from tqdm import tqdm

In [20]:
# Load the two pickled lookup objects used by the rest of the notebook.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
with open('../fcastrillon/Data/user_indices', mode='rb') as indices_handle:
    user_indices = pickle.load(indices_handle)

with open('../fcastrillon/Data/user_to_party.pkl', mode='rb') as party_handle:
    user_to_party = pickle.load(party_handle)

37308

In [21]:
# Invert user_indices: each original value becomes a key and each original key
# becomes the corresponding value (on duplicate values, the last pair wins).
user_indices_2 = dict(zip(user_indices.values(), user_indices.keys()))

Here we define the `construct_graph` function, which builds the NetworkX graph for a given adjacency matrix and date.

In [37]:
def construct_graph(adj, date_value):
    """
    Build an undirected, weighted NetworkX graph from an adjacency matrix.

    One node is created per row of the matrix and every stored entry (i, j)
    becomes an edge whose ``weight`` is that entry's value. Each node gets an
    ``affiliation`` and a ``date`` attribute, each edge gets a ``date``
    attribute, and finally the nodes are relabelled from their integer row
    index to the value stored for that index in ``user_indices_2``.

    The function reads the notebook-level mappings ``user_indices_2``
    (row index -> user) and ``user_to_party`` (user -> party), so both must be
    defined before it is called.

    Args:
        adj: Adjacency matrix; any input accepted by ``scipy.sparse.csr_matrix``.
            In this notebook the matrices are loaded from the ``.npz`` files in
            the ``Data/Matrices`` folder.
        date_value: Value stored unchanged as the ``date`` attribute of every
            node and every edge (the notebook passes a 'YYYY-MM-DD' string).

    Returns:
        networkx.Graph: The attributed, relabelled graph.

    Notes:
        * The graph is undirected: when both (i, j) and (j, i) are stored, the
          entry processed last (the one in the higher-numbered row) sets the
          edge weight.
        * The diagonal is NOT cleared here, so a stored entry (i, i) becomes a
          self-loop. NOTE(review): an earlier comment said "make sure the
          diagonal is zeros" -- confirm whether self-loops should be dropped.
        * Lookups use ``.get``, so an index missing from ``user_indices_2`` is
          relabelled to ``None`` and a user missing from ``user_to_party`` gets
          ``None`` as affiliation.
    """
    csr = csr_matrix(adj)
    # LIL format exposes, for every row, the list of stored column indices.
    # (Named `lil` so the imported `lil_matrix` class is not shadowed.)
    lil = csr.tolil()

    # Taken from the converted matrix so that inputs without a `.shape`
    # attribute (e.g. nested lists) work as well.
    num_nodes = csr.shape[0]

    graph = nx.Graph()
    graph.add_nodes_from(range(num_nodes))

    # One edge per stored matrix entry, weighted by the entry's value.
    for i in range(num_nodes):
        for j in lil.rows[i]:
            graph.add_edge(i, j, weight=lil[i, j])

    # Node attribute 'affiliation': row index -> user -> party.
    affiliation = {
        node: user_to_party.get(user_indices_2.get(node)) for node in graph.nodes
    }
    nx.set_node_attributes(graph, affiliation, 'affiliation')

    # The same date value is attached to every node and to every edge.
    nx.set_node_attributes(graph, dict.fromkeys(graph.nodes, date_value), 'date')
    nx.set_edge_attributes(graph, dict.fromkeys(graph.edges, date_value), 'date')

    # Relabel nodes in place from integer row index to the mapped user value.
    new_labels = {node: user_indices_2.get(node) for node in graph.nodes}
    nx.relabel_nodes(graph, new_labels, copy=False)

    return graph

In [35]:
# Daily timestamp ranges for the Paro Nacional period.
# date1: one timestamp per day at 00:00:00, from 2021-04-28 to 2021-06-27.
v1_start, v1_end = '2021-04-28 00:00:00', '2021-06-27 00:00:00'
date1 = pd.date_range(v1_start, v1_end, freq='D')

# date2: one timestamp per day at 23:59:59, from 2021-04-30 to 2021-06-29.
v2_start, v2_end = '2021-04-30 23:59:59', '2021-06-29 23:59:59'
date2 = pd.date_range(v2_start, v2_end, freq='D')


In [None]:
# Build one graph per stored adjacency matrix and save it as GraphML.
# The k-th matrix file is paired with the k-th day of `date1`.


def _natural_key(path):
    """Sort key ordering digit runs in the file name numerically ('m_2' < 'm_10')."""
    tokens = re.split(r'(\d+)', os.path.basename(path))
    # re.split with a capturing group puts the digit runs at the odd positions.
    return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]


# glob.glob returns paths in arbitrary order, so sort them explicitly before
# pairing them with dates by position.
# NOTE(review): assumes the file names encode the chronological order of the
# matrices -- confirm against how the .npz files were written.
files = sorted(glob.glob('../fcastrillon/Data/Matrices/*.npz'), key=_natural_key)

# Fail before doing any expensive work if some file would have no date.
if len(files) > len(date1):
    raise ValueError(
        f"Found {len(files)} matrix files but only {len(date1)} dates in date1."
    )

output_dir = '../fcastrillon/Data/Graphs/'
os.makedirs(output_dir, exist_ok=True)

for k, file in enumerate(tqdm(files)):

    # First, we load the stored info into a normal CSR matrix.
    # The context manager closes the .npz archive once the arrays are read.
    with np.load(file) as npz:
        indices = npz['indices']
        indptr = npz['indptr']
        shape = npz['shape']
        data = npz['data']
    A = csr_matrix((data, indices, indptr), shape=shape)

    # Now we can create the graph with NetworkX, tagged with its date.
    date = date1[k].strftime('%Y-%m-%d')
    graph = construct_graph(A, date)

    # Finally we save the graph in .graphml format.
    filename = f'graph_{k}.graphml'
    output_filepath = output_dir + filename
    nx.write_graphml(graph, output_filepath)
    print(f"File '{filename}' successfully created and stored.")

In [39]:
# Single-iteration version of the export loop: load one matrix, build its
# graph and save it.
# NOTE(review): this cell depends on `file` and `k` left in the kernel by the
# loop cell above; it cannot run on its own after a kernel restart.
# The context manager closes the .npz archive once the arrays are read.
with np.load(file) as npz:
    indices = npz['indices']
    indptr = npz['indptr']
    shape = npz['shape']
    data = npz['data']
A = csr_matrix((data, indices, indptr), shape = shape)

# Now we can create the graph with NetworkX, tagged with the k-th date.
date = date1[k]
date = datetime.strftime(date, '%Y-%m-%d')
graph = construct_graph(A, date)

# Finally we save the graph in .graphml format.
filename = f'graph_{k}.graphml'
output_filepath = '../fcastrillon/Data/Graphs/' + filename
nx.write_graphml(graph, output_filepath)
print(f"File '{filename}' successfully created and stored.")
 

61