In [None]:
# Prepare data
import sys

sys.path.append("../")
import pandas as pd

from python.risk_networks import config
from python.risk_networks.config import AttributeColumnType, LinkType
from python.risk_networks.graph_functions import build_undirected_graph
from python.risk_networks.model import prepare_entity_attribute
from python.risk_networks.text_format import format_data_columns

# Load the raw input table; the path is relative to the notebook's working directory.
df = pd.read_csv("./input/rn_test.csv")

# Model settings for this run.
# NOTE(review): link_type and attribute_name are assigned here but not read again
# in this cell — confirm whether a later cell depends on them.
link_type = LinkType.EntityAttribute
entity_id_column = "ID"  # column handed to the helpers below as the entity identifier
attribute_type = AttributeColumnType.ColumnName
attribute_name = ""
columns_to_link = ["Event Description", "Country"]  # columns handed to the helpers below

# Format the selected columns before building links.
# NOTE(review): the exact transformation done by format_data_columns is not visible
# here — presumably it normalises the text of the linked columns; confirm.
data_df = format_data_columns(df, columns_to_link, entity_id_column)

# Build the entity/attribute links from the formatted data. The helper returns two
# values; only attribute_links is consumed below, node_types is kept for later use.
attribute_links, node_types = prepare_entity_attribute(
    data_df, entity_id_column, attribute_type, columns_to_link
)

# Assemble the graph from the links; the next cell reads its nodes() and edges().
overall_graph = build_undirected_graph(network_attribute_links=attribute_links)

In [None]:
# Split the graph's nodes into entity nodes (label starts with the configured
# entity prefix) and attribute nodes (everything else) in a single pass.
all_nodes = overall_graph.nodes()

entity_nodes = []
attributes_list = []
for node in all_nodes:
    if node.startswith(config.entity_label):
        entity_nodes.append(node)
    else:
        attributes_list.append(node)

# Headline counts for the data model summary.
num_entities = len(entity_nodes)
num_attributes = len(attributes_list)
num_edges = len(overall_graph.edges())

# One row per attribute node.
original_df = pd.DataFrame(attributes_list, columns=["Attribute"])

# Pair each distinct attribute label with a replacement label of the form
# "<prefix>==<prefix>_<n>", where <prefix> is the stripped text before the first
# "==" and <n> is the 1-based position among the distinct labels.
unique_names = original_df["Attribute"].unique()
attribute_prefixes = [label.split("==")[0].strip() for label in unique_names]
attributes_renamed = [
    (label, f"{prefix}=={prefix}_{position!s}")
    for position, (label, prefix) in enumerate(
        zip(unique_names, attribute_prefixes), start=1
    )
]

In [None]:
# Report the size of the data model, or say so when no entity nodes were found.
if num_entities <= 0:
    print("No entities.")
else:
    # A single print call with a newline separator emits the same four lines.
    print(
        "##### Data model summary",
        f"Number of entities*: {num_entities}",
        f"Number of attributes*: {num_attributes}",
        f"Number of links*: {num_edges}",
        sep="\n",
    )

# List the attribute nodes; the trailing expression is rendered as the cell output.
print("##### Attributes")
pd.DataFrame(original_df)