In [None]:
# Prepare data
import sys

sys.path.append("../")
import pandas as pd

from toolkit.risk_networks import config
from toolkit.risk_networks.config import AttributeColumnType, LinkType
from toolkit.risk_networks.graph_functions import build_undirected_graph
from toolkit.risk_networks.model import prepare_entity_attribute
from toolkit.risk_networks.text_format import format_data_columns

# Linking configuration: the column that identifies an entity and the
# column(s) whose values are linked to it as attributes.
entity_id_column = "Subject ID"
columns_to_link = ["Event Description"]
link_type = LinkType.EntityAttribute  # not referenced again in this cell
attribute_type = AttributeColumnType.ColumnName

# Raw input, read relative to the notebook's working directory.
_df = pd.read_csv("./input/rn_test.csv")

# Pipeline: format the selected columns, derive the entity-attribute links
# (plus node types), then build the undirected graph from those links.
data_df = format_data_columns(_df, columns_to_link, entity_id_column)
attribute_links, node_types = prepare_entity_attribute(
    data_df,
    entity_id_column,
    attribute_type,
    columns_to_link,
)
overall_graph = build_undirected_graph(network_attribute_links=attribute_links)

In [None]:
all_nodes = overall_graph.nodes()

# Partition the graph's nodes by label prefix: names starting with
# `config.entity_label` are entities, everything else is an attribute.
entity_nodes = [
    name for name in all_nodes if name.startswith(config.entity_label)
]
attributes_list = [
    name for name in all_nodes if not name.startswith(config.entity_label)
]

# Headline counts for the data-model summary.
num_entities, num_edges = len(entity_nodes), len(overall_graph.edges())
num_attributes = len(all_nodes) - num_entities

# One-column table of attribute nodes.
original_df = pd.DataFrame(attributes_list, columns=["Attribute"])

In [None]:
# Data model summary: report the counts, or say that there is nothing to show.
if num_entities <= 0:
    print("No entities.")
else:
    print(
        "##### Data model summary",
        f"Number of entities*: {num_entities}",
        f"Number of attributes*: {num_attributes}",
        f"Number of links*: {num_edges}",
        sep="\n",
    )


# Attribute table: printed heading, then the frame as the cell's final
# expression so the notebook renders it.
print("##### Attributes")
pd.DataFrame(data=original_df)