Steps:


1. **Create data model**
    - **attributes**
    - **flags**
    - **groups**

2. Process data model
3. Explore networks
4. Generate AI reports

# 1. Create data model

In [None]:
import sys

# Add the directory two levels up to the module search path, presumably so the
# `toolkit` imports in the cells below resolve (confirm against repo layout).
# The membership check keeps the cell idempotent: re-running it in the same
# kernel no longer piles up duplicate "../.." entries in sys.path.
if "../.." not in sys.path:
    sys.path.append("../..")

## Add Entity-Attributes

In [None]:
import polars as pl

# Column handed to the model builder as the entity identifier.
entity_id_column = "Country ISO"

# Columns handed to the model builder as the ones to link to each entity.
columns_to_link = [
    "Date",
    "Event Description",
    "Location Where Sexual Violence Was Committed",
    "Reported Perpetrator",
]

# Source table for the data model.
input_dataframe = pl.read_csv("./input/rn_test.csv")

In [None]:
from toolkit.risk_networks import config
from toolkit.risk_networks.prepare_model import build_model_with_attributes
from toolkit.risk_networks.config import ENTITY_LABEL

# Build the graph from the input table, the entity id column and the columns to link.
graph = build_model_with_attributes(
    input_dataframe,
    entity_id_column,
    columns_to_link,
)

# A node counts as an entity when its name starts with ENTITY_LABEL;
# every remaining node is counted as an attribute.
all_nodes = graph.nodes()
entity_nodes = list(
    filter(lambda name: name.startswith(ENTITY_LABEL), all_nodes)
)

num_edges = len(graph.edges())
num_entities = len(entity_nodes)
num_attributes = len(all_nodes) - num_entities

# Adjacent f-strings are concatenated, so the printed text is a single
# three-line message.
print(
    f"*Number of entities*: {num_entities}\n"
    f"*Number of attributes*: {num_attributes}\n"
    f"*Number of links*: {num_edges}"
)


## Add flags (Optional)

In [None]:
from toolkit.risk_networks.config import FlagAggregatorType
from toolkit.risk_networks.prepare_model import get_flags

# Inputs for get_flags: the entity id column, the flag value column(s)
# and the aggregation mode.
entity_col = "Country ISO"
value_cols = ["FLAGS"]
flag_agg = FlagAggregatorType.Instance.value

flags_dataframe = pl.read_csv("./input/rn_test_flags.csv")

# get_flags returns three values; only the first is used in this cell.
integrated_flags, max_entity_flags, mean_flagged_flags = get_flags(
    flags_dataframe,
    entity_col,
    flag_agg,
    value_cols,
)

num_flags = len(integrated_flags)
print(f"*Number of flags*: {num_flags}")



## Add groups (Optional)

In [None]:
from toolkit.risk_networks.prepare_model import build_groups
from toolkit.helpers.constants import ATTRIBUTE_VALUE_SEPARATOR

# Inputs for build_groups: the entity id column and the group value column(s).
# NOTE: flag_agg is deliberately not assigned here. This cell never uses it, and
# FlagAggregatorType is only imported by the optional flags cell, so referencing
# it made this cell fail with NameError whenever the flags step was skipped.
entity_col = "Country ISO"
value_cols = ["GROUP"]

groups_dataframe = pl.read_csv("./input/rn_test_groups.csv")

group_links = build_groups(
    value_cols,
    groups_dataframe,
    entity_col,
)

# Collect the distinct groups. Each link is indexed at positions 1 and 2,
# presumably the attribute name and its value (confirm against build_groups);
# the two are joined with ATTRIBUTE_VALUE_SEPARATOR to form the group key.
groups = set()
for link_list in group_links:
    for link in link_list:
        groups.add(f"{link[1]}{ATTRIBUTE_VALUE_SEPARATOR}{link[2]}")

num_groups = len(groups)
print(f"*Number of groups*: {num_groups}")


### Summary

In [None]:
# The flags and groups steps are optional. If their cells were skipped,
# num_flags / num_groups do not exist, so fall back to 0 instead of raising
# NameError. Output is unchanged when both steps were run.
summary_num_flags = globals().get("num_flags", 0)
summary_num_groups = globals().get("num_groups", 0)

print(
    f"*Number of entities*: {num_entities}\n"
    f"*Number of attributes*: {num_attributes}\n"
    f"*Number of links*: {num_edges}\n"
    f"*Number of flags*: {summary_num_flags}\n"
    f"*Number of groups*: {summary_num_groups}"
)
