Steps:


1. Create data model
    - attributes
    - flags
    - groups

2. **Process data model**
    - index nodes
    - infer nodes
    - **detect networks**
    
3. Explore networks
4. Generate AI reports

# 2. Process data model

In [None]:
# Add the directory two levels above the working directory to the import path.
# NOTE(review): presumably this is the repository root, so that the `toolkit`
# package imported by the following cells resolves — confirm.
import sys
sys.path.append("../..")

## Detect networks

In [None]:
from toolkit.risk_networks.prepare_model import build_model_with_attributes
import polars as pl
from toolkit.risk_networks.config import ENTITY_LABEL

# Prepare: load the sample input and build the graph from it.
input_dataframe = pl.read_csv("./input/rn_test.csv")

# Arguments for build_model_with_attributes: the entity id column and the
# columns to link.
entity_id_column = "Country ISO"
columns_to_link = [
    "Date",
    "Event Description",
    "Location Where Sexual Violence Was Committed",
    "Reported Perpetrator",
]
main_graph = build_model_with_attributes(
    input_dataframe, entity_id_column, columns_to_link
)

# Collect every graph node whose name does not start with ENTITY_LABEL; these
# are listed as attributes.
attributes_list = [
    node for node in main_graph.nodes() if not node.startswith(ENTITY_LABEL)
]
original_df = pl.DataFrame(attributes_list, schema=["Attribute"])

# Should any attribute be removed? List the attributes to remove here.
additional_trimmed_attributes = []

# Keep the preview as the final expression of the cell: a notebook renders
# only the value of a cell's last expression, so a `head()` call placed
# mid-cell is silently discarded.
original_df.head()



In [None]:
# Settings for the network-detection step.

# Thresholds: the first is handed to trim_nodeset, the second to
# build_networks.
max_attribute_degree = 10
max_network_size = 20

# Optional inputs, all left empty here.
# NOTE(review): presumably these would hold extra attribute types, inferred
# node links and flag data when available — confirm against the toolkit.
supporting_attribute_types = []
inferred_links = set()
integrated_flags = pl.DataFrame()

In [None]:
from toolkit.risk_networks.identify_networks import build_entity_records, build_networks, trim_nodeset


# Step 1: trim the node set using the manual removal list and the degree
# threshold. Returns the trimmed (attribute, degree) data and the trimmed nodes.
# NOTE(review): exact shapes of both return values are defined in
# toolkit.risk_networks.identify_networks — confirm there.
trimmed_degrees, trimmed_nodes = trim_nodeset(
    main_graph, additional_trimmed_attributes, max_attribute_degree
)

# Step 2: build the networks (communities) from the graph and the trimmed nodes.
community_nodes, entity_to_community = build_networks(
    main_graph,
    trimmed_nodes,
    inferred_links,
    supporting_attribute_types,
    max_network_size,
)

# Step 3: build the entity records from the communities and the flags table.
entity_records = build_entity_records(community_nodes, integrated_flags)

### Attributes removed because of high degree

In [None]:
# Report how many attributes were trimmed, then preview them ordered by the
# number of linked entities (largest first).
print("Total: ", len(trimmed_degrees))
pl.DataFrame(
    list(trimmed_degrees),
    schema=["Attribute", "Linked Entities"],
    # Each element is one (attribute, linked-entity count) row. State the
    # orientation explicitly: without it polars infers rows vs. columns from
    # the data's dimensions, which is ambiguous when the number of rows
    # happens to equal the number of columns.
    orient="row",
).sort("Linked Entities", descending=True).head()

In [None]:
# Summarise the detected networks: the total count, how many have more than
# one member, and the size of the largest of those.
comm_count = len(community_nodes)

if comm_count > 0:
    # Sizes of the networks that contain more than one member.
    comm_sizes = [len(comm) for comm in community_nodes if len(comm) > 1]
    # default=0 keeps the summary working when every network has a single
    # member; a bare max() on the empty list would raise ValueError.
    max_comm_size = max(comm_sizes, default=0)
    # Not used by the message below; kept because it is a notebook-level name
    # that other cells may read.
    trimmed_atts = len(trimmed_degrees)
    print(
        f"Networks identified: {comm_count} ({len(comm_sizes)} with multiple entities, maximum {max_comm_size})"
    )