Steps:


1. Create data model
    - attributes
    - flags
    - groups

2. **Process data model**
    - **index nodes**
    - **infer nodes**
    - detect networks
    
3. Explore networks
4. Generate AI reports

# 2. Process data model

In [None]:
import sys
# Add the parent directory (relative to the current working directory) to the
# module search path. NOTE(review): presumably this is what lets the
# `toolkit.*` imports in the following cells resolve -- confirm the notebook
# is always launched from its own folder, since "../" depends on the cwd.
sys.path.append("../")

## Index nodes (Optional)

In [None]:
from toolkit.risk_networks.main import build_model_with_attributes
from toolkit.risk_networks.index_and_infer import index_nodes
from toolkit.AI.openai_configuration import OpenAIConfiguration
import os
import polars as pl

# --- 1. Build the graph model from the sample CSV ---------------------------
# The entity id column and the columns to link are passed, together with the
# loaded dataframe, to build_model_with_attributes.
entity_id_column = "Country ISO"
columns_to_link = [
    "Date",
    "Event Description",
    "Location Where Sexual Violence Was Committed",
    "Reported Perpetrator",
]

input_dataframe = pl.read_csv("./input/rn_test.csv")
main_graph = build_model_with_attributes(
    input_dataframe,
    entity_id_column,
    columns_to_link,
)

# --- 2. Configure the OpenAI client -----------------------------------------
# Settings are read from environment variables. Optional keys that are
# currently left unset:
#   "api_type": "OpenAI"   -> Azure OpenAI or OpenAI (default: Azure OpenAI)
#   "api_key":  os.environ.get("OPENAI_API_KEY")
openai_settings = {
    # Endpoint is only needed when using Azure OpenAI.
    "api_base": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    # Falls back to "gpt-4o" when OPENAI_API_MODEL is not set.
    "model": os.environ.get("OPENAI_API_MODEL", "gpt-4o"),
}
openai_config = OpenAIConfiguration(openai_settings)

# --- 3. Index the selected columns ------------------------------------------
# index_nodes returns three values; they are kept as notebook-level names
# because the next cell passes them to infer_nodes.
columns_to_index = ["Reported Perpetrator", "Event Description"]
embedded_texts, nearest_text_distances, nearest_text_indices = index_nodes(
    columns_to_index,
    main_graph,
    openai_configuration=openai_config,
)

print(f"*Number of nodes indexed*: {len(embedded_texts)}")


In [None]:
from toolkit.risk_networks.index_and_infer import infer_nodes, create_inferred_links, build_inferred_df

# Threshold handed to infer_nodes as its first argument.
# NOTE(review): presumably a maximum embedding distance for two texts to be
# treated as the same node -- confirm against infer_nodes before tuning.
similarity_threshold = 0.05

# Uses the outputs of the indexing cell above (embedded_texts,
# nearest_text_indices, nearest_text_distances), so that cell must run first.
inferred_links = infer_nodes(
    similarity_threshold,
    embedded_texts,
    nearest_text_indices,
    nearest_text_distances,
)

# Convert the inferred links into a list so they can be counted and tabulated.
inferred_links_list = create_inferred_links(inferred_links)
print(f"*Number of links inferred*: {len(inferred_links_list)}")

# Tabulate the inferred links and show the first rows for a quick check.
df_list = build_inferred_df(inferred_links_list)
print(df_list.head())