Steps:


1. Create data model
    - attributes
    - flags
    - groups

**2. Process data model**  
    - **index nodes**  
    - **infer nodes**  
    - detect networks
    
3. Explore networks
4. Generate AI reports

# 2. Process data model

In [None]:
#%pip install ucimlrepo
import sys
from ucimlrepo import fetch_ucirepo
import polars as pl


# Put the directory two levels up on the import path. Presumably this is what
# lets later cells import the `toolkit` package -- confirm against repo layout.
sys.path.append("../..")

# Fetch dataset id=29 from the UCI Machine Learning Repository.
computer_hardware = fetch_ucirepo(id=29)

input_dataframe = pl.DataFrame(computer_hardware.data.original)
# Allow up to 200 characters per string value when polars renders a table.
pl.Config.set_fmt_str_lengths(200)
# Only a cell's final expression is rendered automatically, so a bare
# `.head()` here would be silently discarded; print the preview explicitly.
print(input_dataframe.head())

# Column used as the entity identifier, and the columns handed to the model
# builder together with it.
entity_id_column = "VendorName"
columns_to_link = [
    "ModelName",
    "MYCT",
    "MMIN",
    "MMAX",
    "CACH",
    "CHMIN",
    "CHMAX",
    "PRP",
    "ERP",
]

In [None]:
from toolkit.risk_networks.prepare_model import build_model_with_attributes

# Build the graph from the loaded frame, the entity-id column and the list of
# columns to link; the result is reused by the indexing step below.
main_graph = build_model_with_attributes(
    input_dataframe,
    entity_id_column,
    columns_to_link,
)

## Index nodes (Optional)

In [None]:

import os
from toolkit.AI.openai_configuration import OpenAIConfiguration
from toolkit.risk_networks.index_and_infer import index_and_infer, build_inferred_df


# Settings for the OpenAI client; the API key is read from the environment
# (it is None when OPENAI_API_KEY is not set).
openai_settings = {
    "api_type": "OpenAI",
    "api_key": os.environ.get("OPENAI_API_KEY"),
    "model": "gpt-4o-2024-08-06",
}
ai_configuration = OpenAIConfiguration(openai_settings)

# Columns to index and the similarity threshold handed to index_and_infer.
columns_to_index = ["ModelName"]
similarity_threshold = 0.05

index_result = index_and_infer(
    columns_to_index,
    main_graph,
    similarity_threshold,
    openai_configuration=ai_configuration,
)
# The call yields the inferred links followed by the count of embedded texts.
inferred_links_list, embedded_texts_count = index_result


In [None]:

# Summarise the indexing run: embedded-text count first, then link count.
print(f"*Number of nodes indexed*: {embedded_texts_count}")
inferred_link_count = len(inferred_links_list)
print(f"*Number of links inferred*: {inferred_link_count}")

# Turn the inferred links into a dataframe and show its first rows.
df_list = build_inferred_df(inferred_links_list)
inferred_preview = df_list.head()
print(inferred_preview)