In [1]:
import sys

sys.path.append("..")
import os
from intelligence_toolkit.detect_entity_networks.api import DetectEntityNetworks
from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration
import polars as pl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create the workflow object
den = DetectEntityNetworks()
# Set the AI configuration
ai_configuration = OpenAIConfiguration(
    {
        "api_type": "OpenAI",
        "api_key": os.environ["OPENAI_API_KEY"],
        "model": "gpt-4o",
    }
)
den.set_ai_configuration(ai_configuration)

data_path = "../example_outputs/detect_entity_networks/company_grievances/company_grievances_input.csv"
entity_df = pl.read_csv(data_path)

print("Loaded data")

Loaded data


In [3]:
# set entity-attributes
from intelligence_toolkit.detect_entity_networks.prepare_model import (
    format_data_columns,
)


entity_id_column = "name"
columns_to_link = ["address", "city", "email", "phone", "owner"]
entity_df = format_data_columns(entity_df, columns_to_link, entity_id_column)
den.add_attribute_links(entity_df, entity_id_column, columns_to_link)

summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 0, Number of groups: 0, Number of links: 41727


In [4]:
# set flags
from intelligence_toolkit.detect_entity_networks.classes import FlagAggregatorType


entity_id_column = "name"
columns_to_link = [
    "safety_grievances",
    "pay_grievances",
    "conditions_grievances",
    "treatment_grievances",
    "workload_grievances",
]
flag_format = FlagAggregatorType.Count
den.add_flag_links(entity_df, entity_id_column, columns_to_link, flag_format)
summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 8108, Number of groups: 0, Number of links: 41727


In [5]:
# set groups
entity_id_column = "name"
columns_to_link = ["sector", "country"]
den.add_group_links(entity_df, entity_id_column, columns_to_link)

summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 8108, Number of groups: 634, Number of links: 41727


In [6]:
await den.index_nodes(["ENTITY"])
if len(den.embedded_texts) > 0:
    print(f"Number of nodes indexed: {len(den.embedded_texts)}")

100%|██████████| 600/600 [00:12<00:00, 46.22it/s] 
100%|██████████| 600/600 [00:14<00:00, 41.83it/s] 
100%|██████████| 600/600 [00:08<00:00, 73.08it/s] 
100%|██████████| 600/600 [00:29<00:00, 20.37it/s]
100%|██████████| 600/600 [00:30<00:00, 19.36it/s]
100%|██████████| 600/600 [00:26<00:00, 22.25it/s]
100%|██████████| 2/2 [00:00<00:00,  3.68it/s]


Got 0 existing texts
Got 3602 new texts
Number of nodes indexed: 3602


In [7]:
# infer nodes with similar names

threshold = 0.03
den.infer_nodes(threshold)

inferred_links_count = len(den.inferred_links)
if inferred_links_count > 0:
    print(f"Number of links inferred: {inferred_links_count}")
    inferred_df = den.inferred_nodes_df()
    print(inferred_df)
else:
    print("No inferred links")

Number of links inferred: 2242
shape: (3_547, 2)
┌───────────────────────┬────────────────────────┐
│ text                  ┆ similar                │
│ ---                   ┆ ---                    │
│ str                   ┆ str                    │
╞═══════════════════════╪════════════════════════╡
│ Adventure Gear        ┆ Adventure Gear Co      │
│ Adventure Gear        ┆ Adventure Gear Company │
│ Adventure Gear        ┆ AdventureGear          │
│ Adventure Gear Co     ┆ Adventure Gear Company │
│ Adventure Gear Co     ┆ AdventureGear Co       │
│ …                     ┆ …                      │
│ WindPower Corp        ┆ WindPower Inc          │
│ WindPower Solutions   ┆ WindPower Solutons     │
│ Windy Heights Limited ┆ Windy Heights Ltd      │
│ WindyCity Energy      ┆ WindyCity Power        │
│ Zephyr Energy Co      ┆ Zephyr Energy Inc      │
└───────────────────────┴────────────────────────┘


In [9]:
den.identify()

[('TechnoCraft', 0, 0, 9, 16, 5, 1.78, 1.25),
 ('TechnoCraft Solutions', 2, 0, 9, 16, 5, 1.78, 1.25),
 ('TechnoCraft Industries', 3, 0, 9, 16, 5, 1.78, 1.25),
 ('Innovative Machines', 0, 0, 9, 16, 5, 1.78, 1.25),
 ('TechnoCraft Innovations', 7, 0, 9, 16, 5, 1.78, 1.25),
 ('CraftTech Solutions', 1, 0, 9, 16, 5, 1.78, 1.25),
 ('TechnoCraft Industires', 3, 0, 9, 16, 5, 1.78, 1.25),
 ('Forge Industries', 0, 0, 9, 16, 5, 1.78, 1.25),
 ('FarmFresh Foods', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('Fresh Farms Co', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('FarmFresh', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('Farm Fresh Co', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('FarmFresh Produce', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('Build It Green', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('FreshFarm Organics', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('FreshFarm Produce', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('Farm Fresh Produce', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('FarmFresh Inc', 0, 1, 27, 42, 5, 1.56, 0.23),
 ('Agriculture Pioneers', 7, 1, 27, 42, 5, 1.56, 0.23),
 ('

In [10]:
print(den.get_records_summary())
print(f"Attributes removed because of high degree: {len(den.trimmed_attributes)}")
print(den.trimmed_attributes)

Networks identified: 426 (426 with multiple entities, maximum 19)
Attributes removed because of high degree: 443
shape: (443, 2)
┌─────────────────────────┬─────────────────┐
│ Attribute               ┆ Linked Entities │
│ ---                     ┆ ---             │
│ str                     ┆ i64             │
╞═════════════════════════╪═════════════════╡
│ owner==Chris Care       ┆ 11              │
│ phone==2234567890       ┆ 11              │
│ address==505 Journey Rd ┆ 11              │
│ owner==Eli Green        ┆ 11              │
│ phone==9876543220       ┆ 11              │
│ …                       ┆ …               │
│ city==Eco City          ┆ 189             │
│ city==Construct City    ┆ 189             │
│ city==Techville         ┆ 223             │
│ phone==5566778899       ┆ 225             │
│ phone==1122334455       ┆ 335             │
└─────────────────────────┴─────────────────┘


In [11]:
# with entities
den.get_entity_df().head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged
str,i64,i64,i64,i64,i64,f64,f64
"""Skyline Engine…",18,226,6,134,5,22.33,5.0
"""Skyline Engine…",18,226,6,134,5,22.33,5.0
"""Skyline Soluti…",19,226,6,134,5,22.33,5.0
"""Skyline Innova…",16,226,6,134,5,22.33,5.0
"""MediaWave Ente…",10,173,11,93,9,8.45,4.5


In [12]:
# with entities and groups
group_df = den.get_grouped_df()
group_df.head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged,sector,country
str,i64,i64,i64,i64,i64,f64,f64,str,str
"""Skyline Engine…",18,226,6,134,5,22.33,5.0,"""Engineering""","""Buildland"""
"""Skyline Engine…",18,226,6,134,5,22.33,5.0,"""Engineering""","""BuildLand"""
"""Skyline Soluti…",19,226,6,134,5,22.33,5.0,"""Engineering""","""Buildland"""
"""Skyline Innova…",16,226,6,134,5,22.33,5.0,"""Engineering""","""Buildland"""
"""MediaWave Ente…",10,173,11,93,9,8.45,4.5,"""Media""","""Waveland"""


In [13]:
group_df.filter(pl.col("entity_id") == "Earths Bounty").head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged,sector,country
str,i64,i64,i64,i64,i64,f64,f64,str,str
"""Earths Bounty""",5,94,17,76,9,4.47,1.12,"""Agriculture""","""EcoLand"""


In [15]:
den.get_exposure_report("Earths Bounty", 94)

'##### Flag Exposure Paths\n\nThe selected entity **Earths Bounty** has **5** direct flags and is linked to **71** indirect flags via **6** paths from **8** related entities:\n\n**Path 1**\n\n```\nENTITY==EcoHarvest [linked to 6 flags]\nENTITY==GreenLeaf Organics [linked to 38 flags]\nENTITY==GreenLeaf Organics Ltd [linked to 4 flags]\nENTITY==GreenLeaf Organix [linked to 3 flags]\nENTITY==GreenLeaf Ventures [linked to 8 flags]\n--->\n  city==Greenville [linked to 9 entities]\n  --->\n    ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 2**\n\n```\nENTITY==EcoHarvest [linked to 6 flags]\nENTITY==GreenLeaf Organics International [linked to 4 flags]\nENTITY==GreenLeaf Organics Ltd [linked to 4 flags]\nENTITY==GreenLeaf Ventures [linked to 8 flags]\n--->\n  owner==Jamie Earth [linked to 5 entities]\n  --->\n    ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 3**\n\n```\nENTITY==EcoHarvest Co [linked to 1 flags]\nENTITY==EcoHarvest Farms [linked to 7 flags]\n--->\n  ENTITY

In [17]:
# report
selected_network = 94
selected_entity = "Earths Bounty"
report = den.generate_report(selected_network=selected_network)
print(report)

##### Evaluation of Entity Network 94

In this network, we are examining the relationships and potential flag exposure among various entities connected through shared attributes such as city, owner, address, phone, and email. The network includes several entities, some of which are directly flagged, while others are indirectly linked to flags through their connections.

### Entity Connections and Flag Exposure

1. **Leafy Greens Co** is connected to the city of Greenville, and its owner is listed as Leafy Green. It shares an address at 34 Greenway. This entity does not have direct flags but is connected to other entities through shared attributes.

2. **GreenGrow** is linked to the city of Greenville and has a phone number and owner named Pat Green. It does not have direct flags but is part of the network through these connections.

3. **Earths Bounty** is directly flagged with 5 flags. It is connected to the city of Greenville and has an owner named Jordan Earth. This entity is signif