In [1]:
import sys

sys.path.append("..")
import os
from intelligence_toolkit.detect_entity_networks.api import DetectEntityNetworks
from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration
import polars as pl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create the workflow object
den = DetectEntityNetworks()
# Set the AI configuration
ai_configuration = OpenAIConfiguration(
    {
        "api_type": "OpenAI",
        "api_key": os.environ["OPENAI_API_KEY"],
        "model": "gpt-4o",
    }
)
den.set_ai_configuration(ai_configuration)

data_path = "../example_outputs/detect_entity_networks/company_grievances/company_grievances_input.csv"
entity_df = pl.read_csv(data_path)

print("Loaded data")

Loaded data


In [3]:
# set entity-attributes
from intelligence_toolkit.detect_entity_networks.prepare_model import (
    format_data_columns,
)


entity_id_column = "name"
columns_to_link = ["address", "city", "email", "phone", "owner"]
entity_df = format_data_columns(entity_df, columns_to_link, entity_id_column)
den.add_attribute_links(entity_df, entity_id_column, columns_to_link)

summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 0, Number of groups: 0, Number of links: 41727


In [4]:
# set flags
from intelligence_toolkit.detect_entity_networks.classes import FlagAggregatorType


entity_id_column = "name"
columns_to_link = [
    "safety_grievances",
    "pay_grievances",
    "conditions_grievances",
    "treatment_grievances",
    "workload_grievances",
]
flag_format = FlagAggregatorType.Count
den.add_flag_links(entity_df, entity_id_column, columns_to_link, flag_format)
summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 8108, Number of groups: 0, Number of links: 41727


In [5]:
# set groups
entity_id_column = "name"
columns_to_link = ["sector", "country"]
den.add_group_links(entity_df, entity_id_column, columns_to_link)

summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 8108, Number of groups: 634, Number of links: 41727


In [6]:
await den.index_nodes(["ENTITY"])
if len(den.embedded_texts) > 0:
    print(f"Number of nodes indexed: {len(den.embedded_texts)}")

100%|██████████| 500/500 [00:13<00:00, 37.42it/s] 
100%|██████████| 500/500 [00:03<00:00, 156.69it/s]
100%|██████████| 500/500 [00:07<00:00, 66.89it/s] 
100%|██████████| 500/500 [00:02<00:00, 173.54it/s]
100%|██████████| 500/500 [00:03<00:00, 148.57it/s]
100%|██████████| 500/500 [00:07<00:00, 66.52it/s] 
100%|██████████| 500/500 [00:03<00:00, 151.56it/s]
100%|██████████| 102/102 [00:00<00:00, 127.61it/s]


Got 0 existing texts
Got 3602 new texts
Number of nodes indexed: 3602


In [7]:
# infer nodes with similar names

threshold = 0.03
den.infer_nodes(threshold)

inferred_links_count = len(den.inferred_links)
if inferred_links_count > 0:
    print(f"Number of links inferred: {inferred_links_count}")
    inferred_df = den.inferred_nodes_df()
    print(inferred_df)
else:
    print("No inferred links")

Number of links inferred: 506
shape: (289, 2)
┌──────────────────────────┬──────────────────────────┐
│ text                     ┆ similar                  │
│ ---                      ┆ ---                      │
│ str                      ┆ str                      │
╞══════════════════════════╪══════════════════════════╡
│ Adventure Gear Co        ┆ Adventure Gear Company   │
│ Adventure Gear Co        ┆ AdventureGear Co         │
│ Adventure Seekers        ┆ AdventureSeekers         │
│ Aero Dynamics            ┆ Aero Dynamics Inc        │
│ Aero Dynamics Inc        ┆ AeroDynamics Inc         │
│ …                        ┆ …                        │
│ Wellness World           ┆ WellnessWorld            │
│ WellnessFirst            ┆ WellnessFirst Health     │
│ WellnessFirst HealthCare ┆ WellnessFirst Healthcare │
│ Wind Power Inc           ┆ WindPower Inc            │
│ Wind Power Solutions     ┆ WindPower Solutions      │
└──────────────────────────┴──────────────────────────┘


In [8]:
den.identify()

[('BuildSmart Developments', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Corp', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('GreenBuild Innovations', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('GreenBuild Corp', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('GreenBuild Developments', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Innovations Ltd', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Construction Co', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Innov8tions Ltd', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Corporation', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('GreenBuild Enterprises', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('GreenBuild Ventures', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('BuildSmart Group', 0, 0, 12, 0, 0, 0.0, 0.0),
 ('Build It Smart', 0, 1, 13, 23, 2, 1.77, 0.18),
 ('AgriSolutions', 0, 1, 13, 23, 2, 1.77, 0.18),
 ('Wellbeing Inc', 0, 1, 13, 23, 2, 1.77, 0.18),
 ('AquaEnergy Corp', 11, 1, 13, 23, 2, 1.77, 0.18),
 ('Wellbeing Incorporated', 0, 1, 13, 23, 2, 1.77, 0.18),
 ('BuildRight Corp', 0, 1, 13, 23, 2, 1.77, 0.18),
 ('BuildFuture Enterprises', 0, 1, 13, 2

In [9]:
print(den.get_records_summary())
print(f"Attributes removed because of high degree: {len(den.trimmed_attributes)}")
print(den.trimmed_attributes)

Networks identified: 403 (403 with multiple entities, maximum 19)
Attributes removed because of high degree: 443
shape: (443, 2)
┌────────────────────────┬─────────────────┐
│ Attribute              ┆ Linked Entities │
│ ---                    ┆ ---             │
│ str                    ┆ i64             │
╞════════════════════════╪═════════════════╡
│ owner==Build Group     ┆ 11              │
│ owner==Chic Trend      ┆ 11              │
│ owner==Jordan Explorer ┆ 11              │
│ city==Logistics City   ┆ 11              │
│ city==Port City        ┆ 11              │
│ …                      ┆ …               │
│ city==Construct City   ┆ 189             │
│ city==Eco City         ┆ 189             │
│ city==Techville        ┆ 223             │
│ phone==5566778899      ┆ 225             │
│ phone==1122334455      ┆ 335             │
└────────────────────────┴─────────────────┘


In [10]:
# with entities
den.get_entity_df().head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged
str,i64,i64,i64,i64,i64,f64,f64
"""GreenGrow""",0,88,6,25,5,4.17,5.0
"""GreenLeaf Vent…",8,88,6,25,5,4.17,5.0
"""GreenLeaf Orga…",4,88,6,25,5,4.17,5.0
"""GreenLeaf Orga…",4,88,6,25,5,4.17,5.0
"""EcoHarvest""",6,88,6,25,5,4.17,5.0


In [11]:
# with entities and groups
group_df = den.get_grouped_df()
group_df.head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged,sector,country
str,i64,i64,i64,i64,i64,f64,f64,str,str
"""GreenGrow""",0,88,6,25,5,4.17,5.0,"""Agriculture""","""Agroland"""
"""GreenLeaf Vent…",8,88,6,25,5,4.17,5.0,"""Agriculture""","""Farmville"""
"""GreenLeaf Orga…",4,88,6,25,5,4.17,5.0,"""Agriculture""","""EcoLand"""
"""GreenLeaf Orga…",4,88,6,25,5,4.17,5.0,"""Agriculture""","""EcoLand"""
"""EcoHarvest""",6,88,6,25,5,4.17,5.0,"""Agriculture""","""EcoLand"""


In [12]:
earths = group_df.filter(pl.col("entity_id") == "Earths Bounty").head()
print(earths)

shape: (1, 10)
┌────────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬─────────┐
│ entity_id  ┆ entity_fl ┆ network_i ┆ network_e ┆ … ┆ flags/ent ┆ flagged/u ┆ sector    ┆ country │
│ ---        ┆ ags       ┆ d         ┆ ntities   ┆   ┆ ity       ┆ nflagged  ┆ ---       ┆ ---     │
│ str        ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ str       ┆ str     │
│            ┆ i64       ┆ i64       ┆ i64       ┆   ┆ f64       ┆ f64       ┆           ┆         │
╞════════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═════════╡
│ Earths     ┆ 5         ┆ 89        ┆ 9         ┆ … ┆ 9.78      ┆ 3.5       ┆ Agricultu ┆ EcoLand │
│ Bounty     ┆           ┆           ┆           ┆   ┆           ┆           ┆ re        ┆         │
└────────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴─────────┘


In [13]:
selected_network = earths[0].get_column("network_id")[0]
print("Selected Network: ", selected_network)

Selected Network:  89


In [14]:
den.get_exposure_report("Earths Bounty", selected_network)

'##### Flag Exposure Paths\n\nThe selected entity **Earths Bounty** has **5** direct flags and is linked to **83** indirect flags via **4** paths from **6** related entities:\n\n**Path 1**\n\n```\nENTITY==GreenLeaf Farm Co [linked to 6 flags]\nENTITY==GreenLeaf Farms [linked to 16 flags]\nENTITY==GreenLeaf Organics [linked to 38 flags]\nENTITY==GreenLeaf Produce [linked to 8 flags]\n--->\n  owner==Jordan Earth [linked to 5 entities]\n  --->\n    ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 2**\n\n```\nENTITY==GreenLeaf Agriculture [linked to 10 flags]\nENTITY==GreenLeaf Gardens [linked to 5 flags]\n--->\n  email==info@greenleafcom [linked to 3 entities]\n  --->\n    ENTITY==GreenLeaf Organics [linked to 38 flags]\n    --->\n      owner==Jordan Earth [linked to 5 entities]\n      --->\n        ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 3**\n\n```\nENTITY==GreenLeaf Agriculture [linked to 10 flags]\n--->\n  email==contact@greenleafcom [linked to 2 entities]\n  -

In [15]:
# report
selected_entity = "Earths Bounty"
report = den.generate_report(selected_network=selected_network)
print(report)

##### Evaluation of Earths Bounty in Network 89

In this analysis, we focus on the entity "Earths Bounty" within Network 89. This entity has a direct flag count of 5 and is indirectly linked to an additional 83 flags through various connections with other entities in the network. The connections and their implications are detailed below.

**Entity Connections and Flag Exposure:**

1. **Path 1:**
   - "Earths Bounty" is connected to "GreenLeaf Farm Co," "GreenLeaf Farms," "GreenLeaf Organics," and "GreenLeaf Produce" through the owner "Jordan Earth."
   - "GreenLeaf Farm Co" has 6 flags, "GreenLeaf Farms" has 16 flags, "GreenLeaf Organics" has 38 flags, and "GreenLeaf Produce" has 8 flags.
   - The owner, Jordan Earth, links these entities, which collectively contribute to the indirect flag exposure of "Earths Bounty."

2. **Path 2:**
   - This path involves "GreenLeaf Agriculture" and "GreenLeaf Gardens," which are connected via the email "info@greenleafcom."
   - "GreenLeaf Agricultur