In [1]:
import os
import sys

import polars as pl

# Extend the module search path with the parent directory before importing
# `toolkit` (presumably the package lives one level up — confirm repo layout).
sys.path.append("..")
from toolkit.detect_entity_networks.api import DetectEntityNetworks
from toolkit.AI.openai_configuration import OpenAIConfiguration
from toolkit.detect_entity_networks.classes import FlagAggregatorType
from toolkit.detect_entity_networks.prepare_model import format_data_columns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Instantiate the Detect Entity Networks workflow
den = DetectEntityNetworks()

# Assemble the OpenAI settings; the API key is read from the environment
# (a missing OPENAI_API_KEY raises KeyError here).
openai_settings = {
    "api_type": "OpenAI",
    "api_key": os.environ["OPENAI_API_KEY"],
    "model": "gpt-4o",
}
ai_configuration = OpenAIConfiguration(openai_settings)
den.set_ai_configuration(ai_configuration)

# Read the example input CSV into a polars DataFrame
data_path = "../example_outputs/detect_entity_networks/company_grievances/company_grievances_input.csv"
entity_df = pl.read_csv(data_path)

print("Loaded data")

Loaded data


In [3]:
# set entity-attributes
# `format_data_columns` is imported in the notebook's top import cell so that
# every dependency is visible in one place.
entity_id_column = "name"
columns_to_link = ["address", "city", "email", "phone", "owner"]

# NOTE: this rebinds `entity_df` to the output of format_data_columns; later
# cells read the rebound frame, so re-running this cell feeds already-formatted
# data back in.
entity_df = format_data_columns(entity_df, columns_to_link, entity_id_column)
den.add_attribute_links(entity_df, entity_id_column, columns_to_link)

# Print the model summary after linking the attribute columns
summary = den.get_model_summary_value()
print("Summary")
print(summary)

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 0, Number of groups: 0, Number of links: 41727


In [4]:
# set flags
# `FlagAggregatorType` is imported in the notebook's top import cell so that
# every dependency is visible in one place.
entity_id_column = "name"
columns_to_link = [
    "safety_grievances",
    "pay_grievances",
    "conditions_grievances",
    "treatment_grievances",
    "workload_grievances",
]
# Flag columns are registered with the Count aggregator type
# (presumably each cell value is a number of flags — confirm in toolkit docs).
flag_format = FlagAggregatorType.Count
den.add_flag_links(entity_df, entity_id_column, columns_to_link, flag_format)

# Print the model summary after linking the flag columns
summary = den.get_model_summary_value()
print("Summary")
print(summary)

flags shape: (18_010, 4)
┌─────────────────────┬─────────────────────┬─────────────────────┬───────┐
│ entity              ┆ type                ┆ flag                ┆ count │
│ ---                 ┆ ---                 ┆ ---                 ┆ ---   │
│ str                 ┆ str                 ┆ str                 ┆ i64   │
╞═════════════════════╪═════════════════════╪═════════════════════╪═══════╡
│ VentureTech Labs    ┆ safety_grievances   ┆ safety_grievances   ┆ 0     │
│ HydroFlow Systems   ┆ safety_grievances   ┆ safety_grievances   ┆ 2     │
│ StreamFlow Services ┆ safety_grievances   ┆ safety_grievances   ┆ 3     │
│ Green Earth Farms   ┆ safety_grievances   ┆ safety_grievances   ┆ 0     │
│ AgriPros            ┆ safety_grievances   ┆ safety_grievances   ┆ 2     │
│ …                   ┆ …                   ┆ …                   ┆ …     │
│ GreenTech Venturez  ┆ workload_grievances ┆ workload_grievances ┆ 0     │
│ HomeComfort Living  ┆ workload_grievances ┆ workload_grievanc

In [5]:
# set groups: link each entity to its sector and country columns
entity_id_column = "name"
columns_to_link = ["sector", "country"]
den.add_group_links(entity_df, entity_id_column, columns_to_link)

# Emit the heading and the refreshed model summary on consecutive lines
summary = den.get_model_summary_value()
print("Summary", summary, sep="\n")

Summary
Number of entities: 3602, Number of attributes: 18549, Number of flags: 8108, Number of groups: 634, Number of links: 41727


In [6]:
await den.index_nodes(["ENTITY"])
if len(den.embedded_texts) > 0:
    print(f"Number of nodes indexed: {len(den.embedded_texts)}")

Got 3602 existing texts
Got 0 new texts
Number of nodes indexed: 3602


In [7]:
# infer nodes with similar names
# NOTE(review): 0.03 is the value passed to infer_nodes; presumably a
# similarity/distance cutoff — confirm its meaning in the toolkit docs.
threshold = 0.03
den.infer_nodes(threshold)

inferred_links_count = len(den.inferred_links)
if inferred_links_count == 0:
    print("No inferred links")
else:
    print(f"Number of links inferred: {inferred_links_count}")
    # Table pairing each text with a similar one (columns `text`, `similar`
    # in the saved output)
    inferred_df = den.inferred_nodes_df()
    print(inferred_df)

Number of links inferred: 2242
shape: (3_547, 2)
┌───────────────────────┬────────────────────────┐
│ text                  ┆ similar                │
│ ---                   ┆ ---                    │
│ str                   ┆ str                    │
╞═══════════════════════╪════════════════════════╡
│ Adventure Gear        ┆ Adventure Gear Co      │
│ Adventure Gear        ┆ Adventure Gear Company │
│ Adventure Gear        ┆ AdventureGear          │
│ Adventure Gear Co     ┆ Adventure Gear Company │
│ Adventure Gear Co     ┆ AdventureGear Co       │
│ …                     ┆ …                      │
│ WindPower Corp        ┆ WindPower Inc          │
│ WindPower Solutions   ┆ WindPower Solutons     │
│ Windy Heights Limited ┆ Windy Heights Ltd      │
│ WindyCity Energy      ┆ WindyCity Power        │
│ Zephyr Energy Co      ┆ Zephyr Energy Inc      │
└───────────────────────┴────────────────────────┘


In [8]:
# Remove attributes (placeholder cell: no code runs here, so nothing is removed in this example)


In [9]:
# Run identification on the model built in the cells above. The call is the
# cell's last expression, so its return value is displayed (a list of tuples,
# each starting with an entity name, in the saved output).
den.identify()

[('Sunshine Power', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('ForestEdge Lumber Company', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('ForestEdge Lumber Co', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Rays Solar Tech', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Sunland Solar Solutions', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('ForestEdge Lumber', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Leafland Woodworks', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Bark & Leaf Industries', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('DesertTech Innovations', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Bark & Branch Co', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('DesertSun Solr', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('ForestWood Products', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Timberland Creations', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Woodland Timber Supplies', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('DesertSun Solar', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Timberland Crafts', 0, 0, 16, 0, 0, 0.0, 0.0),
 ('Innovative Creations', 0, 1, 8, 0, 0, 0.0, 0.0),
 ('Innovative Media Inc', 0, 1, 8, 0, 0, 0.0, 0.0),
 ('Dreamland Studios', 0, 1, 8, 0, 0, 0.0, 0.0),
 ('Innovative Medi

In [10]:
# Print the records summary produced by the workflow
print(den.get_records_summary())

# Then report how many attributes were trimmed and show the table itself
trimmed_attributes = den.trimmed_attributes
print(f"Attributes removed because of high degree: {len(trimmed_attributes)}")
print(trimmed_attributes)

Networks identified: 436 (436 with multiple entities, maximum 19)
Attributes removed because of high degree: 443
shape: (443, 2)
┌────────────────────────────┬─────────────────┐
│ Attribute                  ┆ Linked Entities │
│ ---                        ┆ ---             │
│ str                        ┆ i64             │
╞════════════════════════════╪═════════════════╡
│ phone==4455667795          ┆ 11              │
│ address==123 Silicon Ave   ┆ 11              │
│ address==1500 Wellness Way ┆ 11              │
│ owner==Chris Care          ┆ 11              │
│ city==Logistics City       ┆ 11              │
│ …                          ┆ …               │
│ city==Construct City       ┆ 189             │
│ city==Eco City             ┆ 189             │
│ city==Techville            ┆ 223             │
│ phone==5566778899          ┆ 225             │
│ phone==1122334455          ┆ 335             │
└────────────────────────────┴─────────────────┘


In [11]:
# with entities: preview the first rows of the per-entity results frame
entities_with_networks_df = den.get_entity_df()
entities_with_networks_df.head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged
str,i64,i64,i64,i64,i64,f64,f64
"""Creative Visio…",0,217,10,93,9,9.3,9.0
"""MediaWave Ente…",10,217,10,93,9,9.3,9.0
"""MediaWave Prod…",16,217,10,93,9,9.3,9.0
"""MediaWave Prod…",25,217,10,93,9,9.3,9.0
"""WaveTech Produ…",7,217,10,93,9,9.3,9.0


In [12]:
# with entities and groups
# In the saved output this frame has the same columns as the entity frame plus
# the group columns `sector` and `country`. `group_df` is reused by the next
# cell; the trailing .head() only controls what is displayed.
group_df = den.get_grouped_df()
group_df.head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged,sector,country
str,i64,i64,i64,i64,i64,f64,f64,str,str
"""Creative Visio…",0,217,10,93,9,9.3,9.0,"""Art""","""Artland"""
"""MediaWave Ente…",10,217,10,93,9,9.3,9.0,"""Media""","""Waveland"""
"""MediaWave Prod…",16,217,10,93,9,9.3,9.0,"""Media""","""Broadcastland"""
"""MediaWave Prod…",25,217,10,93,9,9.3,9.0,"""Media""","""MediaLand"""
"""WaveTech Produ…",7,217,10,93,9,9.3,9.0,"""Media""","""Broadcastland"""


In [13]:
# Look up the row(s) whose entity_id is exactly "Earths Bounty"; the saved
# output shows network_id 70 for this entity.
group_df.filter(pl.col("entity_id") == "Earths Bounty").head()

entity_id,entity_flags,network_id,network_entities,network_flags,flagged,flags/entity,flagged/unflagged,sector,country
str,i64,i64,i64,i64,i64,f64,f64,str,str
"""Earths Bounty""",5,70,17,76,9,4.47,1.12,"""Agriculture""","""EcoLand"""


In [15]:
# Exposure report for entity "Earths Bounty" in network 70 (the network_id
# shown for that entity in the previous cell's saved output). The return value
# is a Markdown string; as a bare expression it is displayed as a raw repr.
den.get_exposure_report("Earths Bounty", 70)

'##### Flag Exposure Paths\n\nThe selected entity **Earths Bounty** has **5** direct flags and is linked to **71** indirect flags via **6** paths from **8** related entities:\n\n**Path 1**\n\n```\nENTITY==GreenLeaf Ventures [linked to 8 flags]\n--->\n  phone==9876543214 [linked to 2 entities]\n  --->\n    ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 2**\n\n```\nENTITY==EcoHarvest [linked to 6 flags]\nENTITY==GreenLeaf Organics International [linked to 4 flags]\nENTITY==GreenLeaf Organics Ltd [linked to 4 flags]\nENTITY==GreenLeaf Ventures [linked to 8 flags]\n--->\n  owner==Jamie Earth [linked to 5 entities]\n  --->\n    ENTITY==Earths Bounty [linked to 5 flags]\n```\n\n**Path 3**\n\n```\nENTITY==EcoHarvest [linked to 6 flags]\nENTITY==GreenLeaf Organics [linked to 38 flags]\nENTITY==GreenLeaf Organics Ltd [linked to 4 flags]\nENTITY==GreenLeaf Organix [linked to 3 flags]\nENTITY==GreenLeaf Ventures [linked to 8 flags]\n--->\n  city==Greenville [linked to 9 entities]\n  ---

In [16]:
# report
# NOTE(review): `selected_entity` is assigned but not passed to
# generate_report in this cell. The saved report still names "Earths Bounty",
# so the entity presumably carries over from the earlier get_exposure_report
# call — confirm against the DetectEntityNetworks API.
selected_network = 70
selected_entity = "Earths Bounty"
report = den.generate_report(selected_network=selected_network)
print(report)

##### Evaluation of Earths Bounty in Network 70

In this analysis, we focus on the entity "Earths Bounty" within Network 70. This entity is directly associated with 5 flags and is indirectly linked to an additional 71 flags through various connections with other entities in the network. The connections are established through shared attributes such as phone numbers, ownership, and city locations.

**Direct and Indirect Flag Exposure:**

1. **Direct Flags:**
   - Earths Bounty itself has 5 direct flags, indicating a moderate level of interest or concern.

2. **Indirect Flags:**
   - Earths Bounty is connected to 71 indirect flags through 6 distinct paths involving 8 related entities. These paths highlight the interconnected nature of the network and the potential for indirect exposure to flagged activities.

**Key Connection Paths:**

- **Path 1:** 
  - Earths Bounty is linked to GreenLeaf Ventures, which has 8 flags, through a shared phone number (9876543214).

- **Path 2:**
  - The ow