# In [15]:
from pyvis.network import Network
import pandas as pd

# Load the match table. Loading is best-effort: on failure we fall back to an
# empty dataframe so the rest of the script can decide to skip graph generation.
try:
    df = pd.read_csv('trainset_reconstructed.csv')

    # Drop rows without a parent or child id: a NaN id cannot be converted
    # with int() when node names are built later.
    df = df.dropna(subset=['parent_id', 'child_id'])

except (OSError, ValueError, KeyError) as err:
    # OSError   -> file missing or unreadable
    # ValueError -> malformed/empty CSV or undecodable bytes (pandas' parser
    #               errors are ValueError subclasses)
    # KeyError  -> the id columns are absent, so dropna(subset=...) failed
    # Anything else (KeyboardInterrupt, SystemExit, real bugs) propagates.
    print(f"could not load trainset_reconstructed.csv ({err!r}); using an empty dataframe.")
    df = pd.DataFrame()

# Score thresholds for the '%' column.
thresh_safe = 0.88    # scores >= this are drawn as "safe" (green) articles
thresh_review = 0.50  # only rows scoring strictly above this are drawn at all

def generate_family_graph(focus_parent_id, df, review_threshold=None, safe_threshold=None):
    """Build a pyvis network around one parent report and its "extended family".

    The "immediate family" is every row of ``df`` whose ``parent_id`` equals
    ``focus_parent_id`` and whose ``'%'`` score is strictly above the review
    threshold. The "extended family" is every row (above the same threshold)
    whose ``child_id`` is one of those articles, which pulls in the other
    parents that matched the same articles.

    Args:
        focus_parent_id: Id of the central report. It is compared with ``==``
            against the ``parent_id`` column, so it should have a comparable
            type (the caller in this file passes an int).
        df: DataFrame with ``parent_id``, ``child_id`` and ``'%'`` columns.
        review_threshold: Minimum score (exclusive) for a row to be drawn.
            Defaults to the module-level ``thresh_review``.
        safe_threshold: Score (inclusive) from which an article is coloured
            green instead of yellow. Defaults to the module-level
            ``thresh_safe``.

    Returns:
        A ``pyvis.network.Network`` ready to be saved, or ``None`` when the
        frame is empty or the parent has no match above the review threshold.
    """
    # An empty frame (including the column-less fallback frame) has no matches;
    # bail out before indexing columns that may not exist.
    if df.empty:
        print("no matches found for this parent.")
        return None

    if review_threshold is None:
        review_threshold = thresh_review
    if safe_threshold is None:
        safe_threshold = thresh_safe

    above_review = df['%'] > review_threshold

    # filter for the "immediate family" (my direct matches)
    my_matches = df[(df['parent_id'] == focus_parent_id) & above_review]

    if my_matches.empty:
        print("no matches found for this parent.")
        return None

    # get the list of article IDs (the kids)
    my_kid_ids = my_matches['child_id'].unique()

    # "EXTENDED FAMILY" LOGIC:
    # find ALL rows where these specific articles appear. this finds OTHER
    # parents who matched with the same news stories.
    extended_family = df[df['child_id'].isin(my_kid_ids) & above_review]

    # Process the focus parent's own rows first. An article node is styled by
    # the first row that mentions it, so this makes its colour and tooltip
    # score reflect the match with the MAIN report instead of depending on
    # the row order of the input file.
    is_focus_row = extended_family['parent_id'] == focus_parent_id
    ordered_rows = pd.concat([extended_family[is_focus_row], extended_family[~is_focus_row]])

    net = Network(height="700px", width="100%", bgcolor="#ffffff", font_color="black")

    # NOTE(review): parents and articles share one node-id namespace; if a
    # parent id can ever equal a child id the two would merge into one node.
    added_nodes = set()

    print(f"building family tree for parent {focus_parent_id}.")

    for parent, child, score in zip(
        ordered_rows['parent_id'], ordered_rows['child_id'], ordered_rows['%']
    ):
        # ids may arrive as floats, so normalise to int then str for node names
        p_id = str(int(parent))
        c_id = str(int(child))

        # ADD PARENTS
        if p_id not in added_nodes:
            if int(p_id) == focus_parent_id:
                # main parent: big central blue node
                net.add_node(p_id, label=f"MAIN REPORT\n{p_id}", color="#003366", size=50)
            else:
                # other "sibling" parents: smaller purple nodes
                net.add_node(p_id, label=f"Rel. Report\n{p_id}", color="#6f42c1", size=30)
            added_nodes.add(p_id)

        # ADD CHILDREN (articles)
        if c_id not in added_nodes:
            # green = safe match, yellow = needs a manual check
            col = "#28a745" if score >= safe_threshold else "#ffc107"

            # small dot for the article; details are shown in the hover title
            net.add_node(c_id, label=" ", title=f"Article {c_id}\nScore: {score}", color=col, size=15)
            added_nodes.add(c_id)

        # ADD EDGES: higher score -> thicker (value) and shorter (length) edge
        dist = (1 - score) * 800
        net.add_edge(p_id, c_id, value=score, length=dist, color="#cccccc")

    # PHYSICS: stabilize the layout before showing it
    net.set_options("""
    var options = {
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -30000,
          "centralGravity": 0.3,
          "springLength": 100,
          "springConstant": 0.05,
          "avoidOverlap": 1
        },
        "stabilization": {
            "enabled": true,
            "iterations": 2000,
            "fit": true
        }
      }
    }
    """)

    return net

# Build and save the graph for one hard-coded report, but only when data was loaded.
if not df.empty:
    # the id is passed as an int; presumably this matches the numeric
    # parent_id column it is compared against (verify against the CSV)
    net = generate_family_graph(979028, df)
    # generate_family_graph returns None when the parent has no matches, so
    # test for None explicitly rather than relying on the object's truthiness
    if net is not None:
        net.save_graph("family_cluster.html")
        print("saved to family_cluster.html.")
else:
    # make the skipped run visible instead of finishing silently
    print("no data loaded; skipping graph generation.")

# Output:
# building family tree for parent 979028.
# saved to family_cluster.html.
