In [131]:
import json
import os
import random
import re

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import openai
import pandas as pd
from networkx.drawing.nx_agraph import graphviz_layout
from openai import OpenAI
from tqdm import tqdm
# Read the OpenAI API key from a local text file instead of hard-coding it in the notebook.
# NOTE(review): key.txt is presumably kept out of version control — confirm.
with open("key.txt", "r") as file:
    # strip() removes surrounding whitespace such as a trailing newline in the file.
    key = file.read().strip() 
# The key is set both on the `openai` module attribute and on the explicit client below.
openai.api_key=key
# Shared client object; generate_hierarchy uses it for every chat-completion request.
client = OpenAI(api_key=key)

In [116]:
def generate_hierarchy(topics, theme, terms=None, depth=2, temperature=0.7, model="gpt-4o-mini", num=3,with_synonyms=0,branching="constant",max_num=20):
    """
    Generate hierarchical data using the OpenAI API while maintaining context across API calls.

    Each entry of `topics` becomes a root of the returned nested dictionary and is
    expanded depth-first with one chat-completion request per expanded node. A node
    name is inserted at most once across the whole hierarchy (tracked in `seen_nodes`),
    so a name proposed again later is skipped.

    :param topics: Sequence of top-level topic names (iterated more than once).
    :param theme: Theme text inserted into every expansion prompt.
    :param terms: Unused; kept so existing callers keep working.
    :param depth: Number of levels generated below each top-level topic.
    :param temperature: Sampling temperature for the priming and expansion requests.
    :param model: Model name for the priming and expansion requests. Synonym requests
        use the default model of the inner `get_synonyms` helper.
    :param num: Base number of subtopics requested per node.
    :param with_synonyms: When > 0, `round(with_synonyms * current_num / num)` synonyms
        are requested for each expanded topic and unseen ones are added as leaf
        children of that topic.
    :param branching: How the requested subtopic count varies with level:
        "constant", "decreasing", "increasing" or "random".
    :param max_num: Upper bound on the count for "increasing" branching; `None`
        means twice `num`.
    :return: Nested dict `{topic: {subtopic: {...}}}` whose leaves are empty dicts.
    :raises ValueError: If `branching` is not one of the supported strategies.
    """
    supported_branching = ("constant", "decreasing", "increasing", "random")
    if branching not in supported_branching:
        # Fail before spending any API calls. An unknown strategy used to yield
        # None as the requested count, which was then interpolated into the prompt.
        raise ValueError(
            f"Unknown branching strategy {branching!r}; expected one of {supported_branching}"
        )

    if max_num is None:
        max_num = num * 2  # Default to twice the starting value if no max is provided

    hierarchy = {}
    seen_nodes = set()
    
    messages_init = [
        {"role": "system", "content": """
            You generate hierarchical topic structures and maintain consistency across responses.
            - Subtopics must be **distinct** and **separate** from one another.
            - Subtopics must be more specific that than the parent but still general
            - Each subtopic must be **measurable** in magnitude (able to increase or decrease, but does not include whether it is an increase or decrease).
            - You have flexibility to generate **fewer** subtopics if meaningful distinctions become unclear.
            - Subtopics should not describe **change itself** (e.g., "fluctuation in X"), but rather be **the thing that changes**.
            - Subtopics should not merely influence the parent topic but be an sub example or more specific application of the parent.   
        """}
    ]

    # Priming request: the model's reply to the bare system message is appended to the
    # shared context that every later expansion request starts from.
    response = client.chat.completions.create(
        model=model,
        messages=messages_init,
        temperature=temperature
    )

    # `content` may be None (e.g. a refusal); treat that as an empty reply.
    content_init = (response.choices[0].message.content or "").strip()
    messages_init.append({"role": "assistant", "content": content_init})

    def string_items(values):
        """ Keep only string entries; anything else cannot be used as a node name. """
        return [value for value in values if isinstance(value, str)]

    def get_dynamic_num(level):
        """ Adjust number of children based on branching strategy with a smooth decrease. """
        if branching == "constant":
            return num
        if branching == "decreasing":
            # Scale `num` down based on the proportion of depth remaining, ensuring a minimum of 2
            return max(2, round(num * (1 - (level - 1) / depth)))
        if branching == "increasing":
            if depth > 1:
                grown = num + (max_num - num) * (level - 1) / (depth - 1)
            else:
                # Only level 1 exists when depth == 1; avoid dividing by depth - 1 == 0.
                grown = num
            return min(max_num, max(2, round(grown)))
        # Remaining validated strategy: "random".
        return random.randint(2, max(2, 2 * num - 2))

    def parse_json_response(response_text):
        """ Extract and parse JSON safely from model output """
        try:
            json_data = json.loads(response_text)
            if isinstance(json_data, list):
                return string_items(json_data)
        except json.JSONDecodeError:
            pass
        # Fall back to the first bracketed span when the list is wrapped in other text.
        match = re.search(r'\[.*?\]', response_text, re.DOTALL)
        if match:
            try:
                json_data = json.loads(match.group(0))
                if isinstance(json_data, list):
                    return string_items(json_data)
            except json.JSONDecodeError:
                pass

        # Last resort: numbered ("1. x") or starred ("* x") list lines.
        extracted = re.findall(r'^\d+\.\s*(.+)$', response_text, re.MULTILINE) 
        if not extracted:
            extracted = re.findall(r'^\*\s*(.+)$', response_text, re.MULTILINE)

        if extracted:
            return extracted

        print(f"Warning: Failed to parse valid JSON from response: {response_text}")
        return []

    def get_synonyms(topic, model="gpt-4o-mini", num=3):
        """Generate synonyms for a given topic using GPT and store separately."""
        # NOTE(review): `model` here shadows the outer argument and the only call site
        # does not pass it, so synonyms always use "gpt-4o-mini" — confirm intended.
        messages = [
            {"role": "system", "content": "You provide synonyms and related phrases for a given term."},
            {"role": "user", "content": f"Generate {num} synonyms or closely related terms for '{topic}', in a JSON list format."}
        ]

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.5
        )
        
        response_content = (response.choices[0].message.content or "").strip()

        # Accept the reply only when it parses to a list; a JSON object such as
        # {"synonyms": [...]} would otherwise have its keys treated as synonyms.
        try:
            parsed = json.loads(response_content)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, list):
            return string_items(parsed)

        # Try the outermost bracketed span (DOTALL so pretty-printed lists match too).
        match = re.search(r'(\[.*\])', response_content, re.DOTALL)
        if match:
            try:
                parsed = json.loads(match.group(0))
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, list):
                return string_items(parsed)

        # As a last resort, collect quoted items (straight or curly double quotes).
        fallback_pattern = r'[“"]([^”"]+)[”"]'  # Match text inside either “ ” or " "
        fallback_matches = re.findall(fallback_pattern, response_content)
        if fallback_matches:
            return [item.strip() for item in fallback_matches[:num]]

        # If everything fails, return an empty list
        print("Warning: Unable to extract synonyms.")
        return []

    def expand_topic(topic, parent_dict, level, sibling_topics, core_topic, other_core_topics):
        """ Recursively expand a topic while maintaining API call context (without expanding synonyms). """
        if level > depth:
            return

        current_num = get_dynamic_num(level)
        # Generate subtopics first (without including synonyms yet)
        prompt = f"""
            Expand the given **Parent Topic** into a list of up to **{current_num}** or less specific **subtopics** that:  
            - Are related to **{core_topic}**, especially **{topic}**.
            - Clearly reflect their connection to the **Parent Topic** and **Core Topic** through their nature and scope. 
            - Are **Not** related to the other core topics: **{other_core_topics}**.  
            - Much Less related to sibling topics: **{sibling_topics}**.  
            - Fit within the given **theme**: **{theme}**. 
            - Each subtopic should be a **subcategory** of the parent topic, such that a change in the subtopic implies a change in the broader topic.  

            **Parent Topic:** {topic} 
            **Core Topic:** {core_topic}

            **Output Format:** A JSON list of up to **{current_num}** subtopics, e.g.:  

            '["Subtopic 1", "...", "Subtopic {current_num}"]'

            **Ensure that:**  
            - All subtopics can increase and decrease in magnitude, but **does not** contain increase or decrease or any causal relations 
            - Each subtopic is distinct and separate from others.  
        """

        # Every request starts from the shared priming context plus this one prompt.
        messages = messages_init + [{"role": "user", "content": prompt}]
        
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature
        )

        content = (response.choices[0].message.content or "").strip()
        subtopics = parse_json_response(content)

        parent_dict[topic] = {}  # Initialize the topic as a dictionary

        # Add unseen subtopics and expand each one before moving to the next sibling.
        for subtopic in subtopics:
            if subtopic not in seen_nodes:
                seen_nodes.add(subtopic)
                parent_dict[topic][subtopic] = {}  # Add as an empty dictionary (no children)
                remaining_subtopics = [s for s in subtopics if s != subtopic]
                expand_topic(subtopic, parent_dict[topic], level + 1, remaining_subtopics, core_topic, other_core_topics)

        # Generate and append synonyms directly to the parent dictionary
        if with_synonyms>0:
            synonyms = get_synonyms(topic,num=round(with_synonyms*(current_num/num)))
            for synonym in synonyms:
                if synonym not in seen_nodes:
                    seen_nodes.add(synonym)
                    parent_dict[topic][synonym] = {}  # Add synonym as a key with an empty dictionary

    # Expand every top-level topic; the other top-level topics are passed both as its
    # siblings and as the "other core topics" to steer away from.
    for topic in topics:
        if topic not in seen_nodes:
            seen_nodes.add(topic)
            remaining_topics = [s for s in topics if s != topic]
            expand_topic(topic, hierarchy, 1, remaining_topics, topic, remaining_topics)

    return hierarchy

In [118]:
def clean_strings(data):
    """
    Recursively remove the words "increase" and "decrease" (lowercase and
    capitalized forms) from every string inside a nested dict/list structure.

    Dictionary keys are cleaned as well as values. Values of any other type are
    returned unchanged. Surrounding whitespace left behind by a removal is kept.
    """
    if isinstance(data, str):
        cleaned = data
        # Removals are applied one after another, in this order.
        for word in ("increase", "decrease", "Increase", "Decrease"):
            cleaned = cleaned.replace(word, "")
        return cleaned

    if isinstance(data, list):
        return list(map(clean_strings, data))

    if isinstance(data, dict):
        cleaned_dict = {}
        for key, value in data.items():
            cleaned_dict[clean_strings(key)] = clean_strings(value)
        return cleaned_dict

    return data


In [120]:
def flatten_hierarchy_to_df(hierarchy, parent_labels=None, level=0):
    """
    Walk a nested topic dictionary depth-first and emit one flat record per topic.

    Each record holds the topic name under "topic", every ancestor under
    "category <i>" (i = ancestor depth), and the topic itself under
    "category <level>".

    :param hierarchy: Nested dictionary mapping a topic to a dictionary of its subtopics.
    :param parent_labels: Ancestor names from the root down to the parent of this
        level; None is treated as no ancestors.
    :param level: Depth of the topics in `hierarchy` (0 for roots).
    :return: List of row dictionaries in depth-first order (not a DataFrame).
    """
    ancestors = [] if parent_labels is None else parent_labels
    records = []

    for name, children in hierarchy.items():
        record = {"topic": name}
        for idx, ancestor in enumerate(ancestors):
            record[f"category {idx}"] = ancestor
        # The topic labels its own level as well.
        record[f"category {level}"] = name
        records.append(record)

        # Descend only into non-empty dictionaries.
        if isinstance(children, dict) and children:
            records += flatten_hierarchy_to_df(children, ancestors + [name], level + 1)

    return records

def hierarchy_to_df(hierarchy):
    """
    Build a pandas DataFrame from a nested topic dictionary.

    The rows come from flatten_hierarchy_to_df: one row per topic, with a "topic"
    column and one "category <i>" column per hierarchy level.

    :param hierarchy: Nested dictionary mapping a topic to a dictionary of its subtopics.
    :return: DataFrame built from the flattened rows.
    """
    return pd.DataFrame(flatten_hierarchy_to_df(hierarchy))


Unnamed: 0,topic,category 0,category 1,category 2
0,Political Indicators,Political Indicators,,
1,Voter Turnout Rate,Political Indicators,Voter Turnout Rate,
2,Voter Registration Levels,Political Indicators,Voter Turnout Rate,Voter Registration Levels
3,Voter Engagement in Local Elections,Political Indicators,Voter Turnout Rate,Voter Engagement in Local Elections
4,Political Party Representation,Political Indicators,Political Party Representation,
5,Number of Seats Held by Major Parties,Political Indicators,Political Party Representation,Number of Seats Held by Major Parties
6,Diversity of Political Affiliations in Local C...,Political Indicators,Political Party Representation,Diversity of Political Affiliations in Local C...
7,Military Operations,Military Operations,,
8,Troop Deployment Levels,Military Operations,Troop Deployment Levels,
9,Number of Active Military Bases,Military Operations,Troop Deployment Levels,Number of Active Military Bases


In [134]:
# Themes and their top-level topics; themes[i] is paired with all_topics[i].
themes = [
    "Energy, Ecosystems, and Humans",
    "Offshore energy impacts on fisheries",
    "West Java, Indonesia",
]

all_topics = [
    # Topics for "Energy, Ecosystems, and Humans"
    [
        "Ecosystem Health Indicators",
        "Economic Impact Metrics",
        "Energy Security Performance",
        "Human Wellbeing Measures",
        "Social Conflict Intensity",
    ],
    # Topics for "Offshore energy impacts on fisheries"
    [
        "Fish Population Metrics",
        "Fishing Effort & Utilization",
        "Governance Effectiveness & Stakeholder Engagement",
        "Offshore Energy Infrastructure Development",
        "Marine Environmental Quality",
        "Fisheries Economic Performance",
    ],
    # Topics for "West Java, Indonesia"
    [
        "Political Indicators",
        "Military Operations",
        "Economic Metrics",
        "Social Metrics",
        "Information Environment",
        "Infrastructure Resilience",
    ],
]

# Individual names kept for cells that refer to a single topic list.
top_level_topics1, top_level_topics2, top_level_topics3 = all_topics


In [142]:

# Run settings read by the generation loop and embedded in the output file names.
branching = "random"   # branching strategy passed to generate_hierarchy
with_synonyms = 0      # 0 disables synonym generation
depth = 2              # levels generated below each top-level topic
max_sub = 2            # passed to generate_hierarchy as `num`
t = 1.0                # sampling temperature

In [143]:
# Generate, clean, and persist one hierarchy (JSON + CSV) per theme.
# themes and all_topics are paired by position, so a length mismatch is an error;
# check it up front instead of failing part-way through the run.
if len(themes) != len(all_topics):
    raise ValueError("themes and all_topics must have the same length")

# Create the output directory before any API call is made, so a missing folder
# cannot discard a freshly generated hierarchy at write time.
output_dir = 'Generated Data'
os.makedirs(output_dir, exist_ok=True)

for theme, top_level_topics in tqdm(zip(themes, all_topics), total=len(themes)):
    # Shared stem so the JSON and CSV outputs always carry the same run settings.
    file_stem = f'{output_dir}/{theme}_hierarchy_t{t}_maxsub{max_sub}_depth{depth}_synonyms{with_synonyms}_{branching}'
    file_name = f'{file_stem}.json'

    hierarchy = generate_hierarchy(top_level_topics,
                                   theme, depth=depth,
                                   temperature=t,
                                   num=max_sub, model="gpt-4o",
                                   with_synonyms=with_synonyms,
                                   branching=branching)
    # Remove "increase"/"decrease" wording the model may still have produced.
    hierarchy = clean_strings(hierarchy)

    with open(file_name, 'w') as f:
        json.dump(hierarchy, f, indent=2)

    df = hierarchy_to_df(hierarchy)
    df.to_csv(f'{file_stem}.csv')

100%|██████████| 3/3 [01:10<00:00, 23.34s/it]


In [145]:
import dash
from dash import html
import dash_cytoscape as cyto

def generate_cytoscape_elements(hierarchy):
    """
    Converts hierarchical data to a Cytoscape-compatible format with tooltips.

    Emits one node element per distinct topic name (`id` plus a `title` tooltip
    text) and one edge element for every parent -> child relation, in depth-first
    order.

    :param hierarchy: Nested dictionary mapping a topic to a dictionary of its subtopics.
    :return: List of element dictionaries, each with a single 'data' key.
    """
    elements = []
    emitted_ids = set()

    def add_elements(subtree, parent=None):
        for topic, subtopics in subtree.items():
            # Node ids must be unique, so a repeated topic name reuses the
            # existing node and only contributes its edge.
            if topic not in emitted_ids:
                emitted_ids.add(topic)
                elements.append({
                    'data': {'id': topic, 'title': f"Information about {topic}"}  # Tooltip added here
                })
            # Compare against None so a falsy parent name (e.g. "") still gets its edges.
            if parent is not None:
                elements.append({
                    'data': {'source': parent, 'target': topic}
                })
            if isinstance(subtopics, dict):
                add_elements(subtopics, parent=topic)

    add_elements(hierarchy)

    return elements




def create_app(hierarchy_plot):
    """
    Build a Dash application that renders a topic hierarchy as a Cytoscape graph.

    The hierarchy is converted with generate_cytoscape_elements and laid out with
    the directed 'breadthfirst' layout. Zooming and panning are enabled.

    :param hierarchy_plot: Nested dictionary mapping a topic to a dictionary of its subtopics.
    :return: The configured Dash app (not yet running).
    """
    app = dash.Dash(__name__)

    graph_elements = generate_cytoscape_elements(hierarchy_plot)
    if not graph_elements:
        # An empty graph usually means the hierarchy was empty or malformed.
        print("No elements generated. Check your hierarchy structure.")

    node_rule = {
        'selector': 'node',
        'style': {
            'content': 'data(id)',       # label each node with its id
            'background-color': '#0074D9',
            'color': 'black',
            'font-size': '18px',
            'font-weight': 'bold',
            'text-rotation': '-10deg',   # slight tilt on the labels
            'width': 50,
            'height': 50,
        }
    }
    edge_rule = {
        'selector': 'edge',
        'style': {
            'width': 3,
            'line-color': '#ccc',
        }
    }

    graph = cyto.Cytoscape(
        id='cytoscape',
        elements=graph_elements,
        layout={'name': 'breadthfirst', 'directed': True, 'padding': 5},
        style={'width': '100%', 'height': '600px', 'backgroundColor': 'white'},
        zoomingEnabled=True,
        userPanningEnabled=True,
        minZoom=0.1,
        maxZoom=2,
        stylesheet=[node_rule, edge_rule],
    )

    app.layout = html.Div([
        html.H1("Interactive Hierarchical Visualization"),
        graph,
    ])

    return app

if __name__ == '__main__':
    # `hierarchy` is whatever the generation loop produced last.
    app = create_app(hierarchy)
    # Newer Dash releases replaced `run_server` with `run`; prefer `run` when it
    # exists and fall back to `run_server` on older installations.
    start_server = app.run if hasattr(app, "run") else app.run_server
    start_server(debug=True)

In [51]:
# import os
# import re

# # Define the folder path containing the files
# folder_path = 'Generated Data'



# # Loop through all files in the folder
# for filename in os.listdir(folder_path):
#     # Check if the file is a .json or .csv file
#     if filename.endswith('.json') or filename.endswith('.csv'):
#         # Check if the file name already ends with '_synonyms' followed by a number

#         # Split the filename to get the base name and extension
#         base_name, ext = os.path.splitext(filename)
#         # Create the new file name by appending '_constant'
#         new_filename = base_name + '_constant' + ext
#         # Get the full file paths
#         old_file = os.path.join(folder_path, filename)
#         new_file = os.path.join(folder_path, new_filename)
#         # Rename the file
#         os.rename(old_file, new_file)
#         print(f'Renamed: {filename} -> {new_filename}')


Renamed: United States_hierarchy_t0.5_maxsub2_depth9_synonyms0.json -> United States_hierarchy_t0.5_maxsub2_depth9_synonyms0_constant.json
Renamed: Offshore energy impacts on fisheries_hierarchy_t1.0_maxsub5_depth3_synonyms0.csv -> Offshore energy impacts on fisheries_hierarchy_t1.0_maxsub5_depth3_synonyms0_constant.csv
Renamed: Offshore energy impacts on fisheries_hierarchy_t1.0_maxsub15_depth2_synonyms0.csv -> Offshore energy impacts on fisheries_hierarchy_t1.0_maxsub15_depth2_synonyms0_constant.csv
Renamed: Indonesia_hierarchy_t0.5_maxsub10_depth3_synonyms0.csv -> Indonesia_hierarchy_t0.5_maxsub10_depth3_synonyms0_constant.csv
Renamed: offshore wind energy impacts on Gulf of Mexico fisheries_hierarchy_t0.5_maxsub4_depth4_synonyms0.json -> offshore wind energy impacts on Gulf of Mexico fisheries_hierarchy_t0.5_maxsub4_depth4_synonyms0_constant.json
Renamed: Indonesia_hierarchy_t0.1_maxsub10_depth1_synonyms0.csv -> Indonesia_hierarchy_t0.1_maxsub10_depth1_synonyms0_constant.csv
Rename