In [5]:
import matplotlib.pyplot as plt
import networkx as nx
import textwrap
import numpy as np  # For calculating text rotation


# Define the etymology data for "extraordinary"
etymology_data = {
  "word": "pseudopseudohypoparathyroidism",
  "meaning": "a medical condition that mimics the symptoms of the condition known as pseudohypoparathyroidism, but without the associated biochemical abnormalities.",
  "language": "English",
  "synonyms_in_other_languages": {
    "japanese": "偽偽副甲状腺機能低下症",
    "arabic": "الكاذب المزيف لنقص نشاط الغدة جار الدرقية",
    "french": "pseudopseudohypoparathyroïdisme",
    "chinese": "假性假性甲状旁腺功能减退症"
  },
  "history": "The term describes a rare inherited disorder that affects the bones and growth. Its name reflects the complexity of diagnosing endocrine disorders.",
  "parts": [
    {
      "part": "pseudo",
      "type": "prefix"
    },
    {
      "part": "pseudo",
      "type": "prefix"
    },
    {
      "part": "hypo",
      "type": "prefix"
    },
    {
      "part": "para",
      "type": "prefix"
    },
    {
      "part": "thyroid",
      "type": "root"
    },
    {
      "part": "ism",
      "type": "suffix"
    }
  ],
  "etymology": [
    {
      "part": "pseudo",
      "meaning": "false, deceptive",
      "example_words": ["pseudonym", "pseudoscience"],
      "language": "Greek",
      "history": "Derived from Greek 'pseudēs' meaning false and 'pseudos' meaning falsehood.",
      "tracing": ["pseudo <-- pseudēs (Greek) <-- pseudos (falsehood)"],
      "etymology": [
        {
          "part": "pseudos",
          "meaning": "falsehood, lie",
          "language": "Ancient Greek",
          "history": "Used in Greek literature and philosophy to denote something that is false or not genuine.",
          "example_words": ["pseudology", "pseudomorph"],
          "tracing": ["pseudos <-- Proto-Indo-European *bhaudh- (to be aware, make aware)"]
        }
      ]
    },
    {
      "part": "hypo",
      "meaning": "under, below normal",
      "example_words": ["hypoglycemia", "hypotension"],
      "language": "Greek",
      "history": "Comes from Greek 'hypo' meaning under or below.",
      "tracing": ["hypo <-- hypo (Greek)"],
      "etymology": [
        {
          "part": "hypo",
          "meaning": "under, below",
          "language": "Ancient Greek",
          "history": "Frequently used in medical and scientific terms to indicate a deficiency or lower-than-normal condition.",
          "example_words": ["hypodermic", "hypothermia"],
          "tracing": ["hypo <-- Proto-Indo-European *upo (under, below)"]
        }
      ]
    },
    {
      "part": "para",
      "meaning": "beside, near, resembling",
      "example_words": ["parallel", "paradox"],
      "language": "Greek",
      "history": "Derived from Greek 'para' meaning beside, near, or related to.",
      "tracing": ["para <-- para (Greek)"],
      "etymology": [
        {
          "part": "para",
          "meaning": "beside, beyond",
          "language": "Ancient Greek",
          "history": "Used in various contexts to indicate proximity, similarity, or adjacency.",
          "example_words": ["paraprofessional", "paramilitary"],
          "tracing": ["para <-- Proto-Indo-European *per- (forward, through)"]
        }
      ]
    },
    {
      "part": "thyroid",
      "meaning": "a gland in the neck that secretes hormones regulating growth and development",
      "example_words": ["thyroidectomy", "thyrotoxicosis"],
      "language": "Greek",
      "history": "Derived from Greek 'thyreos' (shield) and 'oeides' (form), referring to the shield-like shape of the gland.",
      "tracing": ["thyroid <-- thyreos + oeides (Greek)"],
      "etymology": [
        {
          "part": "thyreos",
          "meaning": "large shield",
          "language": "Ancient Greek",
          "history": "Refers to a type of large, door-shaped shield used in ancient Greece.",
          "example_words": ["thyreoid", "thyreotoxic"],
          "tracing": ["thyreos <-- Proto-Indo-European *terə- (to rub, turn)"]
        },
        {
          "part": "oeides",
          "meaning": "form, shape",
          "language": "Ancient Greek",
          "history": "Used to denote shape or resemblance in various compound words.",
          "example_words": ["anthropoid", "cyclopean"],
          "tracing": ["oeides <-- Proto-Indo-European *eid- (to see, to know)"]
        }
      ]
    },
    {
      "part": "ism",
      "meaning": "a distinctive practice, system, or philosophy",
      "example_words": ["capitalism", "socialism"],
      "language": "Greek",
      "history": "Derived from Greek suffix '-ismos', indicating a practice, system, or behavior.",
      "tracing": ["ism <-- ismos (Greek)"],
      "etymology": [
        {
          "part": "ismos",
          "meaning": "forming action nouns from verbs",
          "language": "Ancient Greek",
          "history": "Utilized in Greek to form nouns that denote a practice, system, or characteristic behavior.",
          "example_words": ["tourism", "plagiarism"],
          "tracing": ["ismos <-- Proto-Indo-European *dhe- (to set, put)"]
        }
      ]
    }
  ]
}


# Initialize the directed graph and populate it
G = nx.DiGraph()

# This function merges duplicate parts to prevent infinite recursion
def merge_duplicates(data):
    seen = {}
    def recurse(item):
        if item['part'] in seen:
            return
        seen[item['part']] = item
        if 'etymology' in item:
            new_etymology = []
            for child in item['etymology']:
                recurse(child)
                # Replace with seen to ensure references are reused
                new_etymology.append(seen[child['part']])
            item['etymology'] = new_etymology
    recurse(data)
    return data

etymology_data["part"] = etymology_data["word"]
# Preprocess the etymology data to merge duplicates
etymology_data = merge_duplicates(etymology_data)


# def add_to_graph(graph, data, parent=None):
#     node_data = {key: data.get(key, '') for key in ['part', 'language', 'meaning', 'example_words']}
#     node = node_data['part']
#     graph.add_node(node, **node_data)
#     if parent:
#         graph.add_edge(parent, node)
#     for child in data.get('etymology', []):
#         add_to_graph(graph, child, parent=node)

# # Update extraordinary_data with synonyms as example_words for the word part
# etymology_data["example_words"] = list(etymology_data["synonyms_in_other_languages"].values())


# add_to_graph(G, {"part": etymology_data["word"], "language": etymology_data["language"], "meaning": etymology_data["meaning"], "etymology": etymology_data["etymology"]})

def add_to_graph_iterative(graph, root_data):
    stack = [(None, root_data)]  # Stack of tuples: (parent_node, current_node_data)
    
    while stack:
        parent, data = stack.pop()
        if 'part' not in data:  # Skip if 'part' key is missing
            continue
        node_data = {key: data.get(key, '') for key in ['part', 'language', 'meaning', 'example_words']}
        node = node_data['part']
        graph.add_node(node, **node_data)
        if parent is not None:
            graph.add_edge(parent, node)
        for child in data.get('etymology', []):
            stack.append((node, child))

# Convert synonyms to example words for the root
etymology_data["example_words"] = list(etymology_data["synonyms_in_other_languages"].values())

# Call the iterative function to build the graph
add_to_graph_iterative(G, {"part": etymology_data["word"], "language": etymology_data["language"], "meaning": etymology_data["meaning"], "etymology": etymology_data["etymology"]})


# Initial positions: place the main word at an arbitrary top center, e.g., (0, 0)
pos = {etymology_data["word"]: (0, 0)}



# Function to manually set positions for child nodes to avoid overlap
def set_positions_branch_wise(graph, current_pos, parent, level=1, branch_offset=0):
    children = list(graph.successors(parent))
    num_children = len(children)
    width_between_children = 1.0 / (num_children + 1)
    for i, child in enumerate(children):
        # Horizontal position: disperse children horizontally
        x = -0.5 + width_between_children * (i + 1) + branch_offset
        # Vertical position: place lower levels further down
        y = -level * 0.1
        current_pos[child] = (x, y)
        # Recursively position the children of the current node, adjusting the branch offset
        set_positions_branch_wise(graph, current_pos, child, level + 1, branch_offset + i * 0.1)

# Apply custom positioning
set_positions_branch_wise(G, pos, etymology_data["word"])

# Adjust all positions so that the main word is at the top center of the final layout
# Find min and max x to center horizontally
min_x = min(pos.values(), key=lambda x: x[0])[0]
max_x = max(pos.values(), key=lambda x: x[0])[0]
for node in pos:
    # Centering horizontally
    pos[node] = ((pos[node][0] - min_x) / (max_x - min_x), pos[node][1])

plt.figure(figsize=(15, 10))

# Draw the graph
nx.draw_networkx_nodes(G, pos, node_size=3000, node_color="lightblue", alpha=0.7)
nx.draw_networkx_edges(G, pos, arrowstyle="<|-", arrowsize=30, edge_color="gray")
nx.draw_networkx_labels(G, pos, font_size=20, font_weight="bold")

# Add edge labels (keeping your original method)
for edge in G.edges:
    source, target = edge
    mid_x, mid_y = np.mean([pos[source], pos[target]], axis=0)
    angle = np.degrees(np.arctan2(pos[target][1] - pos[source][1], pos[target][0] - pos[source][0])) + 180
    plt.text(mid_x, mid_y, f"{G.nodes[target]['language']} --> {G.nodes[source]['language']}", rotation=angle, rotation_mode='anchor', fontsize=10, color='red', ha='center', va='center')

# Add example words around nodes
for node, (x, y) in pos.items():
    examples = ", ".join(G.nodes[node].get('example_words', []))
    wrapped_text = textwrap.fill(examples, width=20)
    plt.text(x, y - 0.15, wrapped_text, fontsize=14, ha='center', va='top', wrap=True)

plt.title(f'Etymology of "{etymology_data["word"]}"', fontsize=15)
plt.axis('off')
plt.show()

KeyboardInterrupt: 

In [4]:
etymology_data

{'word': 'pseudopseudohypoparathyroidism',
 'meaning': 'a medical condition that mimics the symptoms of the condition known as pseudohypoparathyroidism, but without the associated biochemical abnormalities.',
 'language': 'English',
 'synonyms_in_other_languages': {'japanese': '偽偽副甲状腺機能低下症',
  'arabic': 'الكاذب المزيف لنقص نشاط الغدة جار الدرقية',
  'french': 'pseudopseudohypoparathyroïdisme',
  'chinese': '假性假性甲状旁腺功能减退症'},
 'history': 'The term describes a rare inherited disorder that affects the bones and growth. Its name reflects the complexity of diagnosing endocrine disorders.',
 'parts': [{'part': 'pseudo', 'type': 'prefix'},
  {'part': 'pseudo', 'type': 'prefix'},
  {'part': 'hypo', 'type': 'prefix'},
  {'part': 'para', 'type': 'prefix'},
  {'part': 'thyroid', 'type': 'root'},
  {'part': 'ism', 'type': 'suffix'}],
 'etymology': [{'part': 'pseudo',
   'meaning': 'false, deceptive',
   'example_words': ['pseudonym', 'pseudoscience'],
   'language': 'Greek',
   'history': "Derived 