In [24]:
import spacy
from nltk.corpus import reuters

In [25]:
import nltk
# Download the Reuters corpus plus the punkt / punkt_tab tokenizer data.
# The calls are no-ops when the packages are already up to date.
nltk.download('reuters')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [26]:
# Load spaCy's small English pipeline; the cells that follow use it as `nlp`.
nlp = spacy.load("en_core_web_sm")

In [27]:
def extract_named_entities(sentences):
    """Return the text of every named entity spaCy finds in ``sentences``.

    Each sentence is run through the module-level ``nlp`` pipeline and the
    entity texts are collected in document order, duplicates included.
    """
    return [span.text for sentence in sentences for span in nlp(sentence).ents]

In [28]:
# reuters.sents() yields lists of tokens; joining on single spaces gives one
# string per sentence but keeps punctuation space-separated (e.g. "U . S ."),
# as the printed samples show.
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Convert list of words to a string

In [29]:
# Echo every sample sentence with a 1-based index, one blank line between them.
print("Sample Sentences:")
for number, text in enumerate(sample_sentences, start=1):
    print(f"Sentence {number}: {text}\n")


Sample Sentences:
Sentence 1: ASIAN EXPORTERS FEAR DAMAGE FROM U . S .- JAPAN RIFT Mounting trade friction between the U . S . And Japan has raised fears among many of Asia ' s exporting nations that the row could inflict far - reaching economic damage , businessmen and officials said .

Sentence 2: They told Reuter correspondents in Asian capitals a U . S . Move against Japan might boost protectionist sentiment in the U . S . And lead to curbs on American imports of their products .

Sentence 3: But some exporters said that while the conflict would hurt them in the long - run , in the short - term Tokyo ' s loss might be their gain .

Sentence 4: The U . S . Has said it will impose 300 mln dlrs of tariffs on imports of Japanese electronics goods on April 17 , in retaliation for Japan ' s alleged failure to stick to a pact not to sell semiconductors on world markets at below cost .

Sentence 5: Unofficial Japanese estimates put the impact of the tariffs at 10 billion dlrs and spokesmen

In [30]:
# Run NER over the sample sentences and preview the first ten entity strings.
named_entities = extract_named_entities(sample_sentences)
print("\nNamed Entities:")
for rank, name in enumerate(named_entities[:10], start=1):
    print(f"Entity {rank}: {name}")


Named Entities:
Entity 1: ASIAN
Entity 2: JAPAN
Entity 3: Japan
Entity 4: Asia
Entity 5: Reuter
Entity 6: Asian
Entity 7: Japan
Entity 8: American
Entity 9: Tokyo
Entity 10: 300


In [31]:
import networkx as nx
import plotly.graph_objects as go

In [32]:
# One node per distinct entity string; repeated strings collapse onto one node.
G = nx.Graph()
G.add_nodes_from(named_entities)

In [33]:
# Link entities that occur in the same sentence and share an entity label.
# Edges are keyed by the full entity span text (ent.text) so they attach to the
# nodes created from `named_entities`. Iterating over tokens instead splits
# multi-word entities into stray word-level nodes and produces self-loops when
# the same word occurs twice in a sentence.
for sentence in sample_sentences:
    doc = nlp(sentence)
    ents = list(doc.ents)
    for i, ent in enumerate(ents):
        for other_ent in ents[i + 1:]:
            # Same label required; identical texts are skipped (no self-loops).
            # nx.Graph ignores repeated edges, so no has_edge() guard is needed.
            if ent.label_ == other_ent.label_ and ent.text != other_ent.text:
                G.add_edge(ent.text, other_ent.text)


In [34]:
# Seed the force-directed layout so the figure is identical on every run.
pos = nx.spring_layout(G, seed=42)

In [35]:
# Flatten the edges into x/y coordinate lists. Each edge contributes its two
# endpoints followed by None, which breaks the line between segments.
edge_x = []
edge_y = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = pos[source], pos[target]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

In [36]:
# A single thin grey line trace carries all edges; hover is disabled on it.
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 1, 'color': 'grey'},
)

In [37]:
# Node coordinates in G.nodes() order, so they line up with the node labels.
node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

In [38]:
# Markers labelled with the node name; hovering shows the same text.
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers+text',
    text=list(G.nodes()),
    textposition='top center',
    hoverinfo='text',
    marker={
        'size': 10,
        'color': 'royalblue',
        'line': {'width': 2, 'color': 'DarkSlateGrey'},
    },
)

In [39]:
# Combine both traces on a bare canvas: no legend, margins, grid or zero lines.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        showlegend=False,
        hovermode='closest',
        margin={'b': 0, 'l': 0, 'r': 0, 't': 0},
        xaxis={'showgrid': False, 'zeroline': False},
        yaxis={'showgrid': False, 'zeroline': False},
    ),
)

In [40]:
# Render the interactive network figure.
fig.show()

In [41]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download the NLTK resources this cell needs (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load spaCy's small English pipeline; it is used below for entity recognition
nlp = spacy.load("en_core_web_sm")

# Collect (text, label) pairs for every entity spaCy detects
def extract_named_entities(sentences):
    """Return ``(entity_text, entity_label)`` tuples for all entities found.

    Every sentence is processed with the module-level ``nlp`` pipeline; the
    result keeps document order and includes repeated entities.
    """
    return [
        (span.text, span.label_)
        for sentence in sentences
        for span in nlp(sentence).ents
    ]

# Extract sentences from the Reuters dataset
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Convert list of words to a string

# Extract named entities with their types
named_entities = extract_named_entities(sample_sentences)

# Create a graph with one node per distinct entity text
G = nx.Graph()
for entity, entity_type in named_entities:
    # Store the label as a node attribute (a later occurrence overwrites an earlier one)
    G.add_node(entity, type=entity_type)

# Add edges between entities that co-occur in a sentence and share a label.
# Edges use the full entity span text so they attach to the typed nodes above;
# iterating over tokens would fragment multi-word entities into untyped
# word-level nodes and create self-loops for repeated words.
for sentence in sample_sentences:
    doc = nlp(sentence)
    ents = list(doc.ents)
    for i, ent in enumerate(ents):
        for other_ent in ents[i + 1:]:
            # nx.Graph ignores repeated edges, so no has_edge() guard is needed
            if ent.label_ == other_ent.label_ and ent.text != other_ent.text:
                G.add_edge(ent.text, other_ent.text, weight=1)

# Generate positions for nodes using a seeded (reproducible) spring layout
pos = nx.spring_layout(G, seed=42)

# Create edge and node traces for Plotly
# Edge coordinates must be filled in before the trace is built. Each edge
# contributes its two endpoints followed by None so segments stay separate.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='grey'),
    hoverinfo='none',
    mode='lines'
)

# Marker color for each spaCy entity label
entity_type_colors = {
    "CARDINAL": 'darkblue',       # cardinal numbers
    "ORDINAL": 'darkgreen',       # ordinal numbers
    "DATE": 'purple',             # dates
    "PERSON": 'royalblue',        # people
    "ORG": 'lightgreen',          # organizations
    "GPE": 'orange',              # geopolitical entities
    "MONEY": 'gold',              # monetary amounts
    "LOC": 'cyan',                # general locations
    "EVENT": 'pink',              # events
    "PRODUCT": 'lime',            # products
    "NORP": 'red',                # nationalities, religious or political groups
    "FAC": 'teal',                # facilities
    "LANGUAGE": 'lightblue',      # languages
    "WORK_OF_ART": 'lightcoral',  # works of art
}

# Fallback colors keyed on the node's own text, used only when the label has
# no entry above
word_group_colors = [
    ({"monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"},
     'lightgoldenrodyellow'),  # days of the week
    ({"january", "february", "march", "april", "may", "june", "july", "august",
      "september", "october", "november", "december"},
     'lightcoral'),  # months
    ({"million", "billion", "trillion", "thousand"}, 'gold'),  # quantifiers
]
default_color = 'lightgray'

# Add nodes with fixed size and custom colors based on the entity type
node_x = []
node_y = []
node_size = 10  # Fixed node size
node_color = []

for node, data in G.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)

    entity_type = data.get('type', 'UNKNOWN')  # Use 'UNKNOWN' if no type is present
    # The node key itself is the text; nodes carry no separate 'text' attribute
    word_text = str(node).lower()

    color = entity_type_colors.get(entity_type)
    if color is None:
        color = next(
            (group_color for words, group_color in word_group_colors if word_text in words),
            default_color,
        )
    # Exactly one color per node keeps node_color aligned with node_x / node_y
    node_color.append(color)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    marker=dict(
        size=node_size,
        color=node_color,
        line=dict(width=2, color='DarkSlateGrey')
    ),
    text=list(G.nodes()),
    textposition='top center',
    hoverinfo='text'
)

# Create the final figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [42]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download necessary NLTK data (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load the English language model from spaCy
nlp = spacy.load("en_core_web_sm")

# Gather typed named entities from a batch of sentences
def extract_named_entities(sentences):
    """Return a list of ``(text, label)`` tuples, one per entity occurrence.

    Sentences are parsed one by one with the module-level ``nlp`` pipeline and
    their entities appended in the order spaCy reports them.
    """
    found = []
    for text in sentences:
        found.extend((span.text, span.label_) for span in nlp(text).ents)
    return found

# Extract sentences from the Reuters dataset
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Convert list of words to a string

# Extract named entities with their types
named_entities = extract_named_entities(sample_sentences)

# Create a graph with one node per distinct entity text
G = nx.Graph()
for entity, entity_type in named_entities:
    # Store the label as a node attribute (a later occurrence overwrites an earlier one)
    G.add_node(entity, type=entity_type)

# Add edges between entities that co-occur in a sentence and share a label.
# Edges use the full entity span text so they attach to the typed nodes above;
# iterating over tokens would fragment multi-word entities into untyped
# word-level nodes and create self-loops for repeated words.
for sentence in sample_sentences:
    doc = nlp(sentence)
    ents = list(doc.ents)
    for i, ent in enumerate(ents):
        for other_ent in ents[i + 1:]:
            # nx.Graph ignores repeated edges, so no has_edge() guard is needed
            if ent.label_ == other_ent.label_ and ent.text != other_ent.text:
                G.add_edge(ent.text, other_ent.text, weight=1)

# Generate positions for nodes using a seeded (reproducible) spring layout
pos = nx.spring_layout(G, seed=42)

# Create edge and node traces for Plotly
# Edge coordinates are collected first: go.Scatter takes a snapshot of x/y when
# it is constructed, so appending to the lists afterwards leaves the trace empty.
edge_x = []
edge_y = []
for edge in G.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])  # None breaks the line between edges
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='grey'),
    hoverinfo='none',
    mode='lines'
)

# Add nodes with fixed size and custom colors based on the entity type
node_x = []
node_y = []
node_color = []
node_text = []

for node, data in G.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_text.append(node)  # Add node text for display

    # Pick one color per node from its entity label
    entity_type = data.get('type', 'UNKNOWN')  # Use 'UNKNOWN' if no type is present

    if entity_type == "PERSON":
        node_color.append('royalblue')  # Color for people
    elif entity_type == "ORG":
        node_color.append('lightgreen')  # Color for organizations
    elif entity_type == "GPE":
        node_color.append('orange')  # Color for geopolitical entities
    else:
        node_color.append('lightgray')  # Default color for unclassified types

# Create node trace
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    marker=dict(
        size=10,
        color=node_color,
        line=dict(width=2, color='DarkSlateGrey')
    ),
    text=node_text,
    textposition='top center',
    hoverinfo='text'
)

# Create the final figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [43]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download the NLTK resources this cell needs (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load spaCy's small English pipeline; it is used below for entity recognition
nlp = spacy.load("en_core_web_sm")

# Function to extract named entities from sentences using spaCy
def extract_named_entities(sentences):
    entities = []
    for sentence in sentences:
        doc = nlp(sentence)
        for ent in doc.ents:
            # Add entities with their types as tuples (text, type)
            entities.append((ent.text, ent.label_))
    return entities

# Extract sentences from the Reuters dataset
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Convert list of words to a string

# Extract named entities with their types
named_entities = extract_named_entities(sample_sentences)

# Create a graph with one node per distinct entity text
G = nx.Graph()
for entity, entity_type in named_entities:
    # Store the label as a node attribute (a later occurrence overwrites an earlier one)
    G.add_node(entity, type=entity_type)

# Add edges between entities that co-occur in a sentence and share a label.
# Edges use the full entity span text so they attach to the typed nodes above;
# iterating over tokens would fragment multi-word entities into untyped
# word-level nodes and create self-loops for repeated words.
for sentence in sample_sentences:
    doc = nlp(sentence)
    ents = list(doc.ents)
    for i, ent in enumerate(ents):
        for other_ent in ents[i + 1:]:
            # nx.Graph ignores repeated edges, so no has_edge() guard is needed
            if ent.label_ == other_ent.label_ and ent.text != other_ent.text:
                G.add_edge(ent.text, other_ent.text, weight=1)

# Generate positions for nodes using a seeded (reproducible) spring layout
pos = nx.spring_layout(G, seed=42)

# Create edge and node traces for Plotly
# Fill the edge coordinate lists before building the trace; without this loop
# the trace is empty and no edges are drawn.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])  # None breaks the line between edges
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='grey'),
    hoverinfo='none',
    mode='lines'
)

# Add nodes with fixed size and custom colors based on the entity type
node_x = []
node_y = []
node_size = 10  # Fixed node size
node_color = []

for node, data in G.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)

    # Pick one color per node from its entity label
    entity_type = data.get('type', 'UNKNOWN')  # Use 'UNKNOWN' if no type is present
    if entity_type == "PERSON":
        node_color.append('royalblue')  # Example color for people
    elif entity_type == "ORG":
        node_color.append('lightgreen')  # Example color for organizations
    elif entity_type == "GPE":
        node_color.append('orange')  # Example color for geopolitical entities
    else:
        node_color.append('lightgray')  # Default color for other types

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    marker=dict(
        size=node_size,
        color=node_color,
        line=dict(width=2, color='DarkSlateGrey')
    ),
    text=list(G.nodes()),
    textposition='top center',
    hoverinfo='text'
)

# Create the final figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [44]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download the NLTK resources this cell needs (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load spaCy's small English pipeline; the event extraction below relies on
# its POS tags, dependency labels and entities
nlp = spacy.load("en_core_web_sm")

# Build one space-joined string per tokenized Reuters sentence
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Use first 10 sentences as sample

# Function to extract event chains
def extract_event_chains(sentences):
    """Describe every verb in ``sentences`` as an event record.

    Each sentence is parsed with the module-level ``nlp`` pipeline. For every
    token tagged ``VERB`` a dict is produced with these keys:

    - ``"event"``: the verb's text
    - ``"subjects"``: texts of direct children with dep ``nsubj``/``nsubjpass``
    - ``"objects"``: texts of direct children with dep ``dobj``/``pobj``
    - ``"time"``: texts of all ``DATE`` entities in the same sentence
    - ``"sentence"``: the original sentence string

    Returns the records as a list, in sentence order and then token order.
    """
    events = []
    for sentence in sentences:
        doc = nlp(sentence)
        # DATE entities belong to the sentence, not to a single verb, so they
        # are computed once per sentence instead of once per token.
        dates = [ent.text for ent in doc.ents if ent.label_ == "DATE"]
        for token in doc:
            # Identify verbs as events and their subjects/objects
            if token.pos_ != "VERB":
                continue
            subjects = [child.text for child in token.children if child.dep_ in ("nsubj", "nsubjpass")]
            # NOTE(review): "pobj" normally attaches to a preposition rather
            # than directly to the verb, so it may rarely match here. Confirm intent.
            objects = [child.text for child in token.children if child.dep_ in ("dobj", "pobj")]
            events.append({
                "event": token.text,
                "subjects": subjects,
                "objects": objects,
                "time": list(dates),  # fresh list per record, as before
                "sentence": sentence
            })
    return events

# Run the event extraction over the sample sentences
event_chains = extract_event_chains(sample_sentences)

# Directed graph: subjects and times point into an event, events point at objects
G = nx.DiGraph()

for record in event_chains:
    # Events are keyed by verb text, so identical verbs share one node
    event_node = f"Event: {record['event']}"
    G.add_node(event_node, type="event", sentence=record["sentence"])

    # Participants: subject -> event, event -> object
    for actor in record["subjects"]:
        G.add_node(actor, type="entity")
        G.add_edge(actor, event_node, relationship="acted")
    for target in record["objects"]:
        G.add_node(target, type="entity")
        G.add_edge(event_node, target, relationship="acted upon")

    # Temporal expressions: time -> event
    for when in record["time"]:
        G.add_node(when, type="time")
        G.add_edge(when, event_node, relationship="time")

# Seeded spring layout for reproducible node positions
pos = nx.spring_layout(G, seed=42)

# Create edge and node traces for Plotly
edge_x = []
edge_y = []
edge_text = []  # relationship label per edge (kept for inspection; not plotted)
for source, target, attrs in G.edges(data=True):
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])  # None breaks the line between edges
    edge_y.extend([y0, y1, None])
    edge_text.append(attrs.get("relationship", "unknown"))

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='gray'),
    hoverinfo='none',
    mode='lines'
)

# Marker color per node type
node_type_colors = {
    "event": "orange",      # Events
    "entity": "lightblue",  # Entities
    "time": "green",        # Time
}

node_x = []
node_y = []
node_color = []
node_label = []  # short text drawn next to each marker
node_text = []   # longer text shown on hover

for node, data in G.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_label.append(node)
    # Only event nodes carry a sentence; it goes into the hover text so the
    # plot itself is not covered with full sentences.
    sentence = data.get('sentence', '')
    node_text.append(f"{node}<br>{sentence}" if sentence else str(node))
    node_color.append(node_type_colors.get(data.get("type", "unknown"), "lightgray"))

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode="markers+text",
    marker=dict(
        size=10,
        color=node_color,
        line=dict(width=2, color="DarkSlateGrey")
    ),
    text=node_label,
    hovertext=node_text,  # used for hover because hoverinfo contains "text"
    textposition="top center",
    hoverinfo="text"
)

# Create the final figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    title="Narrative Event Chain Visualization",
                    showlegend=False,
                    hovermode="closest",
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [45]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download the NLTK resources this cell needs (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load spaCy's small English pipeline as `nlp`
nlp = spacy.load("en_core_web_sm")

# Function to extract causal relationships from sentences
def extract_causal_relationships(sentences):
    """Find simple "cause -> effect" pairs via causal marker phrases.

    Each sentence is lower-cased and split on a whole-word occurrence of one
    of the marker phrases. A pair is recorded only when the phrase occurs
    exactly once and both sides are non-empty. The direction depends on the
    phrase: for "led to" / "resulted in" the cause is the text before the
    phrase; for "because" / "due to" / "caused by" the cause is the text
    after it.

    Args:
        sentences: iterable of sentence strings.

    Returns:
        ``(events, relationships)`` where ``events`` is a list of the distinct
        clause strings in first-seen order and ``relationships`` is a list of
        ``(cause, effect)`` tuples.
    """
    import re

    # phrase -> True when the cause is the text BEFORE the phrase
    cause_comes_first = {
        "because": False,
        "due to": False,
        "led to": True,
        "resulted in": True,
        "caused by": False,
    }
    # Word boundaries stop matches inside other words ("travelled to" vs "led to")
    patterns = [
        (re.compile(r"\b" + re.escape(phrase) + r"\b"), cause_first)
        for phrase, cause_first in cause_comes_first.items()
    ]

    events = {}  # dict used as an ordered set, so the result is deterministic
    relationships = []

    for sentence in sentences:
        lowered = sentence.lower()
        for pattern, cause_first in patterns:
            parts = pattern.split(lowered)
            if len(parts) != 2:  # phrase absent, or present more than once
                continue
            before, after = parts[0].strip(), parts[1].strip()
            if not before or not after:  # nothing usable on one side
                continue
            cause, effect = (before, after) if cause_first else (after, before)
            events[cause] = None
            events[effect] = None
            relationships.append((cause, effect))
    return list(events), relationships

# First 50 Reuters sentences, each re-joined into a single string
sample_sentences = [" ".join(tokens) for tokens in reuters.sents()[:50]]

# Clause strings and the (cause, effect) pairs found between them
events, causal_links = extract_causal_relationships(sample_sentences)

# Directed graph with an edge cause -> effect (acyclicity is not enforced)
G = nx.DiGraph()
G.add_nodes_from(events)
G.add_edges_from(causal_links)

# Seeded spring layout for reproducible node positions
pos = nx.spring_layout(G, seed=42)

# Edge coordinates: two endpoints per edge followed by None as a line break
edge_x = []
edge_y = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = pos[source], pos[target]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 1, 'color': 'gray'},
)

# Node coordinates, labels and colors, all in G.nodes() order
node_text = list(G.nodes())
node_x = [pos[name][0] for name in node_text]
node_y = [pos[name][1] for name in node_text]
node_color = ["orange"] * len(node_text)  # every event uses the same color

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers+text',
    text=node_text,
    textposition="top center",
    hoverinfo="text",
    marker={
        'size': 15,
        'color': node_color,
        'line': {'width': 2, 'color': 'black'},
    },
)

# Assemble the interactive figure: titled, with no legend, grid or zero lines
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title="Causal Analysis and Event Storytelling",
        showlegend=False,
        hovermode="closest",
        margin={'b': 0, 'l': 0, 'r': 0, 't': 40},
        xaxis={'showgrid': False, 'zeroline': False},
        yaxis={'showgrid': False, 'zeroline': False},
    ),
)

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [46]:
import spacy
import networkx as nx
import plotly.graph_objects as go
import nltk
from nltk.corpus import reuters

# Download the NLTK resources this cell needs (no-op when already up to date)
nltk.download('reuters')
nltk.download('punkt')

# Load spaCy's small English pipeline; it is used below for entity recognition
nlp = spacy.load("en_core_web_sm")

# Typed named-entity extraction (text plus spaCy label)
def extract_named_entities(sentences):
    """Return ``(text, label)`` tuples for each entity spaCy recognises.

    Only entities are returned; sentences are parsed individually with the
    module-level ``nlp`` pipeline and results are concatenated in input order.
    """
    return [
        (span.text, span.label_)
        for parsed in (nlp(sentence) for sentence in sentences)
        for span in parsed.ents
    ]

# Extract sentences from the Reuters dataset
sample_sentences = [" ".join(s) for s in reuters.sents()[:10]]  # Convert list of words to a string

# Extract named entities with their types
named_entities = extract_named_entities(sample_sentences)

# Create a graph with one node per distinct entity text
G = nx.Graph()
for entity, entity_type in named_entities:
    # Store the label as a node attribute (a later occurrence overwrites an earlier one)
    G.add_node(entity, type=entity_type)

# Add edges between entities that co-occur in a sentence and share a label.
# Edges use the full entity span text so they attach to the typed nodes above;
# iterating over tokens would fragment multi-word entities into untyped
# word-level nodes and create self-loops for repeated words.
for sentence in sample_sentences:
    doc = nlp(sentence)
    ents = list(doc.ents)
    for i, ent in enumerate(ents):
        for other_ent in ents[i + 1:]:
            # nx.Graph ignores repeated edges, so no has_edge() guard is needed
            if ent.label_ == other_ent.label_ and ent.text != other_ent.text:
                G.add_edge(ent.text, other_ent.text, weight=1)

# Generate positions for nodes using a seeded (reproducible) spring layout
pos = nx.spring_layout(G, seed=42)

# Edge coordinates: two endpoints per edge followed by None as a line break
edge_x = []
edge_y = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = pos[source], pos[target]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 1, 'color': 'grey'},
)

# Marker colors by entity label; any label not listed falls back to light gray
type_colors = {
    "PERSON": 'royalblue',
    "ORG": 'lightgreen',
    "GPE": 'orange',
}

node_size = 10  # Fixed node size
node_x = []
node_y = []
node_color = []
for node, data in G.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    entity_type = data.get('type', 'UNKNOWN')
    node_color.append(type_colors.get(entity_type, 'lightgray'))

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers+text',
    text=list(G.nodes()),
    textposition='top center',
    hoverinfo='text',
    marker={
        'size': node_size,
        'color': node_color,
        'line': {'width': 2, 'color': 'DarkSlateGrey'},
    },
)

# Assemble the figure on a bare canvas: no legend, margins, grid or zero lines
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        showlegend=False,
        hovermode='closest',
        margin={'b': 0, 'l': 0, 'r': 0, 't': 0},
        xaxis={'showgrid': False, 'zeroline': False},
        yaxis={'showgrid': False, 'zeroline': False},
    ),
)

fig.show()


[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [47]:
import nltk
# Show the directories NLTK searches for downloaded resources.
print(nltk.data.path)

['/root/nltk_data', '/usr/nltk_data', '/usr/share/nltk_data', '/usr/lib/nltk_data', '/usr/share/nltk_data', '/usr/local/share/nltk_data', '/usr/lib/nltk_data', '/usr/local/lib/nltk_data']
