# Here is the Plotly code for the visualizations:

**1. Most Common Relations Bar Plot**

In [None]:
import plotly.express as px

# Counts of the ten most common relations, hard-coded from the notebook output.
# The labels are kept as the stringified single-item lists they were copied as.
relations_data = dict(
    relation=[
        "['is']", "['has']", "['are']", "['love']", "['takes']",
        "['shows']", "['looks']", "['makes']", "['delivers']", "['have']",
    ],
    count=[1541, 729, 274, 268, 256, 224, 222, 190, 167, 155],
)

# One bar per relation, bar height = number of occurrences.
fig = px.bar(
    relations_data,
    x='relation',
    y='count',
    labels=dict(relation='Relation', count='Frequency'),
    title='Most Common Relations',
)
# Slant the tick labels so the bracketed relation names do not overlap.
fig.update_layout(xaxis=dict(tickangle=-45))
fig.show()

**2. Distribution of Relation Types (Pie Chart)**

In [None]:
# Pie chart over the relation counts defined in the bar-plot cell:
# one slice per relation, sized by its count.
fig = px.pie(
    relations_data,
    names='relation',
    values='count',
    title='Distribution of Relation Types',
)
fig.show()

**3. Word Cloud for Tokens (using Plotly with Wordcloud)**

In [None]:
from wordcloud import WordCloud
import plotly.graph_objects as go
from PIL import Image
import numpy as np
import pandas as pd # Import pandas

# Relation counts copied from the notebook output. They are not used by the
# word cloud below; the assignment is kept so the name stays defined when this
# cell is run on its own.
relations_data = {
    'relation': ["['is']", "['has']", "['are']", "['love']", "['takes']",
                 "['shows']", "['looks']", "['makes']", "['delivers']", "['have']"],
    'count': [1541, 729, 274, 268, 256, 224, 222, 190, 167, 155]
}

# Placeholder DataFrame so the cell is runnable end to end.
# Replace with the real data: 'tokens' must hold one list of tokens per row.
data = {'tokens': [['apple', 'banana', 'orange'], ['grape', 'kiwi'], ['apple', 'orange', 'pear']]}
df = pd.DataFrame(data)

# Flatten every row's token list into one space-separated string.
# explode() yields NaN for an empty token list (and keeps missing cells as NaN);
# drop those first, otherwise astype(str) would add the literal word "nan"
# to the cloud.
all_tokens = ' '.join(df['tokens'].explode().dropna().astype(str))

# Render the word cloud from the combined token text.
wordcloud = WordCloud(width=800, height=400).generate(all_tokens)

# Show the rendered image array in a Plotly figure, with grid lines, tick
# labels and zero lines switched off on both axes.
fig = go.Figure(go.Image(z=wordcloud.to_array()))
fig.update_layout(title='Word Cloud of Tokens',
                  xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
                  yaxis=dict(showgrid=False, showticklabels=False, zeroline=False))
fig.show()

**4. Distribution of Source and Target Lengths**

In [None]:
# Side-by-side histograms of the lengths of the 'source' and 'target' entries.
import pandas as pd  # repeated so this cell also runs on its own
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Placeholder rows so the cell is runnable.
# Replace with the real data; it needs 'source' and 'target' columns.
data = {
    'source': ['apple', 'banana', 'orange', 'grape', 'kiwi'],
    'target': ['pie', 'smoothie', 'juice', 'juice', 'smoothie'],
}
df = pd.DataFrame(data)

# len() of every entry, stored next to the original columns.
df['source_length'] = df['source'].map(len)
df['target_length'] = df['target'].map(len)

# (column to plot, trace name, subplot title) for each panel, left to right.
panels = [
    ('source_length', 'Source Length', 'Source Length Distribution'),
    ('target_length', 'Target Length', 'Target Length Distribution'),
]

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=[panel_title for _, _, panel_title in panels],
)
for panel_col, (length_column, trace_name, _) in enumerate(panels, start=1):
    fig.add_trace(
        go.Histogram(x=df[length_column], nbinsx=20, name=trace_name),
        row=1,
        col=panel_col,
    )

fig.update_layout(title_text='Length Distribution of Source and Target Words')
fig.show()

**5. Interactive Scatter Plot of Source vs Target Lengths**

In [None]:
# Scatter of source length against target length, one point per row of the
# DataFrame built in the histogram cell; hovering a point shows its
# 'source' and 'target' values.
fig = px.scatter(
    df,
    x='source_length',
    y='target_length',
    hover_data=['source', 'target'],
    labels={
        'source_length': 'Source Word Length',
        'target_length': 'Target Word Length',
    },
    title='Source vs Target Lengths',
)
fig.show()

**6. Sunburst Chart of Relations and Tags**

In [None]:
# Sunburst of tag frequencies.

# Placeholder DataFrame with one list of tags per row so the cell is runnable.
# Replace with the real data. It lives in its own variable (df_tags) so the
# DataFrame from the previous cells is left untouched.
data_with_tags = {
    'tags': [['fruit', 'sweet'], ['fruit'], ['fruit', 'sweet'], ['fruit', 'sour'], ['fruit']],
}
df_tags = pd.DataFrame(data_with_tags)

# One row per individual tag, then count how often each tag occurs and
# shape the result into a two-column table: 'tag' and 'count'.
tags_exploded = (
    df_tags['tags']
    .explode()
    .value_counts()
    .rename_axis('tag')
    .reset_index(name='count')
)

# Single-level sunburst: one sector per tag, sized by its count.
fig = px.sunburst(
    tags_exploded,
    path=['tag'],
    values='count',
    title='Distribution of Tags',
)
fig.show()

**7. Interactive Entity-Relation Graph Visualization**

In [None]:
import networkx as nx
import plotly.graph_objects as go
import pandas as pd
import ast

# Build a directed entity-relation graph from the CSV and draw it with Plotly.
# The file must provide 'source', 'target' and 'relation' columns; 'relation'
# is parsed as a Python literal (e.g. the string "['is']").
df = pd.read_csv("/content/kG_nlp_transformed.csv")

# Create a directed graph
graph = nx.DiGraph()

# Add one edge per row, labelled with the row's relation text.
for source, target, raw_relation in zip(df["source"], df["target"], df["relation"]):
    # literal_eval only parses Python literals, so cell contents are never
    # executed. Anything it cannot parse (including non-string cells such as
    # NaN) falls back to a placeholder label instead of aborting the loop.
    try:
        relations = ast.literal_eval(raw_relation)
        if not isinstance(relations, list):
            relations = [str(relations)]  # Wrap a single parsed value in a list
    except (ValueError, SyntaxError, TypeError):
        relations = ["unknown_relation"]  # Default value for malformed data
    relation = " ".join(map(str, relations)).strip()  # One label string per edge

    # NOTE: re-adding an existing node updates its attributes, so an entity
    # seen as both source and target keeps the colour of its latest role.
    graph.add_node(source, color="skyblue")
    graph.add_node(target, color="lightgreen")
    # NOTE: a DiGraph holds one edge per (source, target) pair, so a repeated
    # pair keeps only the most recent label.
    graph.add_edge(source, target, label=relation)

# Generate positions for nodes using NetworkX's spring layout.
# The fixed seed makes the layout reproducible between runs.
pos = nx.spring_layout(graph, seed=42)

# Extract node coordinates, colours and labels
node_x = []
node_y = []
node_colors = []
node_text = []
for node, attrs in graph.nodes(data=True):
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_colors.append(attrs["color"])
    node_text.append(node)

# Create node trace
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode="markers+text",
    text=node_text,
    textposition="top center",
    hoverinfo="text",
    marker=dict(
        size=20,
        color=node_colors,
        line=dict(width=2, color="black")
    )
)

# Extract edge coordinates and labels
edge_x = []
edge_y = []
edge_text = []
edge_positions = []
for start, end, label in graph.edges(data="label"):
    x0, y0 = pos[start]
    x1, y1 = pos[end]
    # The None entry breaks the line so each edge is drawn as its own segment.
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
    # The relation label is placed at the midpoint of the edge.
    edge_text.append(label)
    edge_positions.append([(x0 + x1) / 2, (y0 + y1) / 2])

# Create edge trace
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color="gray"),
    hoverinfo="none",
    mode="lines"
)

# Create edge label trace (the loop variables are deliberately not called
# 'pos', which is the layout dictionary above).
edge_label_trace = go.Scatter(
    x=[mid_x for mid_x, _ in edge_positions],
    y=[mid_y for _, mid_y in edge_positions],
    mode="text",
    text=edge_text,
    textfont=dict(size=9, color="darkred"),
    hoverinfo="text"
)

# Create the figure. The title font is set through the nested title.font
# property; the flat 'titlefont' shortcut is a deprecated alias that newer
# Plotly releases reject.
fig = go.Figure(
    data=[edge_trace, node_trace, edge_label_trace],
    layout=go.Layout(
        title=dict(
            text="Enhanced Entity-Relation Graph",
            font=dict(size=16, family="Arial, bold"),
        ),
        showlegend=False,
        hovermode="closest",
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        paper_bgcolor="white",
        plot_bgcolor="white"
    )
)

# Save the plot as an HTML file for interactivity
fig.write_html("entity_relation_graph.html")

# Display the plot (optional, depending on environment)
fig.show()