Generate graphs

Graph 1:

In [None]:
import networkx as nx
import pandas as pd
import pickle
import os

# Location of the workbook (uploaded to the Colab session) and the sheet
# that lists the gene-disease associations.
uploaded_file_path = '/content/Network.xlsx'
excel_file = os.path.abspath(uploaded_file_path)
sheet_name = 'Gene-Diseasenetwork'

# Load the sheet into a dataframe.
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# Distinct values of the first column (genes) and the third column (diseases).
genes = df.iloc[:, 0].unique()
diseases = df.iloc[:, 2].unique()

# Build the graph: one undirected edge per row, linking the first column to
# the third. add_edge() inserts any endpoint that is not in the graph yet,
# so the nodes do not need to be added separately.
G_gene_disease = nx.Graph()
for _, record in df.iterrows():
    G_gene_disease.add_edge(record.iloc[0], record.iloc[2])

# Persist the graph as a pickle file so it can be reloaded later.
output_path = '/content/gene_disease_network.pkl'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(G_gene_disease, pickle_file)


Graph 2:

In [None]:
import networkx as nx
import pandas as pd
import pickle
import matplotlib.pyplot as plt

# Workbook location and the sheet holding the shared gene-disease network
excel_file = '/content/Network.xlsx'
sheet_name = 'Shared gene-disease network'

# Load the requested sheet into a dataframe
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# Each row becomes one weighted edge: the first two columns are the
# endpoints and the third column is stored as the edge's 'weight'.
# add_edge() inserts missing endpoints itself.
G_shared_network = nx.Graph()
for _, record in df.iterrows():
    G_shared_network.add_edge(record.iloc[0], record.iloc[1], weight=record.iloc[2])

# Canvas for the drawing
fig, ax = plt.subplots(figsize=(12, 8))

# Spring layout with a fixed random seed
pos = nx.spring_layout(G_shared_network, seed=42, k=0.3)

# Collect the edge weights and give every node the same colour.
# NOTE(review): edge_weights is computed but not passed to any draw call below.
edge_weights = [attrs['weight'] for _, _, attrs in G_shared_network.edges(data=True)]
node_colors = ['lightblue'] * G_shared_network.number_of_nodes()
nx.draw_networkx_nodes(G_shared_network, pos, ax=ax, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(G_shared_network, pos, ax=ax, edge_color='gray', alpha=0.7)
nx.draw_networkx_labels(G_shared_network, pos, font_size=6)

# Pad the plot limits so nodes near the border stay visible
ax.margins(0.2)

# Hide the tick marks on both axes
ax.set_xticks([])
ax.set_yticks([])

# Hide the frame around the plot
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_visible(False)

# Persist the graph as a pickle file
output_path = '/content/shared_gene_disease_network.pkl'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(G_shared_network, pickle_file)


Graph 3:

In [None]:
import networkx as nx
import pandas as pd
import pickle
import matplotlib.pyplot as plt

# Workbook location and the sheet holding the STRING PPI network
excel_file = '/content/Network.xlsx'
sheet_name = 'PPI network String'

# Load the requested sheet into a dataframe
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# Each row becomes one undirected edge between the values of the first and
# third columns. add_edge() inserts missing endpoints itself.
G_ppi_network = nx.Graph()
for _, record in df.iterrows():
    G_ppi_network.add_edge(record.iloc[0], record.iloc[2])

# Canvas for the drawing
fig, ax = plt.subplots(figsize=(12, 8))

# Spring layout with a fixed random seed and k=0.3
pos = nx.spring_layout(G_ppi_network, seed=42, k=0.3)

# Draw nodes, edges and labels; every node uses the same colour
node_colors = 'lightblue'
nx.draw_networkx_nodes(G_ppi_network, pos, ax=ax, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(G_ppi_network, pos, ax=ax, edge_color='gray', alpha=0.7)
nx.draw_networkx_labels(G_ppi_network, pos, font_size=8)

# Pad the plot limits so nodes near the border stay visible
ax.margins(0.15)

# Hide the tick marks on both axes
ax.set_xticks([])
ax.set_yticks([])

# Hide the frame around the plot
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_visible(False)

# Persist the graph as a pickle file
output_path = '/content/ppi_network_string.pkl'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(G_ppi_network, pickle_file)


Graph 4:

In [None]:
import networkx as nx
import pandas as pd
import pickle
import matplotlib.pyplot as plt

# Workbook location and the sheet holding the expanded PGx biomarker network
excel_file = '/content/Network.xlsx'
sheet_name = 'Expanded PGx biomarker interact'

# Load the requested sheet into a dataframe
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# Each row becomes one undirected edge between the values of the first and
# third columns. add_edge() inserts missing endpoints itself.
G_expanded_pgx = nx.Graph()
for _, record in df.iterrows():
    G_expanded_pgx.add_edge(record.iloc[0], record.iloc[2])

# Canvas for the drawing
fig, ax = plt.subplots(figsize=(12, 8))

# Spring layout with a fixed random seed (default spacing)
pos = nx.spring_layout(G_expanded_pgx, seed=42)

# Draw nodes, edges and labels; every node uses the same colour
node_colors = 'lightblue'
nx.draw_networkx_nodes(G_expanded_pgx, pos, ax=ax, node_size=200, node_color=node_colors)
nx.draw_networkx_edges(G_expanded_pgx, pos, ax=ax, edge_color='gray', alpha=0.7)
nx.draw_networkx_labels(G_expanded_pgx, pos, font_size=8)

# Pad the plot limits so nodes near the border stay visible
ax.margins(0.15)

# Hide the tick marks on both axes
ax.set_xticks([])
ax.set_yticks([])

# Hide the frame around the plot
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_visible(False)

# Persist the graph as a pickle file
output_path = '/content/expanded_pgx_biomarker_interact.pkl'
with open(output_path, 'wb') as pickle_file:
    pickle.dump(G_expanded_pgx, pickle_file)


Prompt generation

In [None]:
import random
import networkx as nx
import pickle

# Load the gene-disease knowledge graph from its pickle file.
# The "Graph 1" cell writes this file to /content, so read it back from the
# same absolute path instead of relying on the current working directory.
gene_disease_network_path = '/content/gene_disease_network.pkl'
with open(gene_disease_network_path, 'rb') as file:
    knowledge_graph = pickle.load(file)

# Set the zero-hop entities (V0) based on the mentioned entities in the article
# Replace the following list with the entities mentioned in your article
zero_hop_entities = ['Cholesterol', 'LDL cholesterol']

# Grow V0 into the one-hop set (V1: direct neighbors of V0) and the two-hop
# set (V2: neighbors of V1). Seeds that are not in the graph are skipped.
one_hop_entities = set()
two_hop_entities = set()
for seed in zero_hop_entities:
    if seed not in knowledge_graph:
        continue
    seed_neighbors = knowledge_graph[seed]
    one_hop_entities.update(seed_neighbors)
    for neighbor in seed_neighbors:
        if neighbor in knowledge_graph:
            two_hop_entities.update(knowledge_graph[neighbor])

# Create the central graph (GC): V0, V1 and the edges from each seed to its
# direct neighbors.
central_graph = nx.Graph()
central_graph.add_nodes_from(zero_hop_entities)
central_graph.add_nodes_from(one_hop_entities)
for seed in zero_hop_entities:
    if seed in knowledge_graph:
        central_graph.add_edges_from((seed, neighbor) for neighbor in knowledge_graph[seed])

# Create the multi-hop graph (GM): V1, V2 and the edges from each one-hop
# entity to its neighbors.
multi_hop_graph = nx.Graph()
multi_hop_graph.add_nodes_from(one_hop_entities)
multi_hop_graph.add_nodes_from(two_hop_entities)
for hub in one_hop_entities:
    if hub in knowledge_graph:
        multi_hop_graph.add_edges_from((hub, neighbor) for neighbor in knowledge_graph[hub])

# Perform Random Walk
def perform_random_walk(graph, start_node, num_steps):
    """Walk up to ``num_steps`` random hops through ``graph``.

    Starting at ``start_node``, each hop moves to one of the current node's
    neighbors chosen with ``random.choice``. The walk ends early when it
    reaches a node that has no neighbors.

    Args:
        graph: Object exposing ``neighbors(node)``; the notebook passes
            ``networkx.Graph`` instances.
        start_node: Node the walk begins at.
        num_steps: Maximum number of hops to take.

    Returns:
        The visited nodes in order, beginning with ``start_node``.
    """
    path = [start_node]
    for _ in range(num_steps):
        # The walk always continues from the most recently visited node.
        candidates = list(graph.neighbors(path[-1]))
        if len(candidates) == 0:
            break
        path.append(random.choice(candidates))
    return path

# Generate Keywords from Random Walks
num_steps = 5  # Number of steps for the random walk
num_prompts = 10  # Number of prompts to generate

prompt_templates = [
    "Discuss the relationship between {} and {} in the context of your topic of interest.",
    "Explain the impact of {} on {} in the context of your topic of interest.",
    "Describe the role of {} in relation to {} in the context of your topic of interest.",
    "Provide an overview of the interactions between {} and {} in the context of your topic of interest.",
    "Investigate the correlation between {} and {} in the context of your topic of interest.",
    "Elaborate on the significance of {} with respect to {} in the context of your topic of interest.",
    "Examine the implications of {} on {} in the context of your topic of interest.",
    "Analyze the associations between {} and {} in the context of your topic of interest.",
    "Evaluate the connections between {} and {} in the context of your topic of interest.",
    "Explore the interplay between {} and {} in the context of your topic of interest.",
]

generated_prompts1 = []  # List to store the generated prompts

# Candidate start nodes, materialised once instead of on every iteration.
central_nodes = list(central_graph)
multi_hop_nodes = list(multi_hop_graph)

# random.choice() raises a bare IndexError on an empty sequence, which is what
# happens when none of the zero-hop entities occur in the knowledge graph
# (the multi-hop graph then has no nodes). Fail early with a clear message.
if not central_nodes or not multi_hop_nodes:
    raise ValueError(
        "Cannot generate prompts: the central or multi-hop graph is empty. "
        "Check that the zero-hop entities exist in the knowledge graph."
    )

for _ in range(num_prompts):
    # Pick an independent start node for each of the two walks.
    central_start_node = random.choice(central_nodes)
    multi_hop_start_node = random.choice(multi_hop_nodes)

    central_random_walk = perform_random_walk(central_graph, central_start_node, num_steps)
    multi_hop_random_walk = perform_random_walk(multi_hop_graph, multi_hop_start_node, num_steps)

    # The node each walk ends on becomes a keyword.
    central_keyword = central_random_walk[-1]
    multi_hop_keyword = multi_hop_random_walk[-1]

    # Fill a randomly selected template with the two keywords.
    prompt_template = random.choice(prompt_templates)
    generated_prompts1.append(prompt_template.format(central_keyword, multi_hop_keyword))

# Print the generated prompts
for prompt in generated_prompts1:
    print("Generated Prompt:")
    print(prompt)
    print("---------------------------")


In [None]:
import random
import networkx as nx
import pickle

# Reload the pickled shared gene-disease network as the knowledge graph
shared_gene_disease_network_path = '/content/shared_gene_disease_network.pkl'
with open(shared_gene_disease_network_path, 'rb') as file:
    knowledge_graph = pickle.load(file)

# V0: the zero-hop (seed) entities mentioned in the article
zero_hop_entities = ['UGT1A1', 'G6PD']

# V1 holds the direct neighbors of the seeds, V2 the neighbors of V1.
# Seeds that are not in the graph are skipped.
one_hop_entities = set()
two_hop_entities = set()
for seed in zero_hop_entities:
    if seed not in knowledge_graph:
        continue
    seed_neighbors = knowledge_graph[seed]
    one_hop_entities.update(seed_neighbors)
    for neighbor in seed_neighbors:
        if neighbor in knowledge_graph:
            two_hop_entities.update(knowledge_graph[neighbor])

# Central graph (GC): V0, V1 and the edges from each seed to its neighbors
central_graph = nx.Graph()
central_graph.add_nodes_from(zero_hop_entities)
central_graph.add_nodes_from(one_hop_entities)
for seed in zero_hop_entities:
    if seed in knowledge_graph:
        central_graph.add_edges_from((seed, neighbor) for neighbor in knowledge_graph[seed])

# Multi-hop graph (GM): V1, V2 and the edges from each one-hop entity to its
# neighbors
multi_hop_graph = nx.Graph()
multi_hop_graph.add_nodes_from(one_hop_entities)
multi_hop_graph.add_nodes_from(two_hop_entities)
for hub in one_hop_entities:
    if hub in knowledge_graph:
        multi_hop_graph.add_edges_from((hub, neighbor) for neighbor in knowledge_graph[hub])

# Perform Random Walk
def perform_random_walk(graph, start_node, num_steps):
    """Walk up to ``num_steps`` random hops through ``graph``.

    Starting at ``start_node``, each hop moves to one of the current node's
    neighbors chosen with ``random.choice``. The walk ends early when it
    reaches a node that has no neighbors.

    Args:
        graph: Object exposing ``neighbors(node)``; the notebook passes
            ``networkx.Graph`` instances.
        start_node: Node the walk begins at.
        num_steps: Maximum number of hops to take.

    Returns:
        The visited nodes in order, beginning with ``start_node``.
    """
    path = [start_node]
    for _ in range(num_steps):
        # The walk always continues from the most recently visited node.
        candidates = list(graph.neighbors(path[-1]))
        if len(candidates) == 0:
            break
        path.append(random.choice(candidates))
    return path

# Generate Keywords from Random Walks
num_steps = 5  # Number of steps for the random walk
num_prompts = 10  # Number of prompts to generate

prompt_templates = [
    "Discuss how the relationship between {} and {} shapes the dynamics within your topic of interest.",
    "Compare and contrast the impact of {} and {} on your topic of interest, highlighting their distinct influences.",
    "Provide an overview of the evolving roles of {} and {} and their interplay within your topic of interest.",
    "Explore the potential future interactions between {} and {} and their implications for your topic of interest.",
    "Investigate the contrasting viewpoints on the correlation between {} and {} and analyze their relevance to your topic of interest.",
    "Examine the significance of {} in relation to {} and its implications for your topic of interest.",
    "Analyze the associations between {} and {} and evaluate their effects on your topic of interest.",
    "Evaluate the connections between {} and {} and their respective contributions to your topic of interest.",
    "Explore the interplay between {} and {} in different contexts and its impact on your topic of interest.",
    "Discuss the controversies surrounding the relationship between {} and {} and present your assessment in the context of your topic of interest.",
]

generated_prompts2 = []  # List to store the generated prompts

# Candidate start nodes, materialised once instead of on every iteration.
central_nodes = list(central_graph)
multi_hop_nodes = list(multi_hop_graph)

# random.choice() raises a bare IndexError on an empty sequence, which is what
# happens when none of the zero-hop entities occur in the knowledge graph
# (the multi-hop graph then has no nodes). Fail early with a clear message.
if not central_nodes or not multi_hop_nodes:
    raise ValueError(
        "Cannot generate prompts: the central or multi-hop graph is empty. "
        "Check that the zero-hop entities exist in the knowledge graph."
    )

for _ in range(num_prompts):
    # Pick an independent start node for each of the two walks.
    central_start_node = random.choice(central_nodes)
    multi_hop_start_node = random.choice(multi_hop_nodes)

    central_random_walk = perform_random_walk(central_graph, central_start_node, num_steps)
    multi_hop_random_walk = perform_random_walk(multi_hop_graph, multi_hop_start_node, num_steps)

    # The node each walk ends on becomes a keyword.
    central_keyword = central_random_walk[-1]
    multi_hop_keyword = multi_hop_random_walk[-1]

    # Fill a randomly selected template with the two keywords.
    prompt_template = random.choice(prompt_templates)
    generated_prompts2.append(prompt_template.format(central_keyword, multi_hop_keyword))

# Print the generated prompts
for prompt in generated_prompts2:
    print("Generated Prompt:")
    print(prompt)
    print("---------------------------")


In [None]:
import random
import networkx as nx
import pickle

# Reload the pickled STRING PPI network as the knowledge graph
ppi_network_string_path = '/content/ppi_network_string.pkl'
with open(ppi_network_string_path, 'rb') as file:
    knowledge_graph = pickle.load(file)

# V0: the zero-hop (seed) entities mentioned in the article
zero_hop_entities = ['CYP3A4', 'CYP2C9']

# V1 holds the direct neighbors of the seeds, V2 the neighbors of V1.
# Seeds that are not in the graph are skipped.
one_hop_entities = set()
two_hop_entities = set()
for seed in zero_hop_entities:
    if seed not in knowledge_graph:
        continue
    seed_neighbors = knowledge_graph[seed]
    one_hop_entities.update(seed_neighbors)
    for neighbor in seed_neighbors:
        if neighbor in knowledge_graph:
            two_hop_entities.update(knowledge_graph[neighbor])

# Central graph (GC): V0, V1 and the edges from each seed to its neighbors
central_graph = nx.Graph()
central_graph.add_nodes_from(zero_hop_entities)
central_graph.add_nodes_from(one_hop_entities)
for seed in zero_hop_entities:
    if seed in knowledge_graph:
        central_graph.add_edges_from((seed, neighbor) for neighbor in knowledge_graph[seed])

# Multi-hop graph (GM): V1, V2 and the edges from each one-hop entity to its
# neighbors
multi_hop_graph = nx.Graph()
multi_hop_graph.add_nodes_from(one_hop_entities)
multi_hop_graph.add_nodes_from(two_hop_entities)
for hub in one_hop_entities:
    if hub in knowledge_graph:
        multi_hop_graph.add_edges_from((hub, neighbor) for neighbor in knowledge_graph[hub])

# Perform Random Walk
def perform_random_walk(graph, start_node, num_steps):
    """Walk up to ``num_steps`` random hops through ``graph``.

    Starting at ``start_node``, each hop moves to one of the current node's
    neighbors chosen with ``random.choice``. The walk ends early when it
    reaches a node that has no neighbors.

    Args:
        graph: Object exposing ``neighbors(node)``; the notebook passes
            ``networkx.Graph`` instances.
        start_node: Node the walk begins at.
        num_steps: Maximum number of hops to take.

    Returns:
        The visited nodes in order, beginning with ``start_node``.
    """
    path = [start_node]
    for _ in range(num_steps):
        # The walk always continues from the most recently visited node.
        candidates = list(graph.neighbors(path[-1]))
        if len(candidates) == 0:
            break
        path.append(random.choice(candidates))
    return path

# Generate Keywords from Random Walks
num_steps = 5  # Number of steps for the random walk
num_prompts = 10  # Number of prompts to generate

prompt_templates = [
    "Discuss how the relationship between {} and {} shapes the dynamics within your topic of interest.",
    "Compare and contrast the impact of {} and {} on your topic of interest, highlighting their distinct influences.",
    "Provide an overview of the evolving roles of {} and {} and their interplay within your topic of interest.",
    "Explore the potential future interactions between {} and {} and their implications for your topic of interest.",
    "Investigate the contrasting viewpoints on the correlation between {} and {} and analyze their relevance to your topic of interest.",
    "Examine the significance of {} in relation to {} and its implications for your topic of interest.",
    "Analyze the associations between {} and {} and evaluate their effects on your topic of interest.",
    "Evaluate the connections between {} and {} and their respective contributions to your topic of interest.",
    "Explore the interplay between {} and {} in different contexts and its impact on your topic of interest.",
    "Discuss the controversies surrounding the relationship between {} and {} and present your assessment in the context of your topic of interest.",
]

generated_prompts3 = []  # List to store the generated prompts

# Candidate start nodes, materialised once instead of on every iteration.
central_nodes = list(central_graph)
multi_hop_nodes = list(multi_hop_graph)

# random.choice() raises a bare IndexError on an empty sequence, which is what
# happens when none of the zero-hop entities occur in the knowledge graph
# (the multi-hop graph then has no nodes). Fail early with a clear message.
if not central_nodes or not multi_hop_nodes:
    raise ValueError(
        "Cannot generate prompts: the central or multi-hop graph is empty. "
        "Check that the zero-hop entities exist in the knowledge graph."
    )

for _ in range(num_prompts):
    # Pick an independent start node for each of the two walks.
    central_start_node = random.choice(central_nodes)
    multi_hop_start_node = random.choice(multi_hop_nodes)

    central_random_walk = perform_random_walk(central_graph, central_start_node, num_steps)
    multi_hop_random_walk = perform_random_walk(multi_hop_graph, multi_hop_start_node, num_steps)

    # The node each walk ends on becomes a keyword.
    central_keyword = central_random_walk[-1]
    multi_hop_keyword = multi_hop_random_walk[-1]

    # Fill a randomly selected template with the two keywords.
    prompt_template = random.choice(prompt_templates)
    generated_prompts3.append(prompt_template.format(central_keyword, multi_hop_keyword))

# Print the generated prompts
for prompt in generated_prompts3:
    print("Generated Prompt:")
    print(prompt)
    print("---------------------------")


In [None]:
import random
import networkx as nx
import pickle

# Reload the pickled expanded PGx biomarker network as the knowledge graph
expanded_pgx_biomarker_network_path = '/content/expanded_pgx_biomarker_interact.pkl'
with open(expanded_pgx_biomarker_network_path, 'rb') as file:
    knowledge_graph = pickle.load(file)

# V0: the zero-hop (seed) entities mentioned in the article
zero_hop_entities = ['CYP3A4', 'CYP2C9']

# V1 holds the direct neighbors of the seeds, V2 the neighbors of V1.
# Seeds that are not in the graph are skipped.
one_hop_entities = set()
two_hop_entities = set()
for seed in zero_hop_entities:
    if seed not in knowledge_graph:
        continue
    seed_neighbors = knowledge_graph[seed]
    one_hop_entities.update(seed_neighbors)
    for neighbor in seed_neighbors:
        if neighbor in knowledge_graph:
            two_hop_entities.update(knowledge_graph[neighbor])

# Central graph (GC): V0, V1 and the edges from each seed to its neighbors
central_graph = nx.Graph()
central_graph.add_nodes_from(zero_hop_entities)
central_graph.add_nodes_from(one_hop_entities)
for seed in zero_hop_entities:
    if seed in knowledge_graph:
        central_graph.add_edges_from((seed, neighbor) for neighbor in knowledge_graph[seed])

# Multi-hop graph (GM): V1, V2 and the edges from each one-hop entity to its
# neighbors
multi_hop_graph = nx.Graph()
multi_hop_graph.add_nodes_from(one_hop_entities)
multi_hop_graph.add_nodes_from(two_hop_entities)
for hub in one_hop_entities:
    if hub in knowledge_graph:
        multi_hop_graph.add_edges_from((hub, neighbor) for neighbor in knowledge_graph[hub])

# Perform Random Walk
def perform_random_walk(graph, start_node, num_steps):
    """Walk up to ``num_steps`` random hops through ``graph``.

    Starting at ``start_node``, each hop moves to one of the current node's
    neighbors chosen with ``random.choice``. The walk ends early when it
    reaches a node that has no neighbors.

    Args:
        graph: Object exposing ``neighbors(node)``; the notebook passes
            ``networkx.Graph`` instances.
        start_node: Node the walk begins at.
        num_steps: Maximum number of hops to take.

    Returns:
        The visited nodes in order, beginning with ``start_node``.
    """
    path = [start_node]
    for _ in range(num_steps):
        # The walk always continues from the most recently visited node.
        candidates = list(graph.neighbors(path[-1]))
        if len(candidates) == 0:
            break
        path.append(random.choice(candidates))
    return path

# Generate Keywords from Random Walks
num_steps = 5  # Number of steps for the random walk
num_prompts = 10  # Number of prompts to generate

prompt_templates = [
    "Discuss how the relationship between {} and {} shapes the dynamics within your topic of interest.",
    "Compare and contrast the impact of {} and {} on your topic of interest, highlighting their distinct influences.",
    "Provide an overview of the evolving roles of {} and {} and their interplay within your topic of interest.",
    "Explore the potential future interactions between {} and {} and their implications for your topic of interest.",
    "Investigate the contrasting viewpoints on the correlation between {} and {} and analyze their relevance to your topic of interest.",
    "Examine the significance of {} in relation to {} and its implications for your topic of interest.",
    "Analyze the associations between {} and {} and evaluate their effects on your topic of interest.",
    "Evaluate the connections between {} and {} and their respective contributions to your topic of interest.",
    "Explore the interplay between {} and {} in different contexts and its impact on your topic of interest.",
    "Discuss the controversies surrounding the relationship between {} and {} and present your assessment in the context of your topic of interest.",
]

generated_prompts4 = []  # List to store the generated prompts

# Candidate start nodes, materialised once instead of on every iteration.
central_nodes = list(central_graph)
multi_hop_nodes = list(multi_hop_graph)

# random.choice() raises a bare IndexError on an empty sequence, which is what
# happens when none of the zero-hop entities occur in the knowledge graph
# (the multi-hop graph then has no nodes). Fail early with a clear message.
if not central_nodes or not multi_hop_nodes:
    raise ValueError(
        "Cannot generate prompts: the central or multi-hop graph is empty. "
        "Check that the zero-hop entities exist in the knowledge graph."
    )

for _ in range(num_prompts):
    # Pick an independent start node for each of the two walks.
    central_start_node = random.choice(central_nodes)
    multi_hop_start_node = random.choice(multi_hop_nodes)

    central_random_walk = perform_random_walk(central_graph, central_start_node, num_steps)
    multi_hop_random_walk = perform_random_walk(multi_hop_graph, multi_hop_start_node, num_steps)

    # The node each walk ends on becomes a keyword.
    central_keyword = central_random_walk[-1]
    multi_hop_keyword = multi_hop_random_walk[-1]

    # Fill a randomly selected template with the two keywords.
    prompt_template = random.choice(prompt_templates)
    generated_prompts4.append(prompt_template.format(central_keyword, multi_hop_keyword))

# Print the generated prompts
for prompt in generated_prompts4:
    print("Generated Prompt:")
    print(prompt)
    print("---------------------------")


Generate query

In [None]:

!pip install transformers
!pip install nltk

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import nltk
from nltk.translate.bleu_score import corpus_bleu
import numpy as np
from collections import Counter

def calculate_entropy(queries, tokenizer):
    """Return the Shannon entropy, in bits, of the token distribution of ``queries``.

    Every query is encoded with ``tokenizer.encode(..., add_special_tokens=False)``;
    the token ids of all queries are pooled and the entropy of their relative
    frequencies is computed.

    Args:
        queries: Iterable of strings to analyse.
        tokenizer: Object with an ``encode(text, add_special_tokens=False)``
            method that returns a sequence of hashable token ids.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed token ids.
    """
    # Tally the token ids across all queries.
    token_counts = Counter()
    for text in queries:
        token_counts.update(tokenizer.encode(text, add_special_tokens=False))

    total_tokens = sum(token_counts.values())
    probabilities = np.array([count / total_tokens for count in token_counts.values()])
    return -np.sum(probabilities * np.log2(probabilities))

def _distinct_ratio(items):
    """Return the share of unique entries in ``items``; 0.0 for an empty list."""
    if not items:
        return 0.0
    return len(set(items)) / len(items)


def _self_bleu(tokenized_queries):
    """Score every query with corpus BLEU against all the *other* queries.

    Returns NaN when fewer than two queries are available, because there is
    nothing to compare against.
    """
    if len(tokenized_queries) < 2:
        return float("nan")
    # Leave each hypothesis out of its own reference set; otherwise it always
    # matches itself perfectly and the score is trivially 1.0.
    references = [
        tokenized_queries[:position] + tokenized_queries[position + 1:]
        for position, _ in enumerate(tokenized_queries)
    ]
    return corpus_bleu(references, tokenized_queries)


def generate_queries(generated_prompts1, generated_prompts2, generated_prompts3, generated_prompts4):
    """Generate one query per prompt with T5 and report diversity metrics.

    The four prompt lists are concatenated, each prompt is passed through
    ``t5-base`` with beam search, and the de-duplicated outputs are scored
    with Self-BLEU, distinct uni-/bi-gram ratios and token entropy. The
    metrics are printed and also returned.

    Args:
        generated_prompts1: List of prompt strings.
        generated_prompts2: List of prompt strings.
        generated_prompts3: List of prompt strings.
        generated_prompts4: List of prompt strings.

    Returns:
        dict with the keys ``queries`` (unique generated queries in
        first-seen order), ``self_bleu``, ``distinct_unigrams``,
        ``distinct_bigrams`` and ``entropy``.
    """
    # Concatenate the generated prompts into a single list
    all_prompts = generated_prompts1 + generated_prompts2 + generated_prompts3 + generated_prompts4

    # Initialize the tokenizer and model
    tokenizer = T5Tokenizer.from_pretrained("t5-base")
    model = T5ForConditionalGeneration.from_pretrained("t5-base")

    # Generate one query per prompt
    queries = []
    for prompt in all_prompts:
        input_ids = tokenizer.encode(prompt, return_tensors="pt")

        # Deterministic beam search; no gradients are needed for inference
        with torch.no_grad():
            outputs = model.generate(
                input_ids,
                max_length=50,
                num_beams=5,  # Set the number of beams for beam search
                num_return_sequences=1,
                do_sample=False,  # Disable sampling
            )

        queries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

    # Remove duplicates while keeping first-seen order, so results are
    # reproducible (set() ordering of strings varies between interpreter runs)
    filtered_queries = list(dict.fromkeys(queries))

    # BLEU and the distinct-n ratios work on word tokens; passing raw strings
    # to corpus_bleu would make it compare individual characters instead
    tokenized_queries = [query.split() for query in filtered_queries]

    # Calculate Self-BLEU
    self_bleu = _self_bleu(tokenized_queries)

    # Calculate Distinct Uni-grams and Distinct Bi-grams
    all_unigrams = [token for tokens in tokenized_queries for token in tokens]
    all_bigrams = [" ".join(bigram) for tokens in tokenized_queries for bigram in nltk.bigrams(tokens)]
    distinct_unigrams = _distinct_ratio(all_unigrams)
    distinct_bigrams = _distinct_ratio(all_bigrams)

    # Calculate entropy
    entropy = calculate_entropy(filtered_queries, tokenizer)

    # Print the Self-BLEU score, Distinct Uni-grams, Distinct Bi-grams, and entropy
    print("Self-BLEU:", self_bleu)
    print("Distinct Uni-grams:", distinct_unigrams)
    print("Distinct Bi-grams:", distinct_bigrams)
    print("Entropy:", entropy)

    return {
        "queries": filtered_queries,
        "self_bleu": self_bleu,
        "distinct_unigrams": distinct_unigrams,
        "distinct_bigrams": distinct_bigrams,
        "entropy": entropy,
    }

if __name__ == "__main__":
    # Pass the four prompt lists built by the prompt-generation cells;
    # substitute your own lists of prompts here if needed.
    generate_queries(
        generated_prompts1,
        generated_prompts2,
        generated_prompts3,
        generated_prompts4,
    )
