In [1]:
import pandas as pd
import json
import os
import re
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.llms import HuggingFaceHub
from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
load_dotenv(dotenv_path=".env")
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# # Function to extract the year and clean the title
# def separate_title_and_year(title):
#     # Use regex to find the year in parentheses
#     year_match = re.search(r'\((\d{4})\)$', title)
    
#     # If a match is found, extract the year and clean the title
#     if year_match:
#         year = year_match.group(1)
#         # Remove the year from the title
#         cleaned_title = re.sub(r'\(\d{4}\)$', '', title).strip()
#     else:
#         year = None
#         cleaned_title = title
    
#     return cleaned_title, year


In [3]:
# # Load the first 500 rows of each dataset
# ratings = pd.read_csv('data/ratings.csv', usecols=['userId', 'movieId', 'rating'], nrows=500)
# movies = pd.read_csv('data/movies.csv', nrows=500)
# tags = pd.read_csv('data/tags.csv', usecols=['userId', 'movieId', 'tag'], nrows=500)  # Optional

# movies['title'], movies['year'] = zip(*movies['title'].apply(separate_title_and_year))

# # Merge ratings with movies based on 'movieId'
# df = pd.merge(ratings, movies, on='movieId', how='inner')

# # Optional: Merge tags if needed (without timestamps)
# # df = pd.merge(merged_data, tags, on=['userId', 'movieId'], how='left')

# # Now you have the merged dataset for recommendation (limited to 500 rows)
# df.head()

In [4]:
# df['genres_list'] = df['genres'].str.split('|')

# # Step 2: Explode the list into multiple rows (one genre per row per movie)
# movies_exploded = df.explode('genres_list').reset_index(drop=True)
# movies_exploded["genres"] = movies_exploded["genres_list"]
# movies_exploded.drop(["genres_list"], axis=1, inplace=True)

In [5]:
# movies_exploded.head()

In [6]:
gemini_api = os.getenv("GOOGLE_API_KEY")
hugging_face_api = os.getenv("HF_API_KEY")
gpt_api = os.getenv("GPT_API_KEY_IE")
anthropic_api = os.getenv("ANTHROPIC_API_KEY")
neo4j_url = os.getenv("NEO4J_CONNECTION_URL")
neo4j_user = os.getenv("NEO4J_USER")
neo4j_password = os.getenv("NEO4J_PASSWORD")
neo4j_db = os.getenv("NEO4J_MOVIE")

# graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password, database=neo4j_db)

neo4j_db = os.getenv("NEO4J_SCHOLAR")
graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password, database=neo4j_db)


In [7]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Paper {citations: INTEGER, title: STRING, topic: STRING, year: FLOAT}
Author {name: STRING}
Discipline {name: STRING}
Venue {name: STRING}
Relationship properties:

The relationships:
(:Paper)-[:BELONGS_TO]->(:Discipline)
(:Paper)-[:PUBLISHED_IN]->(:Venue)
(:Author)-[:AUTHORED]->(:Paper)


In [6]:
example = [
    {
        "question": "Give me 10 movies similar to 'Forrest Gump' released in 1990 or later.",
        "query": "MATCH (m:Movie {title: 'Forrest Gump'})-[:HAS_GENRE]->(g:Genre)<-[:HAS_GENRE] -(similar:Movie) WHERE m <> similar AND similar.year >= 1990 RETURN similar.title AS SimilarMovies, COUNT(g) AS SharedGenres ORDER BY SharedGenres DESC LIMIT 10",
    },
    {
        "question": "What are the ratings for 'Forrest Gump'?",
        "query": "MATCH (m:Movie {title: 'Forrest Gump'})<-[r:RATED]-(u:User) RETURN u.userId AS UserId, r.rating AS Rating",
    },
    {
        "question": "What is the average rating for 'Forrest Gump'?",
        "query": "MATCH (m:Movie {title: 'Forrest Gump'})<-[r:RATED]-(u:User) RETURN AVG(r.rating) AS AverageRating",
    },
    {
        "question": "Find all movies released in 1995.",
        "query": "MATCH (m:Movie) WHERE m.year = 1995RETURN m.title AS MoviesReleasedIn1995",
    },
    {
        "question": "What are the genres of 'Pocahontas'?",
        "query": "MATCH (m:Movie {title: 'Pocahontas'})-[:HAS_GENRE]->(g:Genre) RETURN g.name AS Genres",
    },
    {
        "question": "How many movies are in the 'Comedy' genre?",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre {name: 'Comedy'}) RETURN COUNT(m) AS ComedyMovieCount",
    },
    {
        "question": "List all users who rated movies in the 'Drama' genre.",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre {name: 'Drama'})<-[:RATED]-(u:User)RETURN DISTINCT u.userId AS UsersWhoRatedDrama",
    },
    {
        "question": "Find all movies rated above 4.0.",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE r.rating > 4.0 RETURN m.title AS HighRatedMovies, AVG(r.rating) AS AverageRating",
    },
    {
        "question": "Find movies with 'City' in the title.",
        "query": " MATCH (m:Movie) WHERE m.title CONTAINS 'City' RETURN m.title AS MoviesWithCityInTitle",
    },
    {
        "question": "What companies do workers named John work in?",
        "query": "MATCH (p:Person {name: 'John'})-[:WORKS_IN]->(c:Company) RETURN c.name",
    },
    {
        "question": "What are the ratings for 'Eat Drink Man Woman (Yin shi nan nu)'?",
        "query": "MATCH (m:Movie {title: 'Eat Drink Man Woman (Yin shi nan nu)'})<-[r:RATED]-(u:User) RETURN u.userId AS UserId, r.rating AS Rating",
    },
    {
        "question": "Which movies have the most shared genres with 'While You Were Sleeping'?",
        "query": "MATCH (m:Movie {title: 'While You Were Sleeping'})-[:HAS_GENRE]->(g:Genre)<-[:HAS_GENRE]-(similar:Movie) WHERE m <> similar RETURN similar.title AS SimilarMovies, COUNT(g) AS SharedGenres ORDER BY SharedGenres DESC LIMIT 10",
    },
    {
        "question": "What is the highest rated movie?",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) RETURN m.title AS HighestRatedMovie, AVG(r.rating) AS AverageRating ORDER BY AverageRating DESC LIMIT 1",
    },
    {
        "question": "Find all movies that are both 'Comedy' and 'Romance'.",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre) WHERE g.name IN ['Comedy', 'Romance'] RETURN m.title AS MoviesThatAreComedyAndRomance",
    },
    {
        "question": "How many users rated 'French Kiss'?",
        "query": "MATCH (m:Movie {title: 'French Kiss'})<-[:RATED]-(u:User) RETURN COUNT(u) AS NumberOfUsersWhoRatedFrenchKiss",
    },
    {
        "question": "What movies did user 1 rate?",
        "query": "MATCH (u:User {userId: 1})-[r:RATED]->(m:Movie) RETURN m.title AS MoviesRatedByUser1",
    },
    {
        "question": "Find all movies in the 'True Lies' genre.",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre {name: 'True Lies'}) RETURN m.title AS MoviesInTrueLiesGenre",
    },
    {
        "question": "List users who rated movies released in 1995.",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE m.year = 1995 RETURN DISTINCT u.userId AS UsersWhoRatedMoviesIn1995",
    },
    {
        "question": "How many movies has each user rated?",
        "query": "MATCH (u:User)-[r:RATED]->(m:Movie) RETURN u.userId AS UserId, COUNT(r) AS RatedMoviesCount",
    },
    {
        "question": "What are the genres of 'Hot Shots! Part Deux'?",
        "query": " MATCH (m:Movie {title: 'Hot Shots! Part Deux'})-[:HAS_GENRE]->(g:Genre) RETURN g.name AS Genres",
    },
    {
        "question": "I like the movie 'True Lies'. give me 5 movie similar to this one with their genres",
        "query": "MATCH (m:Movie {title: 'True Lies'})-[:HAS_GENRE]->(g:Genre) WITH g MATCH (other:Movie)-[:HAS_GENRE]->(g) WHERE other.title <> 'True Lies' WITH other, collect(g.name) as genres, COUNT(g) as genreCount ORDER BY genreCount DESC RETURN other.title as Movie, genres as Genres LIMIT 5"
    },
    {
        "question": "I liked the movie 'Braveheart'. What are 5 similar movies I should watch?",
        "query": "MATCH (m:Movie {title: 'Braveheart'})-[:HAS_GENRE]->(g:Genre) WITH g MATCH (other:Movie)-[:HAS_GENRE]->(g) WHERE other.title <> 'Braveheart' WITH other, COLLECT(g.name) as genres, COUNT(g) as genreCount ORDER BY genreCount DESC RETURN other.title as SimilarMovie, genres LIMIT 5",
    },
    {
        "question": "I like 'Drama' and 'Romance'. I have watched 'Sense and Sensibility' and 'Leaving Las Vegas'. What should I watch next?",
        "query": "MATCH (x:Movie {title: 'Sense and Sensibility'})-[:HAS_GENRE]->(g1:Genre), (z:Movie {title: 'Leaving Las Vegas'})-[:HAS_GENRE]->(g2:Genre) WITH g1, g2 MATCH (other:Movie)-[:HAS_GENRE]->(g)  WHERE (g = g1 OR g = g2) AND other.title <> 'Sense and Sensibility' AND other.title <> 'Leaving Las Vegas' RETURN DISTINCT other.title AS RecommendedMovie LIMIT 5",
    }
]


In [8]:
example = [
    {
        "question": "List all papers authored by 'Han Xiao'.",
        "query": "MATCH (a:Author {name: 'Han Xiao'})-[:AUTHORED]->(p:Paper) RETURN p.title AS PapersAuthoredByHanXiao",
    },
    {
        "question": "Which papers belong to the 'Computer Science' discipline?",
        "query": "MATCH (p:Paper)-[:BELONGS_TO]->(d:Discipline {name: 'Computer Science'}) RETURN p.title AS PapersInComputerScience Limit 5"
    },
    {
        "question": "What are the papers published in 'Nature' in the year 2018?",
        "query": "MATCH (p:Paper)-[:PUBLISHED_IN]->(v:Venue {name: 'Nature'}) WHERE p.year = 2018 RETURN p.title AS PapersPublishedInNature2018"
    },
    {
        "question": "How many papers did 'Jianmin Chen' author?",
        'query': "MATCH (a:Author {name: 'Jianmin Chen'})-[:AUTHORED]->(p:Paper) RETURN COUNT(p) AS NumberOfPapersAuthoredByJianminChen"
    },
    {
        "question": "List all authors who have published papers in the topic 'Machine Learning'.",
        "query": "MATCH (a:Author)-[:AUTHORED]->(p:Paper {topic: 'Machine Learning'}) RETURN DISTINCT a.name AS AuthorsInMachineLearning Limit 5"
    },
    {
        'question': "What are the most cited papers in 'Mathematics'?",
        'query': "MATCH (p:Paper)-[:BELONGS_TO]->(d:Discipline {name: 'Mathematics'}) RETURN p.title AS PapersInComputerScience Limit 5"
    },
    {
        'question': "What are the most cited papers in 'Materials Science' discipline?",
        'query': "MATCH (p:Paper)-[:BELONGS_TO]->(d:Discipline {name: 'Materials Science'}) RETURN p.title AS Paper, p.citations AS Citations ORDER BY Citations DESC LIMIT 5"
    },
    {
        'question': "Which venues have published papers in the 'Network Science' topic?",
        'query': "MATCH (p:Paper {topic: 'Network Science'})-[:PUBLISHED_IN]->(v:Venue) RETURN DISTINCT v.name AS VenuesForNetworkScience LIMIT 5"
    },
    {
        'question': "I am 'Han Xiao' conducts research in 'Computer Science' and 'Machine Learning'. Which professors should he collaborate with?",
        'query': "MATCH (a:Author {name: 'Han Xiao'})-[:AUTHORED]->(p:Paper)-[:BELONGS_TO]->(d:Discipline) WHERE d.name = 'Computer Science' OR p.topic = 'Machine Learning' WITH DISTINCT d AS Discipline, p.topic AS Topic MATCH (other:Author)-[:AUTHORED]->(:Paper)-[:BELONGS_TO]->(d) WHERE other.name <> 'Han Xiao' RETURN DISTINCT other.name AS PotentialCollaborators LIMIT 5"
    },
    {
        'question': "I am 'Han Xiao'. Which researchers I collaborated with before?",
        'query': "MATCH (a1:Author {name: 'Han Xiao'})-[:AUTHORED]->(p:Paper)<-[:AUTHORED]-(a2:Author) WHERE a1 <> a2 RETURN DISTINCT a2.name"
    },
    {
        'question': "I am 'Han Xiao'. Which new researchers should I collaborate with for future work?",
        'query': "MATCH (a1:Author {name: 'Han Xiao'})-[:AUTHORED]->(p:Paper)-[:BELONGS_TO]->(d:Discipline)<-[:BELONGS_TO]-(p2:Paper)<-[:AUTHORED]-(a2:Author) WHERE a1 <> a2 RETURN a2.name, COUNT(p2) AS collaborations ORDER BY collaborations DESC"
    },
    {
        'question': "I am 'Kashif Rasul'. I have some workes in 'Mathematics' and want to expand my research in this field. Which researchers should I collaborate with based on papers related to 'Mathematics'?",
        'query': "MATCH (a:Author {name: 'Kashif Rasul'})-[:AUTHORED]->(p:Paper)-[:BELONGS_TO]->(d:Discipline) WHERE d.name = 'Mathematics' WITH DISTINCT d AS Discipline MATCH (other:Author)-[:AUTHORED]->(:Paper)-[:BELONGS_TO]->(d) WHERE other.name <> 'Kashif Rasul' RETURN DISTINCT other.name AS PotentialCollaborators"
    }
]


In [9]:
# Define the Cypher query prompt template
cypher_generation_prompt = PromptTemplate(
    template="""Based on the schema, write a Cypher query to answer the question.
    
    The question may ask about:
    - Similar movies based on genre
    - Movies a user has rated
    - Recommendations based on genres the user likes

    Schema:
    {schema}

    Example questions and queries:
    {example}

    **Important**:
    - Always identify movies based on their **titles**.
    - Use the exact movie titles provided in the question for any matching logic.
    - Do not reference `userId` or `movieId`; focus solely on the titles of movies and their genres.
    - Assume that if a user has rated a movie, they have watched it.

    Question: {question}
    Query:""",
    input_variables=["schema", "question", "example"],
)

# Define the answer generation prompt template
qa_prompt = PromptTemplate(
    template="""Based on the Cypher query results, answer the question.
    Question: {question}
    Results: {context}
    Give a clear, direct but human-friendly answer using the data from the results. 
    If it's a list, combine all items.
    Answer:""",
    input_variables=["question", "context"],
)

In [15]:
# # Initialize the LLM (Google Generative AI)
# llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=gemini_api, temperature=0)

# ALLOW_DANGEROUS_REQUEST = True

# # Define the chain
# chain = GraphCypherQAChain.from_llm(
#     llm=llm,
#     graph=graph,  # Your Neo4j graph object
#     verbose=True,
#     cypher_generation_prompt=cypher_generation_prompt,
#     qa_prompt=qa_prompt,
#     allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
# )

In [16]:
# # Test query function
# def test_query(question):
#     try:
#         # Explicitly generate the Cypher query first using the prompt
#         generated_query = cypher_generation_prompt.format(
#             schema=graph.schema,  # Ensure dynamic schema usage
#             question=question,
#             example=example
#         )

#         # Run the chain with the generated query
#         result = chain.run(query=generated_query, question=question)
        
#         print(f"Q: {question}")
#         print(f"A: {result}\n")
#     except Exception as e:
#         print(f"Error: {str(e)}")
#     return result

In [10]:
# import io
# import sys
# import re

# def clean_ansi(text):
#     # Remove ANSI escape codes
#     ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
#     return ansi_escape.sub('', text).strip()

# def test_query(question):
#     try:
#         # Create a string buffer to capture the output
#         output_buffer = io.StringIO()
#         original_stdout = sys.stdout
#         # sys.stdout = output_buffer

#         # Explicitly generate the Cypher query first using the prompt
#         generated_query = cypher_generation_prompt.format(
#             schema=graph.schema,  # Ensure dynamic schema usage
#             question=question,
#             example=example
#         )
#         # Run the chain with the generated query
#         result = chain.run(query=generated_query, question=question)
        
#         # Restore original stdout and get the captured output
#         sys.stdout = original_stdout
#         output = output_buffer.getvalue()
        
#         # Extract Cypher query and context from the captured output
#         cypher_query = None
#         full_context = None
        
#         if 'Generated Cypher:' in output:
#             cypher_query = output.split('Generated Cypher:')[1].split('Full Context:')[0].strip()
#             cypher_query = clean_ansi(cypher_query)
        
#         if 'Full Context:' in output:
#             full_context = output.split('Full Context:')[1].split('>')[0].strip()
#             full_context = clean_ansi(full_context)
        
#         print(f"Q: {question}")
#         print(f"A: {result}\n")
        
#         return {
#             'result': result,
#             'cypher_query': cypher_query,
#             'full_context': full_context
#         }
#     except Exception as e:
#         print(f"Error: {str(e)}")
#         return {
#             'result': None,
#             'cypher_query': None,
#             'full_context': None,
#             'error': str(e)
#         }

In [13]:
# response = test_query("How many users rated 'Jungle Book, The'?")
# print(f"Cypher Query: {response['cypher_query']}")
# print(f"Full Context: {response['full_context']}")
# print(f"Result: {response['result']}")

### Hugging face Lamma 

In [14]:
# from huggingface_hub import login
# login(token=hugging_face_api)

In [15]:
# from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# from langchain.llms import HuggingFacePipeline

# # Load tokenizer and model
# model_name = "meta-llama/Llama-2-7b-chat-hf"
# cache_dir = "./HF_models"

# tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
# model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)

# # Create a text generation pipeline for Hugging Face
# pipeline_model = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     device_map="auto",
#     max_new_tokens=256,
#     temperature=0.1,
#     top_k=40,
#     top_p=0.95,
# )

# # Wrap pipeline in HuggingFacePipeline, which is Runnable-compatible
# llm = HuggingFacePipeline(pipeline=pipeline_model)

# # Define the chain with Hugging Face LLM
# chain = GraphCypherQAChain.from_llm(
#     llm=llm,
#     graph=graph,  # Your Neo4j graph object
#     verbose=True,
#     cypher_generation_prompt=cypher_generation_prompt,
#     qa_prompt=qa_prompt,
#     allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
# )


In [16]:
# response = test_query("How many users rated 'Jungle Book, The'?")
# print(f"Cypher Query: {response['cypher_query']}")
# print(f"Full Context: {response['full_context']}")
# print(f"Result: {response['result']}")

In [17]:
# from langchain.llms import OpenAI

# # Initialize the OpenAI model with your stored API key
# llm = OpenAI(
#     model="gpt-3.5-turbo",  # Choose the model, e.g., "gpt-3.5-turbo" or "gpt-4"
#     temperature=0.1,
#     max_tokens=256,
#     openai_api_key=gpt_api  # Pass your API key directly
# )

# # Define the chain with the OpenAI LLM
# chain = GraphCypherQAChain.from_llm(
#     llm=llm,
#     graph=graph,  # Your Neo4j graph object
#     verbose=True,
#     cypher_generation_prompt=cypher_generation_prompt,
#     qa_prompt=qa_prompt,
#     allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
# )
# response = test_query("How many users rated 'Jungle Book, The'?")

### GPT-3.5 Turbo

In [10]:
from langchain.chat_models import ChatOpenAI

# Initialize the ChatOpenAI model with your stored API key
llm = ChatOpenAI(
    model="gpt-3.5-turbo",  # or "gpt-4"
    temperature=0.1,
    max_tokens=256,
    openai_api_key=gpt_api  # Pass your API key directly
)
ALLOW_DANGEROUS_REQUEST = True

# Define the chain with the ChatOpenAI LLM
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,  # Your Neo4j graph object
    verbose=True,
    cypher_generation_prompt=cypher_generation_prompt,
    qa_prompt=qa_prompt,
    allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
)

  llm = ChatOpenAI(


In [11]:
import io
import sys
import re

def clean_ansi(text):
    # Remove ANSI escape codes
    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
    return ansi_escape.sub('', text).strip()

def test_query(question):
    try:
        # Create a string buffer to capture the output
        output_buffer = io.StringIO()
        original_stdout = sys.stdout
        # sys.stdout = output_buffer

        # Explicitly generate the Cypher query first using the prompt
        generated_query = cypher_generation_prompt.format(
            schema=graph.schema,  # Ensure dynamic schema usage
            question=question,
            example=example
        )
        # Run the chain with the generated query
        result = chain.run(query=generated_query, question=question)
        
        # Restore original stdout and get the captured output
        sys.stdout = original_stdout
        output = output_buffer.getvalue()
        
        # Extract Cypher query and context from the captured output
        cypher_query = None
        full_context = None
        
        if 'Generated Cypher:' in output:
            cypher_query = output.split('Generated Cypher:')[1].split('Full Context:')[0].strip()
            cypher_query = clean_ansi(cypher_query)
        
        if 'Full Context:' in output:
            full_context = output.split('Full Context:')[1].split('>')[0].strip()
            full_context = clean_ansi(full_context)
        
        print(f"Q: {question}")
        print(f"A: {result}\n")
        
        return {
            'result': result,
            'cypher_query': cypher_query,
            'full_context': full_context
        }
    except Exception as e:
        print(f"Error: {str(e)}")
        return {
            'result': None,
            'cypher_query': None,
            'full_context': None,
            'error': str(e)
        }

In [12]:
# response = test_query("How many users rated 'Jungle Book, The'?")
response = test_query("List all papers authored by 'Han Xiao'.")

  result = chain.run(query=generated_query, question=question)




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Author {name: 'Han Xiao'})-[:AUTHORED]->(p:Paper)
RETURN p.title[0m
Full Context:
[32;1m[1;3m[{'p.title': 'Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms'}][0m

[1m> Finished chain.[0m
Q: List all papers authored by 'Han Xiao'.
A: The paper authored by Han Xiao is "Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms".



### Anthropic Haruku

In [None]:
# import anthropic
# from langchain.llms import Anthropic

# # Define a custom function to count tokens
# def count_tokens(text):
#     return len(text.split())

# # Extend the Anthropic model to include count_tokens
# class AnthropicWithTokenCount(Anthropic):
#     def count_tokens(self, text):
#         return count_tokens(text)

# # Initialize the custom Anthropic model
# llm = AnthropicWithTokenCount(
#     model="claude-2",  # Specify the model, e.g., "claude-2"
#     anthropic_api_key=anthropic_api,
#     temperature=0.1,
#     max_tokens_to_sample=256,
# )

# # Define the chain with the custom Anthropic model
# chain = GraphCypherQAChain.from_llm(
#     llm=llm,
#     graph=graph,  # Your Neo4j graph object
#     verbose=True,
#     cypher_generation_prompt=cypher_generation_prompt,
#     qa_prompt=qa_prompt,
#     allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
# )


In [21]:
TEST_SET = [
    {
        "question": "What are the genres of 'Star Wars: Episode IV - A New Hope'?",
        "query": "MATCH (m:Movie {title: 'Star Wars: Episode IV - A New Hope'})-[:HAS_GENRE]->(g:Genre) RETURN g.name AS Genres",
        "answer": "The genres of 'Star Wars: Episode IV - A New Hope' are 'Sci-Fi', 'Action', and 'Adventure'.",
    },
    {
        "question": "How many users rated 'Jungle Book, The'?",
        "query": "MATCH (m:Movie {title: 'Jungle Book, The'})<-[:RATED]-(u:User) RETURN COUNT(u) AS NumberOfUsersWhoRatedJungleBook",
        "answer": "2 users rated 'Jungle Book, The'.",
    },
    {
        "question": "I like the movie 'Dumb & Dumber (Dumb and Dumber)'. Give me 5 movies similar to this one with their genres.",
        "query": "MATCH (m:Movie {title: 'Dumb & Dumber (Dumb and Dumber)'})-[:HAS_GENRE]->(g:Genre) WITH g MATCH (other:Movie)-[:HAS_GENRE]->(g) WHERE other.title <> 'InceptDumb & Dumber (Dumb and Dumber)ion' WITH other, COLLECT(g.name) AS genres, COUNT(g) AS genreCount ORDER BY genreCount DESC RETURN other.title AS Movie, genres AS Genres LIMIT 5",
        "answer": "Based on your preference for 'Dumb & Dumber (Dumb and Dumber)', here are 5 similar movies you might enjoy: 'Batman Forever', 'True Lies', 'City Slickers II: The Legend of Curly's Gold', 'Coneheads', and 'Bullets Over Broadway'. These movies share the genres of Comedy and Adventure.",
    },
    {
        "question": "What are the ratings for 'Taxi Driver'?",
        "query": "MATCH (m:Movie {title: 'Taxi Driver'})<-[r:RATED]-(u:User) RETURN  r.rating AS Rating",
        "answer": "The ratings for 'Taxi Driver' are 5.0.",
    },
    {
        "question": "Find all movies released in 1993.",
        "query": "MATCH (m:Movie) WHERE m.year = 1993 RETURN m.title AS MoviesReleasedIn1993",
        "answer": "The movies released in 1993 are 'Three Colors: Blue (Trois couleurs: Bleu)', 'Firm, The', 'Fugitive, The', 'Hot Shots! Part Deux', 'Mrs. Doubtfire', 'Whats Eating Gilbert Grape', 'Cliffhanger', 'Demolition Man', 'Jurassic Park', 'Addams Family Values', 'Coneheads', and 'Dave'",
    },
    {
        "question": "What is the average rating for 'Pulp Fiction'?",
        "query": "MATCH (m:Movie {title: 'Pulp Fiction'})<-[r:RATED]-(u:User) RETURN AVG(r.rating) AS AverageRating",
        "answer": "The average rating for 'Pulp Fiction' is 2.875.",
    },
    {
        "question": "Find all movies that are both 'Action' and 'Thriller'.",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre) WHERE g.name IN ['Action', 'Thriller'] WITH m, COLLECT(g.name) AS genres WHERE 'Action' IN genres AND 'Thriller' IN genres RETURN m.title AS MoviesThatAreActionAndThriller",
        "answer": "The movies which are both Action and Thriller are 'Outbreak', 'Natural Born Killers', 'Die Hard: With a Vengeance', 'Jurassic Park', 'Cliffhanger', 'GoldenEye', 'True Lies', 'Speed', 'Clear and Present Danger'  and 'Net, The'.",
    },
    {
        "question": "List all users who rated movies in the 'Horror' genre.",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre {name: 'Horror'}) OPTIONAL MATCH (u:User)-[:RATED]->(m) RETURN DISTINCT u.userId as users LIMIT 10",
        "answer": "User 2 and 5 rated Horror movies.",
    },
    {
        "question": "What is the highest rated movie released before 2000?",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE m.year < 2000 RETURN m.title AS HighestRatedMovie, AVG(r.rating) AS AverageRating ORDER BY AverageRating DESC LIMIT 1",
        "answer": "Height rated movie resleased before 2000 is Shanghai Triad (Yao a yao yao dao waipo qiao).",
    },
    {
        "question": "Find movies with 'Love' in the title.",
        "query": "MATCH (m:Movie) WHERE m.title CONTAINS 'Love' RETURN m.title AS MoviesWithLoveInTitle",
        "answer": "The movie that have love in title is 'When a Man Loves a Woman'.",
    },
    {
        "question": "How many movies are in the 'Fantasy' genre?",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre {name: 'Fantasy'}) RETURN COUNT(m) AS FantasyMovieCount",
        "answer": "There are 5 movies in the 'Fantasy' genre.",
    },
    {
        "question": "What movies did user 2 rate?",
        "query": "MATCH (u:User {userId: 2})-[r:RATED]->(m:Movie) RETURN m.title AS MoviesRatedByUser2",
        "answer": "User 2 rated 'Mrs. Doubtfire','Hot Shots! Part Deux','Fugitive, The','Firm, The','When a Man Loves a Woman','True Lies','Speed','Lion King, The','Jungle Book, The','Four Weddings and a Funeral','Forrest Gump','Client, The','Clear and Present Danger','Ace Ventura: Pet Detective','While You Were Sleeping','Shawshank Redemption, The','Ready to Wear (Pret-A-Porter)','Pulp Fiction','Nell','Milk Money','Interview with the Vampire: The Vampire Chronicles','Forget Paris','French Kiss','Dumb & Dumber (Dumb and Dumber)','Disclosure','Circle of Friends','Boys on the Side','Billy Madison','Walk in the Clouds, A','Showgirls','Nine Months','Net, The','Batman Forever','Pocahontas','Clueless','Babe' and 'Dangerous Minds'.",
    },
    {
        "question": "List users who rated movies released in 1994.",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE m.year = 1994 RETURN DISTINCT u.userId AS UsersWhoRatedMoviesIn1994",
        "answer": "Users who rated movies released in 1994 include User 1, User 2, User 3, User 4, User 5, User 7 and User 8.",
    },
    {
        "question": "What are the genres of 'The Lion King'?",
        "query": "MATCH (m:Movie {title: 'Lion King, The'})-[:HAS_GENRE]->(g:Genre) RETURN g.name AS Genres",
        "answer": "'Lion King, The' is a movie in the 'Adventure', 'Animation', 'Children', 'Drama', 'Musical' and 'IMAX' genres.",
    },
    {
        "question": "I liked the movie 'French Kiss'. What are 5 similar movies I should watch?",
        "query": "MATCH (m:Movie {title: 'French Kiss'})-[:HAS_GENRE]->(g:Genre) WITH g MATCH (other:Movie)-[:HAS_GENRE]->(g) WHERE other.title <> 'French Kiss' WITH other, collect(g.name) as genres, COUNT(g) as genreCount ORDER BY genreCount DESC RETURN other.title as Movie, genres as Genres LIMIT 5",
        "answer": "Based on your preference for 'Dead Man Walking', here are 5 similar movies you might enjoy: 'True Lies','Englishman Who Went Up a Hill But Came Down a Mountain, The','First Knight','Rob Roy','Dave'.",
    },
    {
        "question": "What movies have the most shared genres with 'Milk Money'?",
        "query": "MATCH (m:Movie {title: 'Milk Money'})-[:HAS_GENRE]->(g:Genre)<-[:HAS_GENRE]-(similar:Movie) WHERE m <> similar RETURN similar.title AS SimilarMovies, COUNT(g) AS SharedGenres ORDER BY SharedGenres DESC LIMIT 5",
        "answer": "Five movies that have the same genres as 'Milk Money' are 'Englishman Who Went Up a Hill But Came Down a Mountain, The','Dave','Like Water for Chocolate (Como agua para chocolate)','First Knight', and 'Rob Roy'.",
    },
    {
        "question": "Find all movies rated above 4 and rated by atlead 2 user.",
        "query": "MATCH (m:Movie)<-[r:RATED]-(u:User) WHERE r.rating = 4 WITH m, COUNT(r) AS ratingCount, AVG(r.rating) AS averageRating WHERE ratingCount > 2 RETURN m.title AS HighRatedMovies, averageRating",
        "answer": "The movie that has rating above 4 and rated by at least 2 users are 'Firm, The', 'Four Weddings and a Funeral' and 'Clear and Present Danger'.",
    },
    {
        "question": "How many users rated 'Dangerous Minds'?",
        "query": "MATCH (m:Movie {title: 'Dangerous Minds'})<-[:RATED]-(u:User) RETURN COUNT(u) AS NumberOfUsersWhoRatedDarkKnight",
        "answer": "1 users rated 'Dangerous Minds'"
    },
    {
        "question": "I like 'Adventure' and 'Fantasy' genres. What movies should I watch next?",
        "query": "MATCH (m:Movie)-[:HAS_GENRE]->(g:Genre) WHERE g.name IN ['Adventure', 'Fantasy'] RETURN DISTINCT m.title AS RecommendedMovies LIMIT 5",
        "answer": "Based on your preferences for 'Adventure' and 'Fantasy', you might enjoy watching 'City Slickers II: The Legend of Curly's Gold', 'Jurassic Park', 'Demolition Man', 'Cliffhanger' and 'Star Trek: Generations'."
    },
]


In [13]:
TEST_SET = [
    {
        "question": "List the titles of papers authored by 'Jianmin Chen' in 2016?",
        "query": "",
        "answer": "Jianmin Chen authored the paper 'TensorFlow: A system for large-scale machine learning' in 2016.",
        "DF": "",
    },
    {
        "question": "What are the most cited papers in 'Physics'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data.Discipline == 'Physics'][['Paper Title', 'Citations']].drop_duplicates().sort_values(by='Citations', ascending=False).head(5)"
    },
    {
        "question": "Which authors have worked on the 'Network Science' topic?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data.Topic == 'Network Science']['Author'].drop_duplicates().head(5)"
    },
    {
        "question": "What venues have published papers in 'Environmental Science' Discipline?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data.Discipline == 'Environmental Science']['Venue'].drop_duplicates().head(5)"
    },
    {
        "question": "How many papers authored by 'Roland Vollgraf'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data.Author == 'Roland Vollgraf']['Paper Title'].drop_duplicates().shape[0]"
    },
    {
        "question": "I am 'Kashif Rasul'. Which researchers I collaborated with before?",
        "query": "",
        "answer": "Kashif Rasul has collaborated with Han Xiao and Roland Vollgraf.",
        "DF": ""
    },
    {
        "question": "I am 'Kashif Rasul'. Which professors should I collaborate with for future work?",
        "query": "",
        "answer": "Kashif Rasul could collaborate with Vijay Vasudevan, Pete Warden, M. Wicke, Yuan Yu, Ashish Agarwal, E. Brevdo, C. Citro, G. Corrado, I. Goodfellow, and A. Harp.",
        "GT_NDCG": ["Vijay Vasudevan", "Pete Warden", "M. Wicke", "Yuan Yu", "Ashish Agarwal", "E. Brevdo", "C. Citro", "G. Corrado", "I. Goodfellow", "A. Harp"],
        "DF": ""
    },
    {
        "question": "List all papers published in 'Biology' before 2015",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data.Discipline == 'Biology') & (scholar_data['Year Published'] < 2015)]['Paper Title'].drop_duplicates().to_list()"
    },
    {
        "question": "Find the authors who have contributed to papers in 'Chemistry' and 'Materials Science'",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data.Discipline.isin(['Chemistry', 'Materials Science'])]['Author'].drop_duplicates().head(5).to_list()"
    },
    {
        "question": "Which papers were published in the 'Medicine' discipline in venues of type 'Journal'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data.Discipline == 'Medicine') & (scholar_data['Venue Type'] == 'Journal')]['Paper Title'].drop_duplicates().to_list()"
    },
    {
        "question": "List papers with 'Machine Learning' as the topic and citations greater than 5000.",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data.Topic == 'Machine Learning') & (scholar_data.Citations > 5000)]['Paper Title'].drop_duplicates().head().to_list()"
    },
    {
        "question": "What are the venues where 'Jianmin Chen' has published papers",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data['Author'] == 'Jianmin Chen']['Venue'].drop_duplicates().to_list()"
    },
    {
        "question": "What are the papers authored by 'Roland Vollgraf' in 'Machine Learning'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data['Author'] == 'Roland Vollgraf') & (scholar_data['Topic'] == 'Machine Learning')]['Paper Title'].drop_duplicates().tolist()"
    },
    {
        "question": "Which authors have worked in both 'Computer Science' and 'Mathematics'?",
        "query": "",
        "answer": "",
        "DF": "cs_authors = scholar_data[scholar_data['Discipline'] == 'Computer Science']['Author'] / math_authors = scholar_data[scholar_data['Discipline'] == 'Mathematics']['Author']/ cs_authors[cs_authors.isin(math_authors)].drop_duplicates().to_list()[:5]"
    },
    {
        "question": "Which authors have published in 'Materials Science' and the venue 'Nature'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data['Discipline'] == 'Materials Science') & (scholar_data['Venue'] == 'Nature')]['Author'].drop_duplicates().to_list()"
    },
    {
        "question": "What are the least cited papers in the discipline 'Psychology'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data['Discipline'] == 'Psychology'][['Paper Title', 'Citations']].sort_values(by='Citations').drop_duplicates()['Paper Title'].to_list()"
    },
    {
        "question": "what are the top venues for papers on 'Network Science'?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[scholar_data['Topic'] == 'Network Science']['Venue'].value_counts().index.tolist()[:5]"
    },
    {
        "question": "Which authors published papers in 'Medicine' after 2015?",
        "query": "",
        "answer": "",
        "DF": "scholar_data[(scholar_data['Discipline'] == 'Medicine') & (scholar_data['Year Published'] > 2015)]['Author'].to_list()[:5]"
    },
    {
        "question": "which authors have collaborated with 'D. Davies' on papers in 'Chemistry'?",
        "query": "",
        "answer": "",
        "DF": "z_chen_papers = scholar_data[(scholar_data['Author'] == 'D. Davies') & (scholar_data['Discipline'] == 'Chemistry')]['Paper Title']/scholar_data[scholar_data['Paper Title'].isin(z_chen_papers) & (scholar_data['Author'] != 'Z. Chen')]['Author'].drop_duplicates().to_list()"
    }
]


In [14]:
print("number of test cases: ", len(TEST_SET))

number of test cases:  19


In [15]:
for test_data in TEST_SET:
    question = test_data["question"]
    query = test_data["query"]
    answer = test_data["answer"]
    test_query(question)
    print(f"Answer: {answer}")
    print("-------------------------------------------------------------------")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (a:Author {name: 'Jianmin Chen'})-[:AUTHORED]->(p:Paper {year: 2016})
RETURN p.title;[0m
Full Context:
[32;1m[1;3m[{'p.title': 'TensorFlow: A system for large-scale machine learning'}][0m

[1m> Finished chain.[0m
Q: List the titles of papers authored by 'Jianmin Chen' in 2016?
A: The paper authored by Jianmin Chen in 2016 is "TensorFlow: A system for large-scale machine learning".

Answer: Jianmin Chen authored the paper 'TensorFlow: A system for large-scale machine learning' in 2016.
-------------------------------------------------------------------


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Paper)-[:BELONGS_TO]->(:Discipline{name:'Physics'})
RETURN p.title, p.citations
ORDER BY p.citations DESC[0m
Full Context:
[32;1m[1;3m[{'p.title': 'Machine Learning for Fluid Mechanics', 'p.citations': 1830}, {'p.title': 'MoleculeNet: a benchmark for

In [None]:
import evaluate
from sklearn.metrics.pairwise import cosine_similarity
import pprint
import json
from langchain.embeddings.huggingface import HuggingFaceEmbeddings


embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load evaluation metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# Load the JSON file
with open('Q_A_LLM.json', 'r') as f:
    evaluation_data = json.load(f)

# Accumulate results
rouge_scores = []
bleu_scores = []
cosine_similarities = []

for data in evaluation_data:
    question = data["question"]
    expected_answer = data["answer"]
    generated_answer = data["LLM"]  # Use the generated answer from the JSON data
    
    # Compute evaluation metrics
    rouge_result = rouge.compute(predictions=[generated_answer], references=[expected_answer])
    bleu_result = bleu.compute(predictions=[generated_answer], references=[[expected_answer]])

    # Compute cosine similarity between embeddings
    reference_embedding = embed_model.embed_query(expected_answer)  # Replace with your embedding method
    generated_embedding = embed_model.embed_query(generated_answer)  # Replace with your embedding method
    cosine_sim = cosine_similarity([reference_embedding], [generated_embedding])[0][0]

    # Store the results
    rouge_scores.append(rouge_result)
    bleu_scores.append(bleu_result)
    cosine_similarities.append(cosine_sim)


In [None]:
import numpy as np

# Compute the average ROUGE score
avg_rouge1 = np.mean([score['rouge1'] for score in rouge_scores])
avg_rouge2 = np.mean([score['rouge2'] for score in rouge_scores])
avg_rougeL = np.mean([score['rougeL'] for score in rouge_scores])

# Compute the average BLEU score
avg_bleu = np.mean([score['bleu'] for score in bleu_scores])

# Compute the average Cosine Similarity
avg_cosine_similarity = np.mean(cosine_similarities)

# Print the average scores
print(f"Average ROUGE-1: {avg_rouge1}")
print(f"Average ROUGE-2: {avg_rouge2}")
print(f"Average ROUGE-L: {avg_rougeL}")
print(f"Average BLEU: {avg_bleu}")
print(f"Average Cosine Similarity: {avg_cosine_similarity}")

In [None]:
# Accumulate results
results = []

for data in evaluation_data:
    question = data["question"]
    expected_answer = data["answer"]

    # Query the system
    generated_answer = data["LLM"]  # Using the LLM's response as generated answer
    
    # Check for specific phrases in the generated answer
    if "User Mistake." in generated_answer or "ERROR: SAFETY Issue." in generated_answer:
        # Skip score calculation for these responses
        continue

    # Compute evaluation metrics
    rouge_result = rouge.compute(predictions=[generated_answer], references=[expected_answer])
    bleu_result = bleu.compute(predictions=[generated_answer], references=[[expected_answer]])

    # Compute cosine similarity between embeddings
    reference_embedding = embed_model.embed_query(expected_answer)
    generated_embedding = embed_model.embed_query(generated_answer)
    cosine_sim = cosine_similarity([reference_embedding], [generated_embedding])[0][0]

    # Append the result as a dictionary
    results.append({
        "Question": question,
        "Generated Answer": generated_answer,
        "Expected Answer": expected_answer,
        "Cosine Similarity": cosine_sim,
        # Separate ROUGE metrics
        "ROUGE-1": rouge_result['rouge1'],
        "ROUGE-2": rouge_result['rouge2'],
        "ROUGE-L": rouge_result['rougeL'],
        "ROUGE-Lsum": rouge_result['rougeLsum'],
        # Separate BLEU metrics
        "BLEU": bleu_result['bleu'],
        "Precision_1": bleu_result['precisions'][0] if len(bleu_result['precisions']) > 0 else None,
        "Precision_2": bleu_result['precisions'][1] if len(bleu_result['precisions']) > 1 else None,
        "Precision_3": bleu_result['precisions'][2] if len(bleu_result['precisions']) > 2 else None,
        "Precision_4": bleu_result['precisions'][3] if len(bleu_result['precisions']) > 3 else None,
        "Brevity Penalty": bleu_result['brevity_penalty'],
        "Length Ratio": bleu_result['length_ratio'],
        "Translation Length": bleu_result['translation_length'],
        "Reference Length": bleu_result['reference_length'],
    })

# Create a DataFrame for tabular results
df = pd.DataFrame(results)

In [None]:
df.head()

In [None]:
# Print the average scores
print(f"Average ROUGE-1: {df['ROUGE-1'].mean()}")
print(f"Average ROUGE-2: {df['ROUGE-2'].mean()}")
print(f"Average ROUGE-L: {df['ROUGE-L'].mean()}")
print(f"Average ROUGE-L: {df['ROUGE-Lsum'].mean()}")
print(f"Average BLEU: {df['BLEU'].mean()}")
print(f"Average Cosine Similarity: {df['Cosine Similarity'].mean()}")

In [None]:
import numpy as np

def calculate_ndcg(relevant_items, generated_items, k):
    # Get relevance scores for the generated items
    relevance_scores = [1 if item in relevant_items else 0 for item in generated_items[:k]]
    
    # Calculate DCG
    dcg = sum(relevance_scores[i] / np.log2(i + 2) for i in range(len(relevance_scores)))
    
    # Calculate IDCG
    sorted_relevance_scores = sorted(relevance_scores, reverse=True)
    idcg = sum(sorted_relevance_scores[i] / np.log2(i + 2) for i in range(len(sorted_relevance_scores)))

    # Handle the case where IDCG is zero to avoid division by zero
    if idcg == 0:
        return 0.0
    
    # Calculate NDCG
    ndcg = dcg / idcg
    return ndcg


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

def calculate_metrics(evaluation_data, k):
    ndcg_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    for data in evaluation_data:
        question = data["question"]
        expected_answer = data["answer"]
        generated_answer = data["LLM"]

        if "User Mistake." in generated_answer or "ERROR: SAFETY Issue." in generated_answer:
            continue  # Skip this entry if there's an error message
        
        # Assume the expected answer is the only relevant item for simplicity
        relevant_items = [expected_answer]
        generated_items = generated_answer.split(", ")  # Assuming comma-separated answers

        # Calculate NDCG@k
        # You can implement the NDCG function according to your relevance scoring
        ndcg_score = calculate_ndcg(relevant_items, generated_items, k)  # Implement this function
        ndcg_scores.append(ndcg_score)

        # Calculate Precision@k
        y_true = [1 if answer in relevant_items else 0 for answer in generated_items[:k]]
        precision = precision_score([1] * len(y_true), y_true)
        precision_scores.append(precision)

        # Calculate Recall@k
        recall = recall_score([1] * len(relevant_items), y_true)
        recall_scores.append(recall)

        # Calculate F1@k
        f1 = f1_score([1] * len(relevant_items), y_true)
        f1_scores.append(f1)

    return ndcg_scores, precision_scores, recall_scores, f1_scores

# Example of using the function
ndcg_scores, precision_scores, recall_scores, f1_scores = calculate_metrics(evaluation_data, k=5)
