In [6]:
from langchain_ollama import ChatOllama
from langchain_community.graphs import Neo4jGraph
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

DEMO_URL = "neo4j+s://demo.neo4jlabs.com"
DATABASE = "recommendations"

graph = Neo4jGraph(
    url=DEMO_URL,
    database=DATABASE,
    username=DATABASE,
    password=DATABASE,
    enhanced_schema=True,
    sanitize=True,
)
llm = ChatOllama(model="tomasonjo/llama3-text2cypher-demo")
llm_tes = ChatOllama(model="projectwilsen/llama3.1-8b-recommendations-4bit")

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given an input question, convert it to a Cypher query. No pre-amble.",
        ),
        (
            "human",
            (
                "Based on the Neo4j graph schema below, write a Cypher query that would answer the user's question: "
                "\n{schema} \nQuestion: {question} \nCypher query:"
            ),
        ),
    ]
)
chain = prompt | llm

question = "How many movies did Tom Hanks play in?"
response = chain.invoke({"question": question, "schema": graph.schema})
print(response.content)



MATCH (a:Actor {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)
RETURN count(m) AS numberOfMoviesPlayedIn


In [8]:
from langchain.chains import GraphCypherQAChain

questions = ["Who is the oldest director?",
             "Find all directors who have directed a movie in Spanish language.",
             "Give me 5 movies where a director has also acted?",
             "List all movies with an IMDb rating greater than 5 that have been directed by a director born in China."
             ]

chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm_tes, verbose=True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Who is the oldest director?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)
WHERE d.born IS NOT NULL
RETURN d.name, d.born AS bornDate
ORDER BY bornDate ASC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Georges Méliès', 'bornDate': neo4j.time.Date(1861, 12, 8)}][0m

[1m> Finished chain.[0m
{d.name} is the oldest director. They were born on {bornDate}.

 Find all directors who have directed a movie in Spanish language.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE m.languages CONTAINS 'Spanish'
RETURN d.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
The requested information is not available.

 Give me 5 movies where a director has also acted?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:ACTED_IN]->(m:Movie)
WITH d, m
WHERE NOT (d)-[:DIRECTED]->(m)
RETURN 

In [3]:
from langchain.chains import GraphCypherQAChain

questions = ["Who is the oldest director?",
             "Find all directors who have directed a movie in Spanish language.",
             "Give me 5 movies where a director has also acted?",
             "List all movies with an IMDb rating greater than 5 that have been directed by a director born in China."
             ]

chain = GraphCypherQAChain.from_llm(graph=graph, llm=llm, verbose=True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Who is the oldest director?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)
RETURN d.name, d.born
ORDER BY d.born ASC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Georges Méliès', 'd.born': neo4j.time.Date(1861, 12, 8)}][0m

[1m> Finished chain.[0m
Georges Méliès is the oldest director.

 Find all directors who have directed a movie in Spanish language.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE "Spanish" IN m.languages
RETURN d.name[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Alejandro Jodorowsky'}, {'d.name': 'Alejandro Jodorowsky'}, {'d.name': 'Alfonso Arau'}, {'d.name': 'Abel Ferrara'}, {'d.name': 'Nacho Vigalondo'}, {'d.name': 'Luis Buñuel'}, {'d.name': 'Luis Buñuel'}, {'d.name': 'Luis Buñuel'}, {'d.name': 'Luis Buñuel'}, {'d.name': 'Luis Buñuel'}][0m

[1m> Finished chain.[0m
Alejandro Jodorowsky, Alfonso Arau, Abel Ferrara,

# Set Up Database and Model

In [None]:
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq
from langchain.chains import GraphCypherQAChain
# from google.colab import userdata

# groq_api_key = userdata.get('GROQ_API')
# hf_api = userdata.get('HF_API')

#https://demo.neo4jlabs.com:7473/browser/
DEMO_URL = "neo4j+s://demo.neo4jlabs.com"
database = "recommendations"

# model = ChatGroq(temperature=0, model_name="gemma2-9b-it", groq_api_key = groq_api_key)

# Database Overview

In [None]:
graph = Neo4jGraph(url=DEMO_URL,database=database,username=database,password=database,sanitize=True,enhanced_schema=True)
print(graph.schema)



Node properties:
- **Movie**
  - `url`: STRING Example: "https://themoviedb.org/movie/862"
  - `runtime`: INTEGER Min: 2, Max: 910
  - `revenue`: INTEGER Min: 1, Max: 2787965087
  - `budget`: INTEGER Min: 1, Max: 380000000
  - `imdbRating`: FLOAT Min: 1.6, Max: 9.6
  - `released`: STRING Example: "1995-11-22"
  - `countries`: LIST Min Size: 1, Max Size: 16
  - `languages`: LIST Min Size: 1, Max Size: 19
  - `plot`: STRING Example: "A cowboy doll is profoundly threatened and jealous"
  - `imdbVotes`: INTEGER Min: 13, Max: 1626900
  - `imdbId`: STRING Example: "0114709"
  - `year`: INTEGER Min: 1902, Max: 2016
  - `poster`: STRING Example: "https://image.tmdb.org/t/p/w440_and_h660_face/uXDf"
  - `movieId`: STRING Example: "1"
  - `tmdbId`: STRING Example: "862"
  - `title`: STRING Example: "Toy Story"
- **Genre**
  - `name`: STRING Example: "Adventure"
- **User**
  - `userId`: STRING Example: "1"
  - `name`: STRING Example: "Omar Huffman"
- **Actor**
  - `url`: STRING Example: "https://t

# Trial 1 ( without any enhancement )

List of Questions

In [None]:
questions = ["Who is the oldest director?",
             "Find all directors who have directed a movie in Spanish language.",
             "Give me 5 movies where a director has also acted?",
             "List all movies with an IMDb rating greater than 5 that have been directed by a director born in China."
             ]

Possible correct Cypher Query:

1. MATCH (d:Director) WHERE d.born IS NOT NULL RETURN d ORDER BY d.born ASC LIMIT 1
2. MATCH (d:Director)-[:DIRECTED]->(m:Movie) WHERE 'Spanish' IN m.languages RETURN d.name
3. MATCH (d:Director)-[:ACTED_IN]->(m:Movie) WHERE exists{ (d)-[:DIRECTED]->(m) } RETURN m.title AS MovieTitle, m.movieId AS MovieID LIMIT 5
4. MATCH (m:Movie)<-[:DIRECTED]-(d:Director) WHERE m.imdbRating > 5 AND d.bornIn = 'China' RETURN m

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes")
model = AutoModelForCausalLM.from_pretrained("projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes").to("cuda:0")
def generate_text(text):
    inputs = tokenizer(text, return_tensors="pt").to("cuda:0")
    outputs = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
print("Before training\n")
generate_text("What are the tips for a successful business plan?")

tokenizer_config.json:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/956 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/39.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

pytorch_model-00001-of-00004.bin:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

pytorch_model-00002-of-00004.bin:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

pytorch_model-00003-of-00004.bin:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

pytorch_model-00004-of-00004.bin:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
from langchain_huggingface import HuggingFacePipeline

llm = HuggingFacePipeline.from_model_id(
    model_id="projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 100,
        "top_k": 50,
        "temperature": 0.1,
    },
)
llm.invoke("Hugging Face is")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/30.0 [00:00<?, ?B/s]

OSError: Can't load tokenizer for 'projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf' is the correct path to a directory containing all relevant files for a GemmaTokenizerFast tokenizer.

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    # repo_id="projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes",
    repo_id="projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

from huggingface_hub import login
login(hf_api)

chat_model = ChatHuggingFace(llm=llm)

chain = GraphCypherQAChain.from_llm(graph=graph, llm=chat_model, verbose=True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


config.json:   0%|          | 0.00/30.0 [00:00<?, ?B/s]

OSError: Can't load tokenizer for 'projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf' is the correct path to a directory containing all relevant files for a GemmaTokenizerFast tokenizer.

In [None]:
chain = GraphCypherQAChain.from_llm(graph=graph, llm=model, verbose=True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Who is the oldest director?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director) WITH d ORDER BY d.born ASC LIMIT 1 RETURN d.name[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Georges Méliès'}][0m

[1m> Finished chain.[0m
I don't know the answer. 


 Find all directors who have directed a movie in Spanish language.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE m.languages CONTAINS "Spanish"
RETURN d.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer. 


 Give me 5 movies where a director has also acted?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (m:Movie)-[:DIRECTED]->(d:Director)<-[:ACTED_IN]-(d)
RETURN m.title LIMIT 5 
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer. 


 List all movies with an IMDb rating greater tha

Adding validate_cypher parameter will force the llm to validate the generated cypher based on the provided nodes and properties

In [None]:
chain = GraphCypherQAChain.from_llm(graph=graph, llm=model, verbose=True, validate_cypher = True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Who is the oldest director?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director) WITH d ORDER BY d.born ASC LIMIT 1 RETURN d.name[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Georges Méliès'}][0m

[1m> Finished chain.[0m
I don't know the answer. 


 Find all directors who have directed a movie in Spanish language.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE m.languages CONTAINS "Spanish"
RETURN d.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer. 


 Give me 5 movies where a director has also acted?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer. 


 List all movies with an IMDb rating greater than 5 that have been directed by a director born in China.


[1m> Entering new Graph

In the example above, eventhough the full context is already provided but somehow the final output is I don't know the answer. To solve this, we could simply add use_function_response = True as one of the parameter

In [None]:
chain = GraphCypherQAChain.from_llm(graph=graph, llm=model, verbose=True, validate_cypher = True, use_function_response = True)

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Who is the oldest director?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director) WITH d ORDER BY d.born ASC LIMIT 1 RETURN d.name[0m
Full Context:
[32;1m[1;3m[{'d.name': 'Georges Méliès'}][0m

[1m> Finished chain.[0m
Georges Méliès is the oldest director.  


 Find all directors who have directed a movie in Spanish language.


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (d:Director)-[:DIRECTED]->(m:Movie)
WHERE m.languages CONTAINS "Spanish"
RETURN d.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
No directors found. 


 Give me 5 movies where a director has also acted?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I apologize, the tool did not provide any results.  


 List all movies with an IMDb rating greater than 5 that have been directed by a director born

# Trial 2 - Few Shot Prompting

{{}} not {}

In [None]:
examples= [
    {
        "question": "What are the top 5 movies with a runtime greater than 120 minutes?",
        "query": "MATCH (m:Movie) WHERE m.runtime > 120 RETURN m ORDER BY m.runtime DESC LIMIT 5",
    },
    {
        "question": "List the top 5 movies with a plot involving a 'young boy' or 'orphan'",
        "query": "MATCH (m:Movie) WHERE m.plot CONTAINS 'young boy' OR m.plot CONTAINS 'orphan' RETURN m ORDER BY m.imdbRating DESC LIMIT 5",
    },
    {
        "question": "What are the top 3 movies with the lowest IMDb votes?",
        "query": "MATCH (m:Movie) WHERE m.imdbVotes IS NOT NULL RETURN m ORDER BY m.imdbVotes ASC LIMIT 3",
    },
    {
        "question":"List the top 5 oldest directors in the database.",
        "query": "MATCH (d:Director) WHERE d.born IS NOT NULL RETURN d ORDER BY d.born LIMIT 5",
    },
    {
        "question": "List all movies that have a language list including Japanese.",
        "query": "MATCH (m:Movie) WHERE 'Japanese' IN m.languages RETURN m",
    },
    {
        "question": "Which 3 directors have directed movies in more than three genres?",
        "query": "MATCH (d:Director)-[:DIRECTED]->(m:Movie)-[:IN_GENRE]->(g:Genre) WITH d, count(DISTINCT g) AS genreCount WHERE genreCount > 3 RETURN d.name, genreCount ORDER BY genreCount DESC LIMIT 3",
    },
]

In [None]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

example_prompt = PromptTemplate.from_template(
    "User input: {question}\nCypher query: {query}"
)
prompt = FewShotPromptTemplate(
    examples=examples[:3],
    example_prompt=example_prompt,
    prefix="You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.\n\nHere is the schema information\n{schema}.\n\nBelow are a number of examples of questions and their corresponding Cypher queries.",
    suffix="User input: {question}\nCypher query: ",
    input_variables=["question", "schema"],
)

In [None]:
print(prompt.format(question="List all movies with an IMDb rating greater than 5 that have been directed by a director born in China", schema="foo"))

You are a Neo4j expert. Given an input question, create a syntactically correct Cypher query to run.

Here is the schema information
foo.

Below are a number of examples of questions and their corresponding Cypher queries.

User input: What are the top 5 movies with a runtime greater than 120 minutes?
Cypher query: MATCH (m:Movie) WHERE m.runtime > 120 RETURN m ORDER BY m.runtime DESC LIMIT 5

User input: List the top 5 movies with a plot involving a 'young boy' or 'orphan'
Cypher query: MATCH (m:Movie) WHERE m.plot CONTAINS 'young boy' OR m.plot CONTAINS 'orphan' RETURN m ORDER BY m.imdbRating DESC LIMIT 5

User input: What are the top 3 movies with the lowest IMDb votes?
Cypher query: MATCH (m:Movie) WHERE m.imdbVotes IS NOT NULL RETURN m ORDER BY m.imdbVotes ASC LIMIT 3

User input: List all movies with an IMDb rating greater than 5 that have been directed by a director born in China
Cypher query: 


In [None]:
chain_with_few_shot = GraphCypherQAChain.from_llm(graph=graph, llm=model, cypher_prompt=prompt, verbose=True, validate_cypher = True, use_function_response=True)

for q in questions:
    print("\n", q)
    try:
        result = chain_with_few_shot.invoke(q)['result']
        print(result)
    except:
        pass

  warn_deprecated(


NameError: name 'questions' is not defined

# Trial 3 - Dynamic Few Shot Prompting

In [None]:
from langchain_community.vectorstores import Neo4jVector
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_community.embeddings import HuggingFaceEmbeddings

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    HuggingFaceEmbeddings(),
    Neo4jVector,
    url = "neo4j+s://demo.neo4jlabs.com",
    database = "recommendations",
    username = "recommendations",
    password = "recommendations",
    k=3,
    input_keys=["question"],
)

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Forbidden: {code: Neo.ClientError.Security.Forbidden} {message: Creating new node label on database 'recommendations' is not allowed for user 'recommendations' with roles [PUBLIC, recommendations] restricted to SCHEMA. See GRANT CREATE NEW NODE LABEL ON DATABASE `recommendations`...}

# Fine Tuning

In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
pip uninstall pyarrow

Found existing installation: pyarrow 17.0.0
Uninstalling pyarrow-17.0.0:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/benchmarks/*
    /usr/local/lib/python3.10/dist-packages/cmake_modules/AWSSDKVariables.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/BuildUtils.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/DefineOptions.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindAWSSDKAlt.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindAzure.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindBrotliAlt.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindClangTools.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindGTestAlt.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindInferTools.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindLLVMAlt.cmake
    /usr/local/lib/python3.10/dist-packages/cmake_modules/FindOpenSSLAlt.cmake
 

In [None]:
pip install pyarrow==15.0.2

Collecting pyarrow==15.0.2
  Downloading pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Downloading pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl (38.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 15.0.2 which is incompatible.[0m[31m
[0mSuccessfully installed pyarrow-15.0.2


In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2-9b",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Gemma2 patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/6.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/40.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.8 patched 42 layers with 42 QKV layers, 42 O layers and 42 MLP layers.


In [None]:

prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = f"Convert text to cypher query based on this schema: {graph.schema}"
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for input, output in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = prompt.format(instructions, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

In [None]:
import pandas as pd

df = pd.read_csv('/content/text2cypher_gpt4turbo.csv')
df = df[(df['database'] == 'recommendations') & (df['syntax_error'] == False) & (df['timeout'] == False)]
df

Unnamed: 0,question,cypher,type,database,syntax_error,timeout,returns_results,false_schema
7275,What are the top 5 movies with a runtime great...,MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN...,Simple Retrieval Queries,recommendations,False,False,True,
7276,List the first 3 genres with movies having an ...,MATCH (m:Movie)-[:IN_GENRE]->(g:Genre)\nWHERE ...,Verbose query,recommendations,False,False,True,
7277,List the first 5 directors who have a biograph...,MATCH (d:Director)\nWHERE d.bio IS NOT NULL\nR...,Simple Retrieval Queries,recommendations,False,False,True,
7278,Which 3 movies have the most detailed plot des...,"MATCH (m:Movie)\nRETURN m.title, m.plot\nORDER...",Simple Retrieval Queries,recommendations,False,False,True,
7279,Show the top 5 actors who have acted in movies...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)<-[:DIRE...,Simple Retrieval Queries,recommendations,False,False,True,
...,...,...,...,...,...,...,...,...
8067,Which movies have been acted in by more than 1...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\nWITH m...,Complex Retrieval Queries,recommendations,False,False,True,
8068,Find all movies where the director has directe...,MATCH (d:Director)-[:DIRECTED]->(m:Movie)\nWIT...,Complex Retrieval Queries,recommendations,False,False,False,
8069,Find all movies that have a plot mentioning 'h...,MATCH (m:Movie)\nWHERE m.plot CONTAINS 'hero'\...,Complex Retrieval Queries,recommendations,False,False,True,
8070,Which movies have been rated the highest by us...,"MATCH (u:User)-[r:RATED]->(m:Movie)\nWITH u, c...",Complex Retrieval Queries,recommendations,False,False,True,


In [None]:
df = df[['question','cypher']]
df.rename(columns={'question': 'input','cypher':'output'}, inplace=True)
df.reset_index(drop=True, inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'question': 'input','cypher':'output'}, inplace=True)


Unnamed: 0,input,output
0,What are the top 5 movies with a runtime great...,MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN...
1,List the first 3 genres with movies having an ...,MATCH (m:Movie)-[:IN_GENRE]->(g:Genre)\nWHERE ...
2,List the first 5 directors who have a biograph...,MATCH (d:Director)\nWHERE d.bio IS NOT NULL\nR...
3,Which 3 movies have the most detailed plot des...,"MATCH (m:Movie)\nRETURN m.title, m.plot\nORDER..."
4,Show the top 5 actors who have acted in movies...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)<-[:DIRE...
...,...,...
757,Which movies have been acted in by more than 1...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\nWITH m...
758,Find all movies where the director has directe...,MATCH (d:Director)-[:DIRECTED]->(m:Movie)\nWIT...
759,Find all movies that have a plot mentioning 'h...,MATCH (m:Movie)\nWHERE m.plot CONTAINS 'hero'\...
760,Which movies have been rated the highest by us...,"MATCH (u:User)-[r:RATED]->(m:Movie)\nWITH u, c..."


In [None]:
from datasets import Dataset
dataset = Dataset.from_pandas(df)
dataset = dataset.map(formatting_prompts_func, batched = True)
dataset



Map:   0%|          | 0/762 [00:00<?, ? examples/s]

Dataset({
    features: ['input', 'output', 'text'],
    num_rows: 762
})

In [None]:
dataset[0]

{'input': 'What are the top 5 movies with a runtime greater than 120 minutes?',
 'output': 'MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN m\nORDER BY m.runtime DESC\nLIMIT 5',
 'text': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nConvert text to cypher query based on this schema: Node properties:\n- **Movie**\n  - `url`: STRING Example: "https://themoviedb.org/movie/862"\n  - `runtime`: INTEGER Min: 2, Max: 910\n  - `revenue`: INTEGER Min: 1, Max: 2787965087\n  - `imdbRating`: FLOAT Min: 1.6, Max: 9.6\n  - `released`: STRING Example: "1995-11-22"\n  - `countries`: LIST Min Size: 1, Max Size: 16\n  - `languages`: LIST Min Size: 1, Max Size: 19\n  - `plot`: STRING Example: "A cowboy doll is profoundly threatened and jealous"\n  - `imdbVotes`: INTEGER Min: 13, Max: 1626900\n  - `imdbId`: STRING Example: "0114709"\n  - `year`: INTEGER Min: 1902, Max: 2016\

In [None]:

from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = 60,
        num_train_epochs=1,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/762 [00:00<?, ? examples/s]

In [None]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
12.951 GB of memory reserved.


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 762 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 95
 "-____-"     Number of trainable parameters = 54,018,048


Step,Training Loss
1,0.9044
2,0.9031
3,0.7546
4,0.5679
5,0.3723
6,0.2449
7,0.1454
8,0.1483
9,0.089
10,0.0738


In [None]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    prompt.format(
        f"Convert text to cypher query based on this schema: {graph.schema}", # instruction
        "Who is the oldest director?", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Convert text to cypher query based on this schema: Node properties:
- **Movie**
  - `url`: STRING Example: "https://themoviedb.org/movie/862"
  - `runtime`: INTEGER Min: 2, Max: 910
  - `revenue`: INTEGER Min: 1, Max: 2787965087
  - `budget`: INTEGER Min: 1, Max: 380000000
  - `imdbRating`: FLOAT Min: 1.6, Max: 9.6
  - `released`: STRING Example: "1995-11-22"
  - `countries`: LIST Min Size: 1, Max Size: 16
  - `languages`: LIST Min Size: 1, Max Size: 19
  - `plot`: STRING Example: "A cowboy doll is profoundly threatened and jealous"
  - `imdbVotes`: INTEGER Min: 13, Max: 1626900
  - `imdbId`: STRING Example: "0114709"
  - `year`: INTEGER Min: 1902, Max: 2016
  - `poster`: STRING Example: "https://image.tmdb.org/t/p/w440_and_h660_face/uXDf"
  - `movieId`: STRING Example: "1"
  - `tmdbId`: STRING Example: "862

## Saving the finetuned model

Local saving

In [None]:
# model.save_pretrained("lora_model") # Local saving
# tokenizer.save_pretrained("lora_model")

Online saving ( Hugging Face)

In [None]:
# should have write access

model.push_to_hub("projectwilsen/gemma2_9b_text2cypher_recommendations_tes", token = hf_api)
tokenizer.push_to_hub("projectwilsen/gemma2_9b_text2cypher_recommendations_tes", token = hf_api)

## Load the finetuned model from Hugging Face

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "projectwilsen/gemma2_9b_text2cypher_recommendations", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

==((====))==  Unsloth 2024.8: Fast Gemma2 patching. Transformers = 4.43.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
inputs = tokenizer(
[
    prompt.format(
        f"Convert text to cypher query based on this schema: {graph.schema}", # instruction
        "Who is the oldest director?", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
result = tokenizer.batch_decode(outputs)
response = result[0].split("### Response:")[1].split("###")[0].strip().replace("<|end_of_text|>", "")
print(response)

In [None]:
# model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
model.push_to_hub_merged("projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes", tokenizer, save_method = "merged_16bit", token = hf_api)

Unsloth: You are pushing to hub, but you passed your HF username = projectwilsen.
We shall truncate projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes to gemma2_9b_text2cypher_recommendations_merged_16bit_tes
Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which will take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.1G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.21 out of 12.67 RAM for saving.


 14%|█▍        | 6/42 [00:00<00:03, 10.35it/s]We will save to Disk and not RAM now.
100%|██████████| 42/42 [02:49<00:00,  4.03s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving gemma2_9b_text2cypher_recommendations_merged_16bit_tes/pytorch_model-00001-of-00004.bin...
Unsloth: Saving gemma2_9b_text2cypher_recommendations_merged_16bit_tes/pytorch_model-00002-of-00004.bin...
Unsloth: Saving gemma2_9b_text2cypher_recommendations_merged_16bit_tes/pytorch_model-00003-of-00004.bin...
Unsloth: Saving gemma2_9b_text2cypher_recommendations_merged_16bit_tes/pytorch_model-00004-of-00004.bin...
Done.
Saved merged model to https://huggingface.co/projectwilsen/gemma2_9b_text2cypher_recommendations_merged_16bit_tes


In [None]:
model.push_to_hub_gguf("projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf", tokenizer, quantization_method = "q4_k_m", token = hf_api)

Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which will take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.1G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 5.09 out of 12.67 RAM for saving.


 33%|███▎      | 14/42 [00:02<00:04,  6.48it/s]We will save to Disk and not RAM now.
100%|██████████| 42/42 [03:18<00:00,  4.72s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf/pytorch_model-00001-of-00004.bin...
Unsloth: Saving projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf/pytorch_model-00002-of-00004.bin...
Unsloth: Saving projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf/pytorch_model-00003-of-00004.bin...
Unsloth: Saving projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: Converting gemma2 model. Can use fast conversion = False.


==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp will take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits will take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] will take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: [0] Installing llama.cpp. This will take 3 minutes...
Unsloth: [1] Converting model at projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf into f16 GGUF format.
The output location will be ./projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf/unsloth.F16.gguf
This will take 3 minutes...
INFO:hf-to-gguf:Loading model: gemma2_9b_text2cypher_recommendations_4bit_gguf
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'
INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00004.bin

  0%|          | 0/1 [00:00<?, ?it/s]

unsloth.F16.gguf:   0%|          | 0.00/18.5G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf
Unsloth: Uploading GGUF to Huggingface Hub...


  0%|          | 0/1 [00:00<?, ?it/s]

unsloth.Q4_K_M.gguf:   0%|          | 0.00/5.76G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/projectwilsen/gemma2_9b_text2cypher_recommendations_4bit_gguf
