To generate text2cypher dataset based on your own data, you could refer to: https://github.com/tomasonjo/text2cypher

In [1]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes
!pip install langchain_community neo4j langchain langchain_groq

# Initial Set Up

In [2]:
from langchain_community.graphs import Neo4jGraph
from langchain_groq import ChatGroq
from langchain.chains import GraphCypherQAChain
from google.colab import userdata
import os

os.environ["GROQ_API"] = "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"
os.environ["HF_API"] = "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"

groq_api_key = os.getenv('GROQ_API')
hf_api = os.getenv('HF_API')

neo4j_url = "neo4j+ssc://63de7d0e.databases.neo4j.io"
neo4j_user = "neo4j"
neo4j_password = "bkR1Ib1eerfMqlKdG7B2wOWuS5ki8qiJ7kivxx_5zKM"

In [3]:
graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password,sanitize=True)
print(graph.schema)

  graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password,sanitize=True)


Node properties:
Employee {name: STRING, nationality: STRING, sex: STRING, dob: STRING, sen: STRING}
Nationality {name: STRING}
Sex {type: STRING}
Seniority {name: STRING}
Language {name: STRING}
Level {name: STRING}
ProgramingLanguage {name: STRING}
Tool {name: STRING}
AutomationTool {name: STRING}
Microservice {name: STRING}
OS {name: STRING}
Database {name: STRING}
Cloud {name: STRING}
Relationship properties:
HAS_LANGUAGE_LEVEL {language: STRING}
The relationships:
(:Employee)-[:HAS_NATIONALITY]->(:Nationality)
(:Employee)-[:HAS_SEX]->(:Sex)
(:Employee)-[:HAS_SENIORITY]->(:Seniority)
(:Employee)-[:LANGUAGE]->(:Language)
(:Employee)-[:KNOWS_PROGRAM_LANGUAGE]->(:ProgramingLanguage)
(:Employee)-[:USES_TOOL]->(:Tool)
(:Employee)-[:USES_AUTOMATION_TOOL]->(:AutomationTool)
(:Employee)-[:HAS_EXPERIENCE_WITH_MICROSERVICE]->(:Microservice)
(:Employee)-[:USES_OS]->(:OS)
(:Employee)-[:WORKS_WITH_DATABASE]->(:Database)
(:Employee)-[:USES_CLOUD]->(:Cloud)
(:Employee)-[:HAS_LANGUAGE_LEVEL]->(:Le

New Update from Langchain (09/05/24): an enhanced schema parameter representation that samples the database values and return them to the LLM to be able to generate more accurate Cypher statements

https://python.langchain.com/v0.1/docs/integrations/graphs/neo4j_cypher/#enhanced-schema-information

In [4]:
graph = Neo4jGraph(neo4j_url, neo4j_user, neo4j_password,sanitize=True,enhanced_schema=True)
print(graph.schema)



Node properties:
- **Employee**
  - `name`: STRING Example: "Vu Viet A"
  - `nationality`: STRING Available options: ['Viet Nam']
  - `sex`: STRING Available options: ['Male', 'Female']
  - `dob`: STRING Example: "1993"
  - `sen`: STRING Available options: ['7 year', '1 year', '5 year', '3.5 year', '6 year', '2 year', '3 year', '9 year', '4 year']
- **Nationality**
  - `name`: STRING Available options: ['Viet Nam']
- **Sex**
  - `type`: STRING Available options: ['Male', 'Female']
- **Seniority**
  - `name`: STRING Available options: ['7 year', '1 year', '5 year', '3.5 year', '6 year', '2 year', '3 year', '9 year', '4 year']
- **Language**
  - `name`: STRING Available options: ['Vietnamese', 'English', 'Japanese', 'German', 'Korean']
- **Level**
  - `name`: STRING Available options: ['Native', 'C1 Advance', 'Deutsche Zertifikat B1', 'Around 700 Toeic', 'IELTS 6.5, Toeic: 785', 'Toeic 600', 'IELTS 6.5', 'Medium', 'Low', 'High']
- **ProgramingLanguage**
  - `name`: STRING Example: "Pytho

# Testing

In [5]:
model = ChatGroq(temperature=0, model_name="llama3-8b-8192", groq_api_key = groq_api_key)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=model,
    verbose=True,
    allow_dangerous_requests=True  # Enable this
)

In [6]:
questions = ["Which employees use Azure?",
             "Find employees using Terraform",
             "List employees who use Docker",
             "List all employees who know Python and Java"
             ]

# POSSIBLE CORRECT CYPHER QUERY
# 1. MATCH (d:Director) WHERE d.born IS NOT NULL RETURN d ORDER BY d.born ASC LIMIT 1
# 2. MATCH (d:Director)-[:DIRECTED]->(m:Movie) WHERE 'Spanish' IN m.languages RETURN d.name
# 3. MATCH (d:Director)-[:ACTED_IN]->(m:Movie) WHERE exists{ (d)-[:DIRECTED]->(m) } RETURN m.title AS MovieTitle, m.movieId AS MovieID LIMIT 5
# 4. MATCH (m:Movie)<-[:DIRECTED]-(d:Director) WHERE m.imdbRating > 5 AND d.bornIn = 'China' RETURN m

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Which employees use Azure?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:Employee)-[:USES_CLOUD]->(c:Cloud {name: "Azure"}) RETURN e;[0m
Full Context:
[32;1m[1;3m[{'e': {'nationality': 'Viet Nam', 'dob': '1993', 'sex': 'Male', 'name': 'Pham Viet H', 'sen': '5 year'}}, {'e': {'nationality': 'Viet Nam', 'dob': '1997', 'sex': 'Male', 'name': 'Nguyen Quang H'}}, {'e': {'nationality': 'Viet Nam', 'dob': '1990', 'sex': 'Male', 'name': 'Chu Ngoc M', 'sen': '4 year'}}, {'e': {'nationality': 'Viet Nam', 'dob': '2001', 'sex': 'Male', 'name': 'Nguyen Duc T', 'sen': '3 year'}}, {'e': {'nationality': 'Viet Nam', 'dob': '1998', 'sex': 'Male', 'name': 'Dang The A', 'sen': '4 year'}}, {'e': {'nationality': 'Viet Nam', 'dob': '2000', 'sex': 'Male', 'name': 'Luong Ba H'}}][0m

[1m> Finished chain.[0m
I don't know the answer.

 Find employees using Terraform


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:

New Update from Langchain (09/05/2024): use validate_cypher parameter with enhanced schema parameter to get the best results

In [7]:
model = ChatGroq(temperature=0, model_name="llama-3.3-70b-versatile", groq_api_key = groq_api_key)
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    llm=model,
    verbose=True,
    allow_dangerous_requests=True  # Enable this
)

In [8]:
questions = ["Which employees use Azure?",
             "Find employees using Terraform",
             "List employees who use Docker",
             "List all employees who know Python and Java"
             ]

for q in questions:
    print("\n", q)
    try:
        result = chain.invoke(q)['result']
        print(result)
    except:
        pass


 Which employees use Azure?


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:Employee)-[:USES_CLOUD]->(c:Cloud) WHERE c.name = 'Azure' RETURN e.name[0m
Full Context:
[32;1m[1;3m[{'e.name': 'Pham Viet H'}, {'e.name': 'Nguyen Quang H'}, {'e.name': 'Chu Ngoc M'}, {'e.name': 'Nguyen Duc T'}, {'e.name': 'Dang The A'}, {'e.name': 'Luong Ba H'}][0m

[1m> Finished chain.[0m
I don't know the answer.

 Find employees using Terraform


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:Employee)-[:USES_TOOL]->(t:Tool {name: 'Terraform'}) RETURN e.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
I don't know the answer.

 List employees who use Docker


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:Employee)-[:USES_TOOL]->(t:Tool) WHERE t.name = 'Docker' RETURN e.name[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Docker is a

# Fine Tuning using Unsloth

The code below is from the Unsloth repository: https://colab.research.google.com/drive/135ced7oHytdxu3N2DNe1Z0kqjyYIkDXp?usp=sharing

In [9]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

Unsloth: Patching Xformers to fix some performance issues.
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.5.1+cu124)
    Python  3.11.8 (you have 3.11.11)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.9: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Adding LORA

In [10]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.3.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## Create a dataset

In [11]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = f"Convert text to cypher query based on this schema: {graph.schema}"
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for input, output in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = prompt.format(instructions, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

Load data from HuggingFace

In [13]:
from datasets import load_dataset
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
dataset

README.md:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

alpaca_data_cleaned.json:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]



Map:   0%|          | 0/51760 [00:00<?, ? examples/s]

Dataset({
    features: ['output', 'input', 'instruction', 'text'],
    num_rows: 51760
})

Load our own data

we're going to use: https://github.com/tomasonjo/text2cypher/blob/main/datasets/synthetic_gpt4turbo_demodbs/text2cypher_gpt4turbo.csv

In [14]:
import pandas as pd

df = pd.read_csv('/content/text2cypher_gpt4turbo.csv')
df = df[(df['database'] == 'recommendations') & (df['syntax_error'] == False) & (df['timeout'] == False)]
df

Unnamed: 0,question,cypher,type,database,syntax_error,timeout,returns_results,false_schema
7275,What are the top 5 movies with a runtime great...,MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN...,Simple Retrieval Queries,recommendations,False,False,True,
7276,List the first 3 genres with movies having an ...,MATCH (m:Movie)-[:IN_GENRE]->(g:Genre)\nWHERE ...,Verbose query,recommendations,False,False,True,
7277,List the first 5 directors who have a biograph...,MATCH (d:Director)\nWHERE d.bio IS NOT NULL\nR...,Simple Retrieval Queries,recommendations,False,False,True,
7278,Which 3 movies have the most detailed plot des...,"MATCH (m:Movie)\nRETURN m.title, m.plot\nORDER...",Simple Retrieval Queries,recommendations,False,False,True,
7279,Show the top 5 actors who have acted in movies...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)<-[:DIRE...,Simple Retrieval Queries,recommendations,False,False,True,
...,...,...,...,...,...,...,...,...
8067,Which movies have been acted in by more than 1...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\nWITH m...,Complex Retrieval Queries,recommendations,False,False,True,
8068,Find all movies where the director has directe...,MATCH (d:Director)-[:DIRECTED]->(m:Movie)\nWIT...,Complex Retrieval Queries,recommendations,False,False,False,
8069,Find all movies that have a plot mentioning 'h...,MATCH (m:Movie)\nWHERE m.plot CONTAINS 'hero'\...,Complex Retrieval Queries,recommendations,False,False,True,
8070,Which movies have been rated the highest by us...,"MATCH (u:User)-[r:RATED]->(m:Movie)\nWITH u, c...",Complex Retrieval Queries,recommendations,False,False,True,


In [15]:
df = df[['question','cypher']]
df.rename(columns={'question': 'input','cypher':'output'}, inplace=True)
df.reset_index(drop=True, inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'question': 'input','cypher':'output'}, inplace=True)


Unnamed: 0,input,output
0,What are the top 5 movies with a runtime great...,MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN...
1,List the first 3 genres with movies having an ...,MATCH (m:Movie)-[:IN_GENRE]->(g:Genre)\nWHERE ...
2,List the first 5 directors who have a biograph...,MATCH (d:Director)\nWHERE d.bio IS NOT NULL\nR...
3,Which 3 movies have the most detailed plot des...,"MATCH (m:Movie)\nRETURN m.title, m.plot\nORDER..."
4,Show the top 5 actors who have acted in movies...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)<-[:DIRE...
...,...,...
757,Which movies have been acted in by more than 1...,MATCH (a:Actor)-[:ACTED_IN]->(m:Movie)\nWITH m...
758,Find all movies where the director has directe...,MATCH (d:Director)-[:DIRECTED]->(m:Movie)\nWIT...
759,Find all movies that have a plot mentioning 'h...,MATCH (m:Movie)\nWHERE m.plot CONTAINS 'hero'\...
760,Which movies have been rated the highest by us...,"MATCH (u:User)-[r:RATED]->(m:Movie)\nWITH u, c..."


In [16]:
from datasets import Dataset
dataset = Dataset.from_pandas(df)
dataset = dataset.map(formatting_prompts_func, batched = True)
dataset

Map:   0%|          | 0/762 [00:00<?, ? examples/s]

Dataset({
    features: ['input', 'output', 'text'],
    num_rows: 762
})

In [17]:
dataset[0]

{'input': 'What are the top 5 movies with a runtime greater than 120 minutes?',
 'output': 'MATCH (m:Movie)\nWHERE m.runtime > 120\nRETURN m\nORDER BY m.runtime DESC\nLIMIT 5',
 'text': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nConvert text to cypher query based on this schema: Node properties:\n- **Employee**\n  - `name`: STRING Example: "Vu Viet A"\n  - `nationality`: STRING Available options: [\'Viet Nam\']\n  - `sex`: STRING Available options: [\'Male\', \'Female\']\n  - `dob`: STRING Example: "1993"\n  - `sen`: STRING Available options: [\'7 year\', \'1 year\', \'5 year\', \'3.5 year\', \'6 year\', \'2 year\', \'3 year\', \'9 year\', \'4 year\']\n- **Nationality**\n  - `name`: STRING Available options: [\'Viet Nam\']\n- **Sex**\n  - `type`: STRING Available options: [\'Male\', \'Female\']\n- **Seniority**\n  - `name`: STRING Available options: [

# Train the model

In [19]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = 60,
        num_train_epochs=1,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Tokenizing to ["text"] (num_proc=2):   0%|          | 0/762 [00:00<?, ? examples/s]

In [20]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
5.496 GB of memory reserved.


In [21]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 762 | Num Epochs = 1 | Total steps = 95
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/4,582,543,360 (0.92% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mleminhnghia203[0m ([33mleminhnghia203-fpt-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.5859
2,1.6448
3,1.6609
4,1.4085
5,0.8511
6,0.3795
7,0.1081
8,0.0361
9,0.0266
10,0.0229


In [22]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

2448.0929 seconds used for training.
40.8 minutes used for training.
Peak reserved memory = 7.188 GB.
Peak reserved memory for training = 1.692 GB.
Peak reserved memory % of max memory = 48.762 %.
Peak reserved memory for training % of max memory = 11.478 %.


# Inference

In [23]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    prompt.format(
        f"Convert text to cypher query based on this schema: {graph.schema}", # instruction
        "Which employees use Azure?", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Convert text to cypher query based on this schema: Node properties:
- **Employee**
  - `name`: STRING Example: "Vu Viet A"
  - `nationality`: STRING Available options: ['Viet Nam']
  - `sex`: STRING Available options: ['Male', 'Female']
  - `dob`: STRING Example: "1993"
  - `sen`: STRING Available options: ['7 year', '1 year', '5 year', '3.5 year', '6 year', '2 year', '3 year', '9 year', '4 year']
- **Nationality**
  - `name`: STRING Available options: ['Viet Nam']
- **Sex**
  - `type`: STRING Available options: ['Male', 'Female']
- **Seniority**
  - `name`: STRING Available options: ['7 year', '1 year', '5 year', '3.5 year', '6 year', '2 year', '3 year', '9 year', '4 year']
- **Language**
  - `name`: STRING Available options: ['Vietnamese', 'English', 'Japanese', 'German', 'Korean']
- **Level**


# Save the Finetuned

Local Saving

In [None]:
# model.save_pretrained("lora_model") # Local saving
# tokenizer.save_pretrained("lora_model")

Online Saving to HuggingFace

In [None]:
# should have write access

model.push_to_hub("projectwilsen/llama3_text2cypher_recommendations", token = hf_api)
tokenizer.push_to_hub("projectwilsen/llama3_text2cypher_recommendations", token = hf_api)

# Load Finetuned Model from HuggingFace

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "projectwilsen/llama3_text2cypher_recommendations_epoch2", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

==((====))==  Unsloth 2025.3.8: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128255)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [24]:
inputs = tokenizer(
[
    prompt.format(
        f"Convert text to cypher query based on this schema: {graph.schema}", # instruction
        "Which employees use Azure?", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
result = tokenizer.batch_decode(outputs)
response = result[0].split("### Response:")[1].split("###")[0].strip().replace("<|end_of_text|>", "")
print(response)

Response:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:


# Evaluating

Unsloth has not integrated in Langchain, so need little adjustment

In [25]:
from langchain.chains import LLMChain
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from google.colab import userdata
os.environ["GROQ_API"] = "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"

groq_api_key = os.getenv('GROQ_API')

CYPHER_QA_TEMPLATE = """You convert context to a final answer. Understand the question, the context, then generate result.
Here is an example:

Question: Which employees use Azure?
Context: [{'e.name': 'Pham Viet H'}, {'e.name': 'Nguyen Quang H'}, {'e.name': 'Chu Ngoc M'}, {'e.name': 'Nguyen Duc T'}, {'e.name': 'Dang The A'}, {'e.name': 'Luong Ba H'}]
Helpful Answer: Pham Viet H, Nguyen Quang H, Chu Ngoc M, Nguyen Duc T, Dang The A, Luong Ba H are employees using Azure

Follow this example when generating answers.
Answer in short, don't hallucinate!
Question: {question}
Information: {context}
Helpful Answer:
"""

qa_prompt = ChatPromptTemplate.from_template(CYPHER_QA_TEMPLATE)
output_parser = StrOutputParser()
llm = ChatGroq(temperature=0, model_name="llama3-8b-8192", groq_api_key = groq_api_key)
chain = qa_prompt | llm | output_parser

context = graph.query(response)
question = "Which employees use Azure?"

chain.invoke({"context":context , "question":question})

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Response': expected 'FOREACH', 'ALTER', 'ORDER BY', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'LIMIT', 'MATCH', 'MERGE', 'NODETACH', 'OFFSET', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'SKIP', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 1, column 1 (offset: 0))
"Response:"
 ^}

In [26]:
questions = ["Which employees use Azure?",
             "Find employees using Terraform",
             "List employees who use Docker",
             "List all employees who know Python and Java"
             ]

def generate_cypher_query(question):
  inputs = tokenizer(
  [
      prompt.format(
          f"Convert text to cypher query based on this schema: {graph.schema}", # instruction
          question, # input
          "", # output - leave this blank for generation!
      )
  ], return_tensors = "pt").to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
  result = tokenizer.batch_decode(outputs)
  cypher_query = result[0].split("### Response:")[1].split("###")[0].strip().replace("<|end_of_text|>", "")
  return cypher_query

for q in questions:
    print("\n",q)
    cypher_query = generate_cypher_query(q)
    print(cypher_query)
    context = graph.query(cypher_query)
    print('context: ', context)
    result = chain.invoke({"context":context , "question":q})
    print(result)


 Which employees use Azure?
Response:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:
:


CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Response': expected 'FOREACH', 'ALTER', 'ORDER BY', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'LIMIT', 'MATCH', 'MERGE', 'NODETACH', 'OFFSET', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'SKIP', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 1, column 1 (offset: 0))
"Response:"
 ^}