In [1]:
import json
import os

import pandas as pd
import requests
from dotenv import load_dotenv
from neo4j import GraphDatabase, RoutingControl
from predibase import PredibaseClient

In [None]:
# Load variables from a local .env file (if one exists) into the process
# environment. Without this call the lookups below only see variables that
# were already exported in the shell that launched Jupyter.
load_dotenv()

# Neo4j connection settings; each value is None when its variable is unset.
neo4j_uri = os.environ.get('NEO4J_URI')
neo4j_username = os.environ.get('NEO4J_USERNAME')
neo4j_password = os.environ.get('NEO4J_PASSWORD')
# (username, password) tuple in the form expected by the driver's `auth` argument.
neo4j_auth = (neo4j_username, neo4j_password)

In [None]:
# OpenAI API key read from the environment (None when the variable is unset).
open_ai_api_key = os.getenv('OPENAI_API_KEY')

In [None]:
# Predibase client used for dataset upload, fine-tuning and deployment.
pc = PredibaseClient()

# Neo4j driver built from the connection settings read from the environment
# (`neo4j_uri` and the `neo4j_auth` (username, password) tuple).
driver = GraphDatabase.driver(neo4j_uri, auth=neo4j_auth)

In [11]:
def execute_query(driver, query):
    """Run a read-only Cypher query through the Neo4j driver.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            exposing ``execute_read`` (here, the Neo4j driver).
        query: Cypher statement to run, passed to ``tx.run`` unchanged.

    Returns:
        A list with one entry per returned record, each entry being the
        result of calling ``record.data()``.
    """
    def _collect_rows(tx):
        # Materialise the records inside the transaction function so the
        # result is fully consumed before the transaction ends.
        rows = []
        for record in tx.run(query):
            rows.append(record.data())
        return rows

    with driver.session() as neo4j_session:
        return neo4j_session.execute_read(_collect_rows)

#### Load and preview training data

This notebook uses the synthetic training data generated by the code in the `generate_synthetic_data` directory of this repository.

In [None]:
# Path to the synthetic training CSV; reused below when uploading the dataset.
training_data = 'training-data-twitter.csv'
# Load the same file locally so it can be inspected before upload.
df = pd.read_csv(training_data)

In [None]:
# Report how many training examples were loaded.
print(f"Number of rows: {df.shape[0]}")

In [None]:
# Preview the first five rows of the training data.
display(df.head(n=5))

#### Upload training data to Predibase

In [None]:
# Upload the CSV at the path held in `training_data`; the returned object is
# what the fine-tuning call below receives as its `dataset` argument.
dataset = pc.upload_dataset(training_data)

#### Configure and start the training job

In [4]:
# Defines the template used to prompt the model for each example.
# `{instruction}` and `{input}` are placeholders; the template ends after
# "### Response:" so the model's output follows that marker.
# NOTE: the leading spaces on the lines inside the literal are part of the
# string and therefore part of the prompt text.

prompt_template = """Below is an instruction that describes a task, paired with an input
    that may provide further context. Write a response that appropriately
    completes the request.

    ### Instruction: {instruction}

    ### Input: {input}

    ### Response:
"""

In [None]:
# Specifies the Huggingface LLM you want to fine-tune
# NOTE(review): confirm this model URI matches the base model behind the
# "llama-2-7b" deployment used later in the notebook.
llm = pc.LLM("hf://meta-llama/Llama-2-7b")

# Kick off a fine-tuning job on the uploaded dataset.
# `target="output"` presumably names the dataset column holding the expected
# completion — verify against the CSV header.
job = llm.finetune(
    prompt_template=prompt_template,
    target="output",
    dataset=dataset,
)

# Wait for the job to finish and get training updates and metrics
model = job.get()

#### Load new model with adapter

Wait for the model to finish training in the previous step before setting up the deployment.

In [None]:
# Reference the existing base-model deployment by its Predibase URI.
base_deployment = pc.LLM("pb://deployments/llama-2-7b")

# Set the adapter to our newly fine-tuned model
adapter_deployment = base_deployment.with_adapter(model)

#### Test the new model 

In [None]:
# Introspect the graph schema from Neo4j so it can be embedded in the prompt.
query = "CALL apoc.meta.graph()"
meta_graph = execute_query(driver, query)
# `default=str` keeps serialisation from failing if the schema contains a
# value the json module cannot encode natively.
meta_graph_str = json.dumps(meta_graph, default=str)

# Ask the fine-tuned adapter for a Cypher query; the dict keys match the
# `{instruction}` and `{input}` placeholders of the prompt template.
result = adapter_deployment.prompt(
    {
        "instruction": f"Given this schema, write a Cypher query that returns the data I am looking for.  Schema:  {meta_graph_str}",
        "input": "I am neo4j. Find the hashtags used in my tweets that have the most favourites.",
    },
    max_new_tokens=256,
)

# Show the generated text so the test actually produces visible output,
# consistent with the test of the deployed model further down.
print(result.response)

#### Deploy new model 

In [None]:
# Deploy the fine-tuned model and wait (`.get()`) for the deployment handle.
# NOTE(review): the deployment name "llama-2-7b" is the same as the base
# deployment referenced earlier — confirm this does not collide.
finetuned_llm = model.deploy("llama-2-7b").get()

#### Test newly deployed model

In [None]:
# Prompt the dedicated deployment, reusing the schema string built when the
# adapter was tested above.
result = finetuned_llm.prompt(
    {
        "instruction": f"Given this schema, write a Cypher query that returns the data I am looking for.  Schema:  {meta_graph_str}",
        "input": "I am neo4j. How many of my tweets did 'nsmith_piano' reply to?",
    },
    max_new_tokens=256,
)

# Display the generated text.
print(result.response)

#### Call Neo4j with the query

In [None]:



# Run the APOC schema-introspection procedure and print the returned records.
# NOTE(review): this cell re-runs the schema query rather than the Cypher
# produced by the model (`result.response`) — confirm which one is intended.
query = "CALL apoc.meta.graph()"
meta_graph = execute_query(driver, query)

print("Meta Graph: \n\n")
print(meta_graph)