In [None]:
import csv
import os
import json
from openai import OpenAI
from neo4j import GraphDatabase, RoutingControl
from predibase import PredibaseClient

In [None]:
# Neo4j connection settings, read from the environment (each is None when its variable is unset)
neo4j_uri = os.getenv("NEO4J_URI")
neo4j_username = os.getenv("NEO4J_USERNAME")
neo4j_password = os.getenv("NEO4J_PASSWORD")
# (user, password) pair handed to the driver as its auth argument
neo4j_auth = (neo4j_username, neo4j_password)

In [None]:
# OpenAI credential comes from the environment; stays None if the variable is unset
open_ai_api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# OpenAI client; generate_cypher_query sends its chat-completion requests through it
client = OpenAI(api_key=open_ai_api_key)

In [None]:
# Service handles: the Predibase API client and the Neo4j driver
pc = PredibaseClient()
driver = GraphDatabase.driver(neo4j_uri, auth=neo4j_auth)

In [None]:
# Helper that runs a Cypher query through the Neo4j driver in a read transaction

def execute_query(driver, query, parameters=None):
    """Run a Cypher query in a managed read transaction and return its rows.

    Args:
        driver: Neo4j driver (any object exposing the same ``session()`` API works).
        query: Cypher text to execute.
        parameters: Optional dict of query parameters forwarded to ``tx.run``,
            so values do not have to be interpolated into the Cypher string.
            Defaults to None (no parameters).

    Returns:
        A list containing ``record.data()`` for every record of the result.
    """
    def _read(tx):
        # Materialize the rows inside the transaction function, before the
        # transaction ends and the result can no longer be iterated.
        return [record.data() for record in tx.run(query, parameters)]

    with driver.session() as session:
        return session.execute_read(_read)

#### Load the questions file and write a query for each question

In [None]:
# The database schema description: result rows of the APOC meta-graph procedure,
# also kept as a JSON string because generate_cypher_query embeds it in its prompt
query = "CALL apoc.meta.graph()"
meta_graph = execute_query(driver, query)
meta_graph_str = json.dumps(meta_graph)

In [None]:
def generate_cypher_query(question):
    """
    Generate a Cypher query for a given question using OpenAI's API.

    The prompt embeds the module-level ``meta_graph_str`` schema description and
    is sent through the module-level ``client`` to the ``gpt-4`` chat model.

    Args:
        question: Natural-language question to translate into Cypher.

    Returns:
        The text of the first choice with surrounding whitespace stripped, or
        the sentinel string "Query generation failed." when the response has no
        choices or the first choice carries no text content.
    """
    prompt = f"Given the schema: {meta_graph_str}, and the question: '{question}', write a Cypher query to answer the question."

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a software engineer trained in writing Cypher queries for Neo4j."},
            {"role": "user", "content": prompt}
        ]
    )

    if not response.choices:
        return "Query generation failed."

    cypher_query = response.choices[0].message.content
    # The message content is optional in the API response; guard against None
    # so a content-less reply yields the failure sentinel instead of raising
    # AttributeError on .strip().
    if cypher_query is None:
        return "Query generation failed."
    return cypher_query.strip()

In [None]:
def process_questions_csv(input_csv, output_csv):
    """
    Read questions from input_csv, generate Cypher queries, save to output_csv,
    and preview the first 5 rows before saving.

    The first line of input_csv is treated as a header and skipped. For every
    following row, the first column is the question; rows that are blank or
    whose first column is empty are skipped so no query is generated for them.
    After the preview the user is asked to confirm; the output file is written
    only when the answer is "yes" or "y" (case-insensitive, surrounding
    whitespace ignored).

    Args:
        input_csv: Path of the CSV file holding the questions.
        output_csv: Path of the CSV file to write, with an ``Input, Output``
            header followed by one ``question, query`` row per question.

    Returns:
        None.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects.
    with open(input_csv, 'r', newline='', encoding='utf-8') as infile:
        reader = csv.reader(infile)
        next(reader, None)  # Skip header row; the default avoids StopIteration on an empty file
        rows = [
            [row[0], generate_cypher_query(row[0])]
            for row in reader
            if row and row[0].strip()  # blank line -> [], which would break row[0]
        ]

    # Preview the first 5 rows
    print("Preview of the first 5 rows:")
    for row in rows[:5]:
        print(row)

    # Ask for confirmation to save to CSV; tolerate stray whitespace so the
    # generated rows are not discarded over a trailing space.
    confirmation = input("Proceed to save the output to CSV? (yes/no): ")
    if confirmation.strip().lower() not in ("yes", "y"):
        print("Operation cancelled. The output was not saved.")
        return

    with open(output_csv, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Input', 'Output'])  # Writing header for the output CSV
        writer.writerows(rows)  # Writing all generated rows
    print(f"Processed questions and generated queries have been saved to {output_csv}")


In [None]:
# Source CSV that holds the questions
input_csv_path = "questions.csv"
# Destination CSV that receives each question alongside its generated Cypher query
output_csv_path = "questions_with_queries.csv"

# Run the question-to-query pipeline
process_questions_csv(input_csv=input_csv_path, output_csv=output_csv_path)