In [None]:
import csv
import os
import json
from openai import OpenAI
from neo4j import GraphDatabase
from predibase import PredibaseClient

In [None]:
# Neo4j connection settings are read from the environment so that no
# credentials live in the notebook; any variable that is unset yields None.
neo4j_uri, neo4j_username, neo4j_password = (
    os.getenv(name)
    for name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)
# (username, password) pair in the shape passed to the driver's `auth=` argument.
neo4j_auth = (neo4j_username, neo4j_password)

In [None]:
# OpenAI API key, taken from the environment (None when OPENAI_API_KEY is unset).
open_ai_api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# OpenAI client used by the question-generation function further down;
# authenticated with the key read from the environment.
client = OpenAI(api_key=open_ai_api_key)

In [None]:
# Predibase client; not referenced by the cells visible in this section.
pc = PredibaseClient()
# Neo4j driver used by execute_query() to talk to the graph database.
driver = GraphDatabase.driver(neo4j_uri, auth=neo4j_auth)

In [None]:
# Helper that runs a Cypher query through the Neo4j driver in a read transaction.

def execute_query(driver, query, parameters=None):
    """
    Run a Cypher query in a managed read transaction and return its rows.

    Args:
        driver: Neo4j driver object; only ``driver.session()`` is used.
        query (str): Cypher text to execute.
        parameters (dict | None): Optional query parameters forwarded to
            ``tx.run``. Prefer these over string formatting when the query
            needs values. Defaults to None (no parameters).

    Returns:
        list[dict]: One ``record.data()`` dict per result record.
    """
    def _collect(tx):
        # Convert every record to a plain dict before the transaction
        # function returns, so callers never hold driver result objects.
        return [record.data() for record in tx.run(query, parameters)]

    with driver.session() as session:
        return session.execute_read(_collect)

#### Introspect the schema 

In [None]:
# Introspect the schema by calling APOC's meta-graph procedure.
query = "CALL apoc.meta.graph()"
meta_graph = execute_query(driver=driver, query=query)
# Serialise the result to a JSON string so it can be embedded in the prompts.
meta_graph_str = json.dumps(obj=meta_graph)

#### Create themes to generate questions around

In [None]:
# Theme name -> prompt text. Every prompt starts with the serialised schema
# so the model can see the graph structure it is writing questions about.
themes = dict(
    user_interactions=(
        f"Given the schema: {meta_graph_str}. "
        "Generate questions about a user's interactions on Twitter, "
        "focusing on followers, mutual follows, retweets, and mentions."
    ),
    tweet_content=(
        f"Given the schema: {meta_graph_str}. "
        "Generate questions about the content of tweets, "
        "including common words, hashtags, topics, and the most retweeted tweets."
    ),
)

#### Use OpenAI to generate questions

In [None]:
def openai_generate_questions(prompt, theme, num_questions=30, model="gpt-4"):
    """
    Ask an OpenAI chat model to write questions about the database.

    Args:
        prompt (str): Theme-specific instructions (including the schema) that
            are embedded in the user message.
        theme (str): Name of the theme, quoted in the user message.
        num_questions (int): How many questions to request. Defaults to 30.
        model (str): Chat model name. Defaults to "gpt-4".

    Returns:
        list[str]: One entry per non-blank line of the model's reply, with
        surrounding whitespace removed. If the response has no choices or no
        text, the single-item list ["No questions generated."] is returned.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a person writing questions that describe a Neo4j database."},
            {"role": "user", "content": f"Given the theme of '{theme}', {prompt} Create {num_questions} questions about this dataset."},
        ],
    )

    if not response.choices:
        return ["No questions generated."]

    # The openai>=1.0 client returns objects, not dicts: the text is the
    # `.content` attribute (subscripting the message raises TypeError).
    # `content` may be None, so fall back to an empty string.
    response_content = response.choices[0].message.content or ""

    # Keep only lines that carry text; blank separator lines are not questions.
    questions = [line.strip() for line in response_content.splitlines() if line.strip()]
    return questions or ["No questions generated."]

In [3]:
def generate_questions_for_all_themes(num_questions=30, output_path='questions.csv'):
    """
    Generate questions for every entry in `themes` and save them to a CSV file.

    Calls `openai_generate_questions` once per theme, writes all returned
    questions under a single "Question" header column, then prints the total
    count and the first ten questions as a preview.

    Args:
        num_questions (int): Number of questions requested per theme.
            Defaults to 30.
        output_path (str): Destination CSV path; an existing file is
            overwritten. Defaults to 'questions.csv'.

    Returns:
        None
    """
    all_questions = []
    for theme, prompt in themes.items():
        all_questions.extend(openai_generate_questions(prompt, theme, num_questions))

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Question"])
        writer.writerows([question] for question in all_questions)

    # Preview of questions and their count
    print(f"Total questions generated: {len(all_questions)}")
    print("Preview of generated questions:")
    for question in all_questions[:10]:
        print(question)

In [None]:
# Execute the main function: generates questions for every theme, saves them
# to a CSV file, and prints a short preview.
generate_questions_for_all_themes()