In [6]:
import os
import openai
import tiktoken
from dotenv import load_dotenv
import llama_index
from llama_index import GPTKeywordTableIndex,SimpleDirectoryReader,LLMPredictor,ServiceContext
from llama_index import GPTVectorStoreIndex,load_index_from_storage

# Read the local .env file.
load_dotenv()

# Hand the OpenAI client its API key from the environment
# (the lookup yields None when OPENAI_API_KEY is not set).
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [7]:
# Load the contents of the 'issuesubsetdata' directory as documents.
documents = SimpleDirectoryReader(input_dir='issuesubsetdata').load_data()

In [8]:
# Build a vector-store index from the documents loaded in the previous cell.
index = GPTVectorStoreIndex.from_documents(documents=documents)

In [9]:
# Create a query engine over the index; no arguments are passed, so the
# library defaults apply.
query_engine = index.as_query_engine()

In [12]:
# Ask the indexed documents for the JSON schema fields and print the answer.
# print() is used so the response's text form is shown.
response = query_engine.query("list all json schema fields")
print(response)


- "@context"
- "issue"
- "description"
- "has_author"
- "due_by"
- "status"
- "impacted_region"
- "required"


In [13]:
# Ask the indexed documents for the database tables/columns and print the answer.
# Note: this rebinds `response`, replacing the result of the previous query.
response = query_engine.query("list all db schema table columns")
print(response)


TestSchema.ENUMVALS
- DESCRIPTION (VARCHAR)
- ENUMVALID (DECIMAL)
- NAME (VARCHAR)

TestSchema.ISSUE
- DESCRIPTION (VARCHAR)
- IMPCTDORG (VARCHAR)
- ISSDESCR (VARCHAR)
- ISSORGNINDT (TIMESTAMP)
- ISSRORG (VARCHAR)
- NAME00 (VARCHAR)
- ORGANIZATIONREGION (VARCHAR)
- SLADUEDATE (TIMESTAMP)
- STTS (DECFLOAT)
- CREATION_DATE (TIMESTAMP)
- CREATOR (DECFLOAT)


In [27]:
# Prompt asking the model to (1) annotate each JSON node with its matching
# database column as ``SQLField:TABLE.Column`` and (2) produce a SQL view
# over the columns mapped in step 1.
# NOTE(review): the "System:" line below is plain text inside the query
# string; this call does not pass it as a separate system message -- confirm
# that is intended.
query = '''
System: you are an expert of rdbms and json format

using @id and @type properties in the json file map the parent node to db table columns given in schema file
for the below steps to generate the output

Step 1:
add the mapped db column in the json file under the parent node of @id in the format ```SQLField:TABLE.Column```

Step 2: 
generate a SQL db view using the db columns mapped in step 1
'''
# `query` is already a str, so it is passed to the engine as-is.
response = query_engine.query(query)
print(response)


Answer:
Step 1:

{
    "@context": {
      "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
      "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
      "owl": "http://www.w3.org/2002/07/owl#",
      "skos": "http://www.w3.org/2004/02/skos/core#",
      "xsd": "http://www.w3.org/2001/XMLSchema#",
      "msrpc": "http://example.org/msrpc/ontology/",
      "msrpc3": "http://example.org/msrpc3/ontology/",
      "msrpcx": "http://example.org/msrpcx/ontology/"
    },
    "issue": {
        "@id": "msrpc:Issue",
        "@type": "xsd:string",
        "SQLField:TestSchema.ISSUE.NAME00": "xsd:string",
        "rdfs:comment": "A potential risk or deficiency for which remidial action or monitoring is planned or anticipated to reduce a residual risk to an acceptable level"
      },   
    "description":{
      "@id": "mscore:description",
      "@type": "xsd:string",
      "SQLField:TestSchema.ISSUE.DESCRIPTION": "xsd:string",
      "rdfs:comment": "The description of the subject Issue"
   