In [24]:
!pip install langchain_community



In [25]:
import json
import os
import re
from typing import Dict, List, Tuple

from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate


In [26]:
# Initialize LLM.
# The API key is read from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook. When the
# variable is unset, an empty string is passed, exactly as the cell did before.
llm = ChatOpenAI(
    model_name="gpt-4-turbo",
    openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
)

In [27]:
# Sample database schema extracted from the PDF.
# Each table maps to a list of column descriptors with the keys
# "Field", "Type" and "Key"; the descriptors are expanded from compact
# (field, type, key) triples to keep the table definitions easy to scan.
database_schema = {
    table_name: [
        {"Field": field, "Type": sql_type, "Key": key}
        for field, sql_type, key in column_specs
    ]
    for table_name, column_specs in (
        ("ActiveDonors", (
            ("UserID", "varchar(450)", ""),
            ("Name", "varchar(450)", ""),
            ("Email", "varchar(256)", ""),
            ("PhoneNumber", "varchar(50)", ""),
        )),
        ("Campaigns", (
            ("ID", "int", "PRI"),
            ("Title", "varchar(250)", ""),
            ("NgoID", "int", "MUL"),
        )),
        ("NGOs", (
            ("ID", "int", "PRI"),
            ("Name", "varchar(255)", ""),
        )),
    )
}

In [28]:
# Extract relationships
def extract_relationships(schema: Dict[str, List[Dict[str, str]]]) -> Dict[str, List[str]]:
    """Map every table to the columns whose ``"Key"`` marker contains ``"MUL"``.

    Args:
        schema: Mapping of table name to a list of column descriptors. Each
            descriptor must provide ``"Field"``; ``"Key"`` is optional.

    Returns:
        A dict with one entry per table in ``schema`` (in the same order).
        Each value is the list of ``"Field"`` names whose ``"Key"`` contains
        ``"MUL"``; tables without such columns map to an empty list.

    Note:
        Presumably ``"MUL"`` is used here as a hint that the column refers to
        another table; the marker by itself does not prove a foreign key, so
        verify against the real database constraints.
    """
    return {
        table: [
            column["Field"]
            for column in columns
            # A missing or None "Key" is treated as "no key" instead of
            # raising KeyError / TypeError.
            if "MUL" in (column.get("Key") or "")
        ]
        for table, columns in schema.items()
    }

In [29]:
# Define an LLM prompt template for NLP to SQL conversion.
# The template text is spelled out as adjacent string literals so that its
# leading newline and four-space indentation are explicit; the resulting
# string has two placeholders, {schema} and {query}.
prompt_template = PromptTemplate(
    template=(
        "\n"
        "    Given the following SQL database schema:\n"
        "    {schema}\n"
        "\n"
        "    Convert the following natural language query into an SQL query:\n"
        '    "{query}"\n'
        "    "
    ),
    input_variables=["query", "schema"],
)

In [30]:
# Convert NLP query to SQL using LLM
def nlp_to_sql(nlp_query: str) -> str:
    """Send a natural-language question to the LLM and return its raw reply.

    The module-level ``database_schema`` is serialised as indented JSON and
    substituted, together with ``nlp_query``, into ``prompt_template``.

    Args:
        nlp_query: The question to translate into SQL.

    Returns:
        Whatever ``llm.invoke`` returns, unmodified. In the recorded run of
        this notebook that is a message object (printed as ``content=...
        additional_kwargs=...``), not a plain string, despite the ``str``
        annotation. A later cell redefines this function to return only the
        SQL text.
    """
    rendered_prompt = prompt_template.format(
        schema=json.dumps(database_schema, indent=2),
        query=nlp_query,
    )
    return llm.invoke(rendered_prompt)

In [31]:
# Test case: run one question through the pipeline and show the raw result.
nlp_query = "Get all donor names and emails"
sql_query = nlp_to_sql(nlp_query)
# str() is applied explicitly; print() would do the same for a second argument.
print("Generated SQL Query: " + str(sql_query))

Generated SQL Query: content='To convert the natural language query "Get all donor names and emails" into an SQL query, you need to select the relevant fields from the appropriate table. Based on the provided schema, donor information such as names and emails are stored in the `ActiveDonors` table. Here is the SQL query that retrieves all donor names and emails:\n\n```sql\nSELECT Name, Email\nFROM ActiveDonors;\n```\n\nThis SQL query will return a list of all names and email addresses from the `ActiveDonors` table.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 106, 'prompt_tokens': 295, 'total_tokens': 401, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_bf9cb2c77f', 'finish_reason': 'stop', 'logprobs': None} id='run-5374c29e-9ba2-4a87-a5e0-6573

In [32]:
# Extract SQL query from LLM response
def extract_sql(response: str) -> str:
    """Pull the SQL statement out of a Markdown-formatted LLM reply.

    Lookup order:
      1. The first fenced block tagged ``sql`` (tag matched case-insensitively;
         trailing spaces/tabs and a CRLF line ending after the tag are allowed).
      2. Otherwise, the first fenced block that carries no language tag.
      3. Otherwise, the whole response.

    Args:
        response: The text returned by the model.

    Returns:
        The selected text with leading and trailing whitespace removed.
    """
    tagged = re.search(
        r'```sql[ \t]*\r?\n(.*?)```', response, re.DOTALL | re.IGNORECASE
    )
    if tagged:
        return tagged.group(1).strip()

    # No sql-tagged fence: pair up fences in order and take the first block
    # without a language tag. Blocks tagged with another language are skipped
    # so that e.g. a python snippet is never mistaken for SQL.
    for tag, body in re.findall(r'```([^\n`]*)\n(.*?)```', response, re.DOTALL):
        if not tag.strip():
            return body.strip()

    return response.strip()


In [35]:
# Convert NLP query to SQL using LLM
def nlp_to_sql(nlp_query: str) -> str:
    """Translate a natural-language question into a bare SQL string.

    The module-level ``database_schema`` is serialised as indented JSON and
    substituted, together with ``nlp_query``, into ``prompt_template``; the
    rendered prompt is sent to ``llm``.

    Args:
        nlp_query: The question to translate into SQL.

    Returns:
        The result of ``extract_sql`` applied to the reply's ``content``.
    """
    rendered_prompt = prompt_template.format(
        schema=json.dumps(database_schema, indent=2),
        query=nlp_query,
    )
    # llm.invoke returns a message object; the generated text is in .content.
    reply_text = llm.invoke(rendered_prompt).content
    return extract_sql(reply_text)

In [36]:
# Re-run the same question with the SQL-extracting nlp_to_sql.
nlp_query = "Get all donor names and emails"
sql_query = nlp_to_sql(nlp_query)
# Heading and query go on separate lines, as two consecutive prints would.
print("Generated SQL Query:", sql_query, sep="\n")


Generated SQL Query:
SELECT Name, Email 
FROM ActiveDonors;
