In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
LANGCHAIN_TRACING_V2 = os.getenv("LANGCHAIN_TRACING_V2")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")


In [3]:
print(LANGCHAIN_TRACING_V2)

true


<h2>Loading the Database</h2>

In [4]:
from langchain_community.utilities.sql_database import SQLDatabase

In [5]:
db_user = os.getenv("db_user")
db_password = os.getenv("db_password")
db_host = os.getenv("db_host")
db_name = os.getenv("db_name")

In [6]:
mysql_uri = f"mysql+mysqlconnector://{db_user}:{db_password}@{db_host}/{db_name}"

db = SQLDatabase.from_uri(mysql_uri , sample_rows_in_table_info = 4)

In [9]:
print(db.get_usable_table_names())
print(db.table_info)

['attendance', 'login', 'students', 'subjects', 'teacher_subject', 'teachers']

CREATE TABLE attendance (
	student_id_usn VARCHAR(15) NOT NULL, 
	subject_id VARCHAR(15) NOT NULL, 
	date DATE NOT NULL, 
	status ENUM('absent','present') DEFAULT 'absent', 
	PRIMARY KEY (student_id_usn, subject_id, date), 
	CONSTRAINT attendance_ibfk_1 FOREIGN KEY(student_id_usn) REFERENCES students (student_id), 
	CONSTRAINT attendance_ibfk_2 FOREIGN KEY(subject_id) REFERENCES subjects (subject_id)
)COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB

/*
4 rows from attendance table:
student_id_usn	subject_id	date	status
1DS21CS001	21CS71	2024-11-16	absent
1DS21CS001	21CS72	2024-11-16	present
1DS21CS001	21CS73	2024-11-16	absent
1DS21CS001	21CS74	2024-11-16	present
*/


CREATE TABLE login (
	id VARCHAR(20) NOT NULL, 
	name VARCHAR(100) NOT NULL, 
	email VARCHAR(100) NOT NULL, 
	password VARCHAR(255) NOT NULL, 
	type VARCHAR(50) NOT NULL
)COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4 ENGIN

<h2>Creating the model </h2>

In [7]:
from langchain.chains import create_sql_query_chain

In [8]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.1" , temperature= 0)

In [9]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [None]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="llama3-groq-70b-8192-tool-use-preview",
    temperature=0,
    max_tokens=None,
    timeout=None,
)

In [None]:
# from langchain_openai import OpenAI
# llm = OpenAI()

In [11]:
#The very first step in our interaction
generate_query = create_sql_query_chain(llm , db)

In [12]:
suffix = " Only return the SQL query, Nothing else."

In [13]:
query = generate_query.invoke({"question" : "How many students are there?" + suffix})
print(query)

SELECT COUNT(*) AS total_students FROM students


In [14]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

In [15]:
execute_query = QuerySQLDataBaseTool(db = db)
execute_query.invoke(query)

'[(284,)]'

In [16]:
#Creating the chain of above 2 steps
chain = generate_query | execute_query
chain.invoke({"question" : "How many students are there?" + suffix})

'[(284,)]'

In [17]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [18]:
answer_prompt = PromptTemplate.from_template(
     """Given the following user question, corresponding SQL query, and SQL result, answer the user question in natural english language.If the result is blank, then answer in negative.

 Question: {question}
 SQL Query: {query}
 SQL Result: {result}
 Answer: """
 )

In [19]:
rephrase_answer = answer_prompt | llm | StrOutputParser()

In [20]:
chain = (
     RunnablePassthrough.assign(query=generate_query).assign(
         result=itemgetter("query") | execute_query
     )
     | rephrase_answer
 )

In [21]:
chain.invoke({"question": "How many students?" + suffix})

'There are 284 students.'

<h3>Creating Few Shot prompts</h3>

In [22]:
examples = [
    {
        "input" : "List all students of the class" , 
        "query" : "SELECT * FROM students;"
    } , 
    {
        "input" : "List the names of students of the class" , 
        "query" : "SELECT student_id_usn , student_name FROM students;"
    } , 
    { 
        "input": "List all students and their contact details.", 
        "query": "SELECT student_id_usn, student_name, contact, email FROM students;" 
    } ,
    {
        "input" : "Give me the usn of all students" , 
        "query" : "SELECT student_id_usn FROM students;"
    } ,
    {
        "input" : "Give me details of student id 106" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS106'"
    } ,
    {
        "input" : "Who is 106?" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS106'"
    } ,
    {
        "input" : "Who is 087?" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS087'"
    } ,
    
    {
        "input" : "Give me details of student id 1ds21cs087" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS087'"
    } ,
    {
        "input" : "Give me details of usn 234" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS234'"
    } ,
    {
        "input" : "Give me details of usn 1ds21cs234" , 
        "query" : "SELECT * FROM students WHERE student_id = '1DS21CS234'"
    } , 
    {
        "input" : "Who teaches ar vr?" ,
        "query" : "SELECT teacher_name FROM teachers INNER JOIN teacher_subject ON teachers.teacher_id = teacher_subject.teacher_id INNER JOIN subjects ON teacher_subject.subject_id = subjects.subject_id WHERE subjects.subject_name = 'AR VR';"
    } , 
    {
        "input" : "Who teaches business intelligence?" ,
        "query" : "SELECT teacher_name FROM teachers INNER JOIN teacher_subject ON teachers.teacher_id = teacher_subject.teacher_id INNER JOIN subjects ON teacher_subject.subject_id = subjects.subject_id WHERE subjects.subject_name = 'Business Intelligence';"
    } ,
    {
        "input" : "What Premsukh teaches?" ,
        "query" : "select su.subject_name from teachers t inner join teacher_subject ts on t.teacher_id = ts.teacher_id inner join subjects su on su.subject_code = ts.subject_code where t.teacher_id in (select teacher_id from teachers where teacher_name like '%premsukh%'); "
    } ,
    {
        "input" : "what Harsh teaches?" , 
        "query" : "select su.subject_name from teachers t inner join teacher_subject ts on t.teacher_id = ts.teacher_id inner join subjects su on su.subject_code = ts.subject_code where t.teacher_id in (select teacher_id from teachers where teacher_name like '%harsh%'); "
    } ,
]

In [23]:
from langchain_core.prompts import ChatPromptTemplate , MessagesPlaceholder , FewShotChatMessagePromptTemplate

In [24]:
example_prompt = ChatPromptTemplate.from_messages(
    [
        # ("human" , "{input}\nSQLQuery:"),
        ("human" , "{input}. " + suffix),
        ("ai" , "{query}"), 
    ]
)

In [71]:
print(example_prompt.format(input = "How?" , query = "WHo?"))

Human: How?.  Only return the SQL query, Nothing else.
AI: WHo?


In [25]:
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt ,
    examples= examples ,
    input_variables=["input"],
)

In [26]:
print(few_shot_prompt.format(input="How many students are there?"))

Human: List all students of the class.  Only return the SQL query, Nothing else.
AI: SELECT * FROM students;
Human: List the names of students of the class.  Only return the SQL query, Nothing else.
AI: SELECT student_id_usn , student_name FROM students;
Human: List all students and their contact details..  Only return the SQL query, Nothing else.
AI: SELECT student_id_usn, student_name, contact, email FROM students;
Human: Give me the usn of all students.  Only return the SQL query, Nothing else.
AI: SELECT student_id_usn FROM students;
Human: Give me details of student id 106.  Only return the SQL query, Nothing else.
AI: SELECT * FROM students WHERE student_id = '1DS21CS106'
Human: Who is 106?.  Only return the SQL query, Nothing else.
AI: SELECT * FROM students WHERE student_id = '1DS21CS106'
Human: Who is 087?.  Only return the SQL query, Nothing else.
AI: SELECT * FROM students WHERE student_id = '1DS21CS087'
Human: Give me details of student id 1ds21cs087.  Only return the SQL q

In [27]:
#Dynamic few-shot selections
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_ollama import OllamaEmbeddings

In [28]:
vectorstore = Chroma()
vectorstore.delete_collection()

In [29]:
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OllamaEmbeddings(model="llama3.1",),
    vectorstore,
    k=2,
    input_keys=["input"],
)

In [30]:
example_selector_input = "How many students are there?"

In [31]:
example_selector.select_examples({"input": example_selector_input})

[{'input': 'Who teaches business intelligence?',
  'query': "SELECT teacher_name FROM teachers INNER JOIN teacher_subject ON teachers.teacher_id = teacher_subject.teacher_id INNER JOIN subjects ON teacher_subject.subject_id = subjects.subject_id WHERE subjects.subject_name = 'Business Intelligence';"},
 {'input': 'What Premsukh teaches?',
  'query': "select su.subject_name from teachers t inner join teacher_subject ts on t.teacher_id = ts.teacher_id inner join subjects su on su.subject_code = ts.subject_code where t.teacher_id in (select teacher_id from teachers where teacher_name like '%premsukh%'); "}]

In [None]:
# embed = OllamaEmbeddings(
#     model="llama3.1"
# )

# input_text = "Who teaches Generative AI?"
# vector = embed.embed_query(input_text)
# print(vector[:3])

# input_text = "Who teaches business intelligence?"
# vector = embed.embed_query(input_text)
# print(vector[:3])

[-0.0050232355, -0.020094158, 0.020115267]
[-0.00073254365, -0.022771522, 0.01482598]


In [32]:
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    example_selector=example_selector,
    input_variables=["input", "top_k"],
)

In [33]:
few_shot_prompt_input = "Who teaches management entrepreneurship?"

In [34]:
print(few_shot_prompt.format(input=few_shot_prompt_input))

Human: Who teaches business intelligence?.  Only return the SQL query, Nothing else.
AI: SELECT teacher_name FROM teachers INNER JOIN teacher_subject ON teachers.teacher_id = teacher_subject.teacher_id INNER JOIN subjects ON teacher_subject.subject_id = subjects.subject_id WHERE subjects.subject_name = 'Business Intelligence';
Human: Who teaches ar vr?.  Only return the SQL query, Nothing else.
AI: SELECT teacher_name FROM teachers INNER JOIN teacher_subject ON teachers.teacher_id = teacher_subject.teacher_id INNER JOIN subjects ON teacher_subject.subject_id = subjects.subject_id WHERE subjects.subject_name = 'AR VR';


In [35]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

In [36]:
final_prompt_input = "Who is 098?"

In [37]:
print(final_prompt.format(input=final_prompt_input,table_info=db.table_info))

System: You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.

Here is the relevant table info: 
CREATE TABLE attendance (
	student_id_usn VARCHAR(15) NOT NULL, 
	subject_id VARCHAR(15) NOT NULL, 
	date DATE NOT NULL, 
	status ENUM('absent','present') DEFAULT 'absent', 
	PRIMARY KEY (student_id_usn, subject_id, date), 
	CONSTRAINT attendance_ibfk_1 FOREIGN KEY(student_id_usn) REFERENCES students (student_id), 
	CONSTRAINT attendance_ibfk_2 FOREIGN KEY(subject_id) REFERENCES subjects (subject_id)
)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4

/*
4 rows from attendance table:
student_id_usn	subject_id	date	status
1DS21CS001	21CS71	2024-11-16	absent
1DS21CS001	21CS72	2024-11-16	present
1DS21CS001	21CS73	2024-11-16	absent
1DS21CS001	21CS74	2024-11-16	present
*/


CREATE TABLE login (
	id VARCHAR(20) NOT NULL, 
	name VARCHAR(100) NOT NULL, 
	email VARCHAR(100) NOT NULL, 
	password VARCHAR(255) NOT

In [None]:
generate_query = create_sql_query_chain(llm, db,final_prompt)

In [39]:
chain = (
RunnablePassthrough.assign(query=generate_query).assign(
    result=itemgetter("query") | execute_query
)
| rephrase_answer
)

In [None]:
chain.invoke({"question" : "How many students are there?"})

'Yes, based on the information provided, it appears that the student with the ID 1DS21CS087 was present on the date of November 16, 2024, which suggests that their attendance is regular.'

<h3>Setting up the relevant table selection </h3>

In [92]:
# from operator import itemgetter
# from langchain.chains import create_extraction_chain_pydantic
from langchain.chains.openai_tools import create_extraction_chain_pydantic 
from pydantic import BaseModel, Field
# from typing import List
import pandas as pd

In [102]:
def get_table_details():
    # Read the CSV file into a DataFrame
    table_description = pd.read_csv("database_table_descriptions.csv")
    # table_docs = []

    # Iterate over the DataFrame rows to create Document objects
    table_details = ""
    for index, row in table_description.iterrows():
        table_details = table_details + "Table Name:" + row['Table'] + "\n" + "Table Description:" + row['Description'] + "\n\n"

    return table_details

In [95]:
class Table(BaseModel):
    """Table in SQL database."""

    name: str = Field(description="Name of table in SQL database.")

In [None]:
# table_names = "\n".join(db.get_usable_table_names())
# print(table_names)

attendance
login
students
subjects
teacher_subject
teachers


In [103]:
table_details = get_table_details()
print(table_details)

Table Name:attendance
Table Description:This table records the attendance of students for each subject. It includes a composite primary key consisting of the student identifier (student_id), subject identifier (subject_id), the date of the class (date), and the time slot (slot). The status column indicates whether the student was (present) or (absent). This table helps in tracking and managing student attendance for each subject and period. We can use (subject_id) to figure out which teacher(teacher_id) took the attendance with the help of (teacher_subject table). 

Table Name:login
Table Description:Contains login information.

Table Name:students
Table Description:This table stores essential information about each student. It includes a unique student identifier (student_id), student_id is also known as (USN),  the full name of the student (student_name), contact details (contact), email address (email), and the names of the student's father (student_father_name) and mother (student_

In [104]:
table_details_prompt = f"""Return the names of ALL the SQL tables that MIGHT be relevant to the user question. \
The tables are:

{table_details}

Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed."""
# table_chain = create_extraction_chain_pydantic(Table, llm)
# tables = table_chain.invoke({"input": "give me details of id 106"})
print(table_details_prompt)

Return the names of ALL the SQL tables that MIGHT be relevant to the user question. The tables are:

Table Name:attendance
Table Description:This table records the attendance of students for each subject. It includes a composite primary key consisting of the student identifier (student_id), subject identifier (subject_id), the date of the class (date), and the time slot (slot). The status column indicates whether the student was (present) or (absent). This table helps in tracking and managing student attendance for each subject and period. We can use (subject_id) to figure out which teacher(teacher_id) took the attendance with the help of (teacher_subject table). 

Table Name:login
Table Description:Contains login information.

Table Name:students
Table Description:This table stores essential information about each student. It includes a unique student identifier (student_id), student_id is also known as (USN),  the full name of the student (student_name), contact details (contact), em

In [None]:
# table_chain = create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt)
table_chain = llm.with_structured_output(Table)

  table_chain = create_extraction_chain_pydantic(Table, llm, system_message=table_details_prompt)


In [116]:
tables = table_chain.invoke("Who teaches ar vr?")
print(tables)

[]


<h3>History</h3>

In [45]:
from langchain.memory import ChatMessageHistory
history = ChatMessageHistory()

In [59]:
# type(history)

In [46]:
final_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries. Those examples are just for reference and should be considered while answering follow up questions"),
         few_shot_prompt,
         MessagesPlaceholder(variable_name="messages"),
         ("human", "{input}"),
     ]
 )
print(final_prompt.format(input="How many students are there?",table_info=db.table_info,messages=[]))


System: You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.

Here is the relevant table info: 
CREATE TABLE attendance (
	student_id_usn VARCHAR(15) NOT NULL, 
	subject_id VARCHAR(15) NOT NULL, 
	date DATE NOT NULL, 
	status ENUM('absent','present') DEFAULT 'absent', 
	PRIMARY KEY (student_id_usn, subject_id, date), 
	CONSTRAINT attendance_ibfk_1 FOREIGN KEY(student_id_usn) REFERENCES students (student_id), 
	CONSTRAINT attendance_ibfk_2 FOREIGN KEY(subject_id) REFERENCES subjects (subject_id)
)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4

/*
4 rows from attendance table:
student_id_usn	subject_id	date	status
1DS21CS001	21CS71	2024-11-16	absent
1DS21CS001	21CS72	2024-11-16	present
1DS21CS001	21CS73	2024-11-16	absent
1DS21CS001	21CS74	2024-11-16	present
*/


CREATE TABLE login (
	id VARCHAR(20) NOT NULL, 
	name VARCHAR(100) NOT NULL, 
	email VARCHAR(100) NOT NULL, 
	password VARCHAR(255) NOT

In [48]:
generate_query = create_sql_query_chain(llm, db,final_prompt)

In [49]:
chain = (
 RunnablePassthrough.assign(query=generate_query).assign(
     result=itemgetter("query") | execute_query
 )
 | rephrase_answer
 )

In [67]:
history.clear()

In [None]:
# type(history.messages)

list

In [63]:
question = "Give me details of 087?"

In [65]:
print(history.messages)

[]


In [64]:
response = chain.invoke({"question": question,"messages":history.messages})
response

"The student with ID 087 is Harsh Kamal. His contact number is 9080706085, and his email address is 1ds21cs087@dsce.edu.in. Unfortunately, the information about his father's and mother's names is not available."

In [66]:
history.add_user_message(question)
history.add_ai_message(response)
print(history.messages)

[HumanMessage(content='Give me details of 087?', additional_kwargs={}, response_metadata={}), AIMessage(content="The student with ID 087 is Harsh Kamal. His contact number is 9080706085, and his email address is 1ds21cs087@dsce.edu.in. Unfortunately, the information about his father's and mother's names is not available.", additional_kwargs={}, response_metadata={})]


In [None]:
response = chain.invoke({"question": "Can you give me name of first 5?","messages":history.messages})
print(response)