In [1]:
from langchain.llms import CTransformers  # to load the llama2 model

In [2]:
llm = CTransformers(model="model\Llama2-nl-sql2.gguf",
                        model_type="llama", config={'max_new_tokens':3990,'temperature': 0.6,'context_length': 3990})

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# llm(" Please write me a SQL statement that answers the following question: How many transactions did I make this month ")

In [4]:
from langchain_community.utilities import SQLDatabase


# MySQL connection URI
username = 'root'
password = 'prabal9869'
host = '127.0.0.1'
dbname = 'arl_bank'  # Database name

# Constructing the MySQL URI
mysql_uri = f"mysql+pymysql://{username}:{password}@{host}/{dbname}"

# # Initializing SQLDatabase object for MySQL
db = SQLDatabase.from_uri(mysql_uri, sample_rows_in_table_info=3)
print(db.table_info)


CREATE TABLE transactions (
	`Account_No` VARCHAR(50) NOT NULL, 
	`Transaction_details` TEXT, 
	`Withdrawal_amount` INTEGER, 
	`Deposit_amount` INTEGER, 
	`Balance_amount` INTEGER, 
	`Value_date` DATE, 
	`Date` DATE
)ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci DEFAULT CHARSET=utf8mb4

/*
3 rows from transactions table:
Account_No	Transaction_details	Withdrawal_amount	Deposit_amount	Balance_amount	Value_date	Date
409000611074'	TRF FROM  Indiaforensic SERVICES	0	1000000	1000000	2022-10-21	2022-10-21
409000611074'	TRF FROM  Indiaforensic SERVICES	0	1000000	2000000	2022-10-27	2022-10-27
409000611074'	FDRL/INTERNAL FUND TRANSFE	0	500000	2500000	2022-11-09	2022-11-09
*/


In [5]:
from langchain_experimental.sql import SQLDatabaseChain

db_chain = SQLDatabaseChain.from_llm(llm=llm,db=db,verbose=True)


# FEW SHORT LEARNING

In [6]:
#Few_shorts

few_shots=[
    {
        'Question':" Income of last 3 months. ?",
        'SQLQuery':"""SELECT SUM(Deposit_amount) AS Total_Income FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH) """,
        'SQLResult':"1530205868",
        'Answer':"1530205868 is the income of last 3 months."
    }
    ,

    {
        'Question':"Total expenses/spendings of last 8 months ?",
        'SQLQuery':"""SELECT SUM(Withdrawal_amount) AS Total_Expenses FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 8 MONTH) """,
        'SQLResult':"11274528407",
        'Answer':"11274528407 is the total expenses of last 8 months."
    }
    ,
      {
        'Question':"Total saving of last month as my account number is 409000493201'  ?",
        'SQLQuery':"""SELECT (SUM(Deposit_amount) - SUM(Withdrawal_amount)) AS Savings_Last_Month FROM 
transactions WHERE Account_No = "409000493201'" AND YEAR(Value_date) = YEAR(CURRENT_DATE() 
- INTERVAL 1 MONTH) AND MONTH(Value_date) = MONTH(CURRENT_DATE() - INTERVAL 1 MONTH);
        """,
        'SQLResult':"-193509",
        'Answer':"You lost -193509 last month."
    }
,
    {
        'Question':"How many transactions did I make in last 2 week  ",
        'SQLQuery':"""SELECT COUNT(*) AS Total_Transactions FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 WEEK) """,
        'SQLResult':"162",
        'Answer':"You made 162 transactions in last 2 week."
    },
     {
        'Question':"Amount spent last week ",
        'SQLQuery':"""SELECT SUM(Withdrawal_amount) AS Total_Spending FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 1 WEEK) """,
        'SQLResult':"",
        'Answer':"0 is your total spending last week."
    }
    , {
        'Question': "Expenses for each day this month ?",
        'SQLQuery': """SELECT Date, SUM(Withdrawal_amount) AS Total_Expenses FROM transactions WHERE YEAR(Value_date) = YEAR(CURRENT_DATE()) AND MONTH(Value_date) = MONTH(CURRENT_DATE()) GROUP BY Date """,
        'SQLResult':
        """
Date                        Total_Expenses
2024-06-02                   183648
2024-06-03                   80666
2024-06-04                   135504
2024-06-05                   92031
2024-06-06                   118475
2024-06-07                   16440
2024-06-09                   62112
2024-06-10                   10331
"""
        ,
        'Answer': """The total_expenses is 183648,80666,135504,92031,118475,16440,62112,10331 for the year 2024 month of 6 and date 02,03,04,05,06,07,09 and 10.  """         
    },
    {
        'Question':"savings of previous year for each month ",
        'SQLQuery':"""SELECT MONTH(Value_date) AS Month, (SUM(Deposit_amount) - SUM(Withdrawal_amount)) AS Total_Savings FROM transactions WHERE YEAR(Value_date) = YEAR(CURRENT_DATE() - INTERVAL 1 YEAR) GROUP BY MONTH(Value_date) ORDER BY MONTH(Value_date) """,
        'SQLResult':"""
        Date                Total_Expenses
        1                       -311244
        2                          9101
        3                         3476          
        4                       -15971
        5                       322249
        6                       -172560
        7                       361471
        8                       -446844
        9                       93215
        10                      -73917
        11                      294502
        12                      -404028""",
        'Answer':"Month-1:-311244, Month-2:9101, Month-3:3476"
    },
     {
        'Question':"Amount Saved each year",
        'SQLQuery':"""SELECT YEAR(Value_date) AS Year,SUM(Deposit_amount) - SUM(Withdrawal_amount) AS Savings FROM transactions GROUP BY YEAR(Value_date) """,
        'SQLResult':"""
        Year                Savings
        2021                 497766
        2022                 272458 
        2023                 -340550          
        2024                 -148114
       """,
        'Answer':"You saved 497766 in 2021 ,272458 in 2022, -340550 in 2023 and -148114 in 2024."
    }
    ,
     {
        'Question':"Savings of last 2 year  ",
        'SQLQuery':"""SELECT YEAR(Value_date) AS Year, SUM(Deposit_amount) - SUM(Withdrawal_amount) AS Net_Savings FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 YEAR) GROUP BY YEAR(Value_date) """,
        'SQLResult':"""
        Year                Savings
        2022                 2270453
        2023                 -895783          
        2024                 -912470
        """,
        'Answer':"You saved 2270453 in 2022, -89578 in 2023 and -912470 in 2024."
    }
    ,
     {
        'Question':"Expenses of each day of last week  ",
        'SQLQuery':"""SELECT DATE(Value_date) AS Date, SUM(Withdrawal_amount) AS Total_Spending FROM transactions WHERE Value_date >= DATE_SUB(CURDATE(), INTERVAL 1 WEEK) GROUP BY DATE(Value_date) """,
        'SQLResult':"""
        Year                Savings
        2024-06-06           118475
        2024-06-07           16440         
        2024-06-09           62112
        2024-06-10           10331

        """,
        'Answer':"You spend 118475,16440,62112,10331."
    },
    {
        'Question':"Amount spent this year ?",
        'SQLQuery':"""SELECT year(Value_date) as Year,sum(Withdrawal_amount) AS Total_Expenses FROM transactions WHERE year(Value_date) = year(curdate()) group by year(Value_date) """,
        'SQLResult':"""
        Year                Total_Expenses
       2024                    6527128485
        """,
        'Answer':"You spent 36616174 this year."
    },
    {
        'Question':"amount/money saved each month previous year ?",
        'SQLQuery':"""SELECT YEAR(Value_date) AS Year, MONTH(Value_date) AS Month, SUM(Deposit_amount) - SUM(Withdrawal_amount) AS Savings FROM transactions WHERE YEAR(Value_date) = YEAR(CURRENT_DATE - INTERVAL 1 YEAR) GROUP BY YEAR(Value_date), MONTH(Value_date) """,
        'SQLResult':"""
        Year            Month              Savings
       2023              1                  -311244
       2023             2                       9101
       2023             3                       3476    
       2023             4                       -15971
       2023             5                       322249
       2023             6                       -172560
        2023             7                   361471
       2023             8                       -446844
       2023             9                       93215    
       2023             10                       -73917
       2023            11                       294502
       2023             12                       -404028
        """,
        'Answer':"You lost 311244 in month 1 ,saved 9101,3476 in month 2 and month 3  previous year."
    }
     ,
      {
        'Question':"income of the previous year ",
        'SQLQuery':"""SELECT SUM(Deposit_amount) AS Total_Income FROM transactions WHERE YEAR(Value_date) = YEAR(CURRENT_DATE) - 1 """,
        'SQLResult':"38593214103",
        'Answer':"Your income previous ysear was 38593214103."
    }
    ,
      {
        'Question':"income of the first 4 month of previous year ",
        'SQLQuery':"""SELECT SUM(Deposit_amount) AS Total_Income FROM transactions WHERE MONTH(Value_date) IN (1,2,3,4) AND YEAR(Value_date) = YEAR(CURRENT_DATE) - 1 """,
        'SQLResult':"17847505444",
        'Answer':"Your income of first 4 month of previous year was 17847505444."
    },
    {
    'Question': "amount I lost each year?",
    'SQLQuery': """
                SELECT YEAR(Value_date) AS Year,SUM(Withdrawal_amount) - SUM(Deposit_amount) AS Losses 
                FROM transactions 
                GROUP BY YEAR(Value_date);
                """,
    'SQLResult': """[
        (2022, Decimal('211175202')),
        (2023, Decimal('575324140')),
        (2024, Decimal('490771857')),
        (2021, Decimal('906602921')),
        (2020, Decimal('1076993185'))

    ]"""
    ,
    'Answer': "You lost 497766 in 2021, 272458 in 2022, -340550 in 2023, and -148114 in 2024."
}

]



In [7]:
# # Define a function to transform the few_shots into the desired format
# import json
# def transform_few_shots(few_shots):
#     transformed = []
#     for shot in few_shots:
#         input_text = shot['Question']
#         output_text = shot['Answer']
#         reformatted_text = f'<s>[INST] {input_text} [/INST] {output_text} </s>'
#         transformed.append({'text': reformatted_text})
#     return transformed

# # Transform the few_shots
# transformed_few_shots = transform_few_shots(few_shots)

# # Save the transformed few_shots to a file
# with open('transformed_few_shots.json', 'w') as f:
#     json.dump(transformed_few_shots, f, indent=4)

In [8]:
from langchain.prompts import SemanticSimilarityExampleSelector
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

to_vectorize = [" ".join(example.values()) for example in few_shots]

  warn_deprecated(


In [9]:
to_vectorize 

[' Income of last 3 months. ? SELECT SUM(Deposit_amount) AS Total_Income FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH)  1530205868 1530205868 is the income of last 3 months.',
 'Total expenses/spendings of last 8 months ? SELECT SUM(Withdrawal_amount) AS Total_Expenses FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 8 MONTH)  11274528407 11274528407 is the total expenses of last 8 months.',
 'Total saving of last month as my account number is 409000493201\'  ? SELECT (SUM(Deposit_amount) - SUM(Withdrawal_amount)) AS Savings_Last_Month FROM \ntransactions WHERE Account_No = "409000493201\'" AND YEAR(Value_date) = YEAR(CURRENT_DATE() \n- INTERVAL 1 MONTH) AND MONTH(Value_date) = MONTH(CURRENT_DATE() - INTERVAL 1 MONTH);\n         -193509 You lost -193509 last month.',
 'How many transactions did I make in last 2 week   SELECT COUNT(*) AS Total_Transactions FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 

In [10]:
vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=few_shots)

In [11]:
example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vectorstore,
    k=1,
)
example_selector.select_examples({"Question": "How many transactions did I make this month  "})

# give me the Breakdown of income for each day this week as my account number is 409000493201'

[{'Answer': 'You made 162 transactions in last 2 week.',
  'Question': 'How many transactions did I make in last 2 week  ',
  'SQLQuery': 'SELECT COUNT(*) AS Total_Transactions FROM transactions WHERE Value_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 WEEK) ',
  'SQLResult': '162'}]

In [12]:
# # Define the custom prompt for SQL database interactions
# custom_mysql_prompt = """
# You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question. Prepare the correct MySQL query by looking at the top {top_k} similar queries.
# Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
# Never query for all columns from a table. You must query only the columns that are needed to answer the question.
# Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. 
# Pay attention to use CURDATE() function to get the current date, if the question involves "today","this year","this month" ,"this week".

# Table information:
# 1. `transactions` with columns: `Account_No`, `Date`, `Transaction_details`, `Value_date`, `Withdrawal_amount`, `Deposit_amount`, `Balance_amount`

# Remember to only use the provided table columns and structure your queries to handle the specified requests accurately.Only answer the question given by the user and dont assume any question by yourself.
#  """

In [13]:
# # Define the custom prompt for SQL database interactions
# custom_mysql_prompt = """You are an expert in converting natural language into MySQL query.Only use the columns that are specified and needed to answer the question.
# Use query for at most {top_k} results.
# Table information:
# 1. `transactions` with columns: `Account_No`, `Date`, `Transaction_details`, `Value_date`, `Withdrawal_amount`, `Deposit_amount`, `Balance_amount`
# """

In [14]:
mysql_prompt = """
You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. 
Pay attention to use only the column names given in the below table information and never use any columns that does not exist. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today","this year","this month","this week".

Table information:
1. `transactions` with columns: `Account_No`, `Date`, `Transaction_details`, `Value_date`, `Withdrawal_amount`, `Deposit_amount`, `Balance_amount`
"""

In [15]:
from langchain.prompts import FewShotPromptTemplate
from langchain.chains.sql_database.prompt import PROMPT_SUFFIX,_mysql_prompt

In [16]:
print(_mysql_prompt)

You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURDATE() function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of

In [17]:
print(PROMPT_SUFFIX)

Only use the following tables:
{table_info}

Question: {input}


In [18]:
from langchain.prompts.prompt import PromptTemplate

example_prompt = PromptTemplate(
    input_variables=["Answer", "Question", "SQLQuery","SQLResult",],
    template="\nAnswer: {Answer}\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}",
)

In [19]:
print(PROMPT_SUFFIX)

Only use the following tables:
{table_info}

Question: {input}


In [20]:
suffix_eg="Question: {input}"

In [21]:
print(suffix_eg)

Question: {input}


In [22]:
few_shot_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix=mysql_prompt,
    suffix="Question: {input}",
    input_variables=["input", "top_k"], 
)

In [23]:
new_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, prompt=few_shot_prompt)


In [24]:
print(few_shot_prompt)

input_variables=['input', 'top_k'] example_selector=SemanticSimilarityExampleSelector(vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000023677F21010>, k=1, example_keys=None, input_keys=None, vectorstore_kwargs=None) example_prompt=PromptTemplate(input_variables=['Answer', 'Question', 'SQLQuery', 'SQLResult'], template='\nAnswer: {Answer}\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}') suffix='Question: {input}' prefix='\nYou are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.\nUnless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.\nNever query for all columns from a table. You must query only the columns that are needed to answer th

In [25]:
# result = new_chain("What was my total earning last week as my account number is 409000493201'")

In [26]:
example_questions = [shot['Question'].strip() for shot in few_shots]
print(example_questions)

['Income of last 3 months. ?', 'Total expenses/spendings of last 8 months ?', "Total saving of last month as my account number is 409000493201'  ?", 'How many transactions did I make in last 2 week', 'Amount spent last week', 'Expenses for each day this month ?', 'savings of previous year for each month', 'Amount Saved each year', 'Savings of last 2 year', 'Expenses of each day of last week', 'Amount spent this year ?', 'amount/money saved each month previous year ?', 'income of the previous year', 'income of the first 4 month of previous year', 'amount I lost each year?']


In [27]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')



In [28]:
example_embeddings = model.encode(example_questions)

In [29]:
from sklearn.metrics.pairwise import cosine_similarity  
# Function to determine if a user question is relevant to the database context
def is_question_relevant(user_question, similarity_threshold=0.60):
    user_embedding = model.encode([user_question])[0]  
    
    # calculating the cosine similarity between user question and example questions
    similarity_scores = cosine_similarity(user_embedding.reshape(1, -1), example_embeddings)[0]
    print(similarity_scores)
    
    max_similarity_score = max(similarity_scores)
    print(max_similarity_score)
    return max_similarity_score > similarity_threshold

In [30]:
from guardrails import Guard, OnFailAction
from guardrails.hub import CompetitorCheck, ToxicLanguage
from guardrails.hub import ProfanityFree

    Importing validators from `guardrails.validators` is deprecated.
    All validators are now available in the Guardrails Hub. Please install
    and import them from the hub instead. All validators will be
    removed from this module in the next major release.

    Install with: `guardrails hub install hub://<namespace>/<validator_name>`
    Import as: from guardrails.hub import `ValidatorName`
    
  warn(


In [31]:

guard = Guard().use_many(
    CompetitorCheck(["khalti", "fusemachine"], on_fail=OnFailAction.EXCEPTION),
    ToxicLanguage(threshold=0.9, validation_method="sentence", on_fail=OnFailAction.EXCEPTION),
    ProfanityFree(on_fail=OnFailAction.EXCEPTION))




In [32]:
# Function to handle user queries
def handle_user_query(user_question):
    if is_question_relevant(user_question):
        # response=new_chain(user_question)
        return True
    else:
        return "Your Question is not Relevant"

In [33]:
user_question = "Give me the average spendings of mine each month in this year"

In [34]:
try:
    # Validate the user question
    guard.validate(user_question)  # If validation fails, an exception will be thrown
    # If validation passes, handle the user query
    response = handle_user_query(user_question)
except Exception as e:
    response = str(e)

[0.61915165 0.6830846  0.58956814 0.46739852 0.57483757 0.6680057
 0.4967662  0.443633   0.4657925  0.5154394  0.58743286 0.488149
 0.40804064 0.4355242  0.4264807 ]
0.6830846


In [35]:
print(response)

True


In [38]:
if response==True:
    new_chain(user_question)
else:
    print(response)

  warn_deprecated(




[1m> Entering new SQLDatabaseChain chain...[0m
Give me the average spendings of mine each month in this year
SQLQuery:[32;1m[1;3mSELECT AVG(Value_date) AS Average_Monthly_Spending FROM transactions WHERE YEAR(Value_date) = YEAR(CURRENT_DATE()) GROUP BY DATE(Value_date)[0m
SQLResult: [33;1m[1;3m[(Decimal('20240102.0000'),), (Decimal('20240103.0000'),), (Decimal('20240104.0000'),), (Decimal('20240106.0000'),), (Decimal('20240107.0000'),), (Decimal('20240109.0000'),), (Decimal('20240110.0000'),), (Decimal('20240111.0000'),), (Decimal('20240112.0000'),), (Decimal('20240113.0000'),), (Decimal('20240116.0000'),), (Decimal('20240117.0000'),), (Decimal('20240118.0000'),), (Decimal('20240119.0000'),), (Decimal('20240120.0000'),), (Decimal('20240121.0000'),), (Decimal('20240123.0000'),), (Decimal('20240125.0000'),), (Decimal('20240126.0000'),), (Decimal('20240127.0000'),), (Decimal('20240128.0000'),), (Decimal('20240130.0000'),), (Decimal('20240131.0000'),), (Decimal('20240201.0000'),),

In [37]:
# db_chain.run("""SELECT SUM(Deposit_amount) - SUM(Withdrawal_amount) 
#              AS Savings FROM transactions WHERE Account_No = "409000493201'" AND year(Value_date) = year(curdate())""")