### Import Necessary Packages

In [31]:
import openai
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.llms import AzureOpenAI

from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
import azure.identity


import pyodbc
import json

import warnings
warnings.filterwarnings('ignore')

### Connect to LLM and Embeddings

In [32]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI

load_dotenv()  # Load environment variables from the .env file

# Name of the Azure OpenAI chat deployment used by `llm` below.
deployment_name                       = "CART"
# NOTE(review): embedding_deployment_name and AZURE_OPENAI_API_TYPE are read
# here but not used anywhere in this cell - confirm whether later cells need them.
embedding_deployment_name             = os.getenv("embedding_deployment_name")
AZURE_OPENAI_API_TYPE                 = os.getenv("AZURE_OPENAI_API_TYPE")
AZURE_OPENAI_API_KEY                  = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT                 = os.getenv("AZURE_OPENAI_API_BASE")
AZURE_OPENAI_API_VERSION              = os.getenv("AZURE_OPENAI_API_VERSION_CHAT")
AZURE_OPENAI_API_VERSION_EMBEDDING    = os.getenv("AZURE_OPENAI_API_VERSION_EMBEDDING")

# os.environ only accepts strings: assigning None (a missing .env entry) fails
# with an opaque "str expected, not NoneType" TypeError. Check the three values
# that are exported below and report exactly which variables are missing.
_required_openai_settings = {
    "AZURE_OPENAI_API_VERSION_CHAT": AZURE_OPENAI_API_VERSION,
    "AZURE_OPENAI_API_BASE":         AZURE_OPENAI_ENDPOINT,
    "AZURE_OPENAI_API_KEY":          AZURE_OPENAI_API_KEY,
}
_missing_openai_settings = [
    name for name, value in _required_openai_settings.items() if not value
]
if _missing_openai_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_openai_settings)
    )

# Export the settings under the names the chat client is expected to pick up
# from the environment (no endpoint/key/version is passed to it explicitly).
os.environ["OPENAI_API_VERSION"]      = AZURE_OPENAI_API_VERSION
os.environ["AZURE_OPENAI_ENDPOINT"]   = AZURE_OPENAI_ENDPOINT
os.environ["AZURE_OPENAI_API_KEY"]    = AZURE_OPENAI_API_KEY

# NOTE(review): the messages below say "GPT-4 Turbo", but the recorded output of
# the smoke-test cell reports model_name 'gpt-35-turbo' - confirm which model
# the "CART" deployment actually serves.
print("Establishing connection with GPT-4 Turbo OpenAI LLM.")
llm = AzureChatOpenAI(
                        deployment_name = deployment_name,
                        temperature=0.0   # deterministic answers for classification
                    )
print("Established connection with GPT-4 Turbo OpenAI LLM.")

print("Fetching GPT-4 OpenAI Embeddings.")
embeddings_model = AzureOpenAIEmbeddings(
                                            model          = "text-embedding-ada-002",
                                            azure_endpoint = AZURE_OPENAI_ENDPOINT,
                                            api_key        = AZURE_OPENAI_API_KEY,
                                            openai_api_version = AZURE_OPENAI_API_VERSION_EMBEDDING
                                        )
print("Fetched with GPT-4 OpenAI Embeddings.")

Establishing connection with GPT-4 Turbo OpenAI LLM.
Established connection with GPT-4 Turbo OpenAI LLM.
Fetching GPT-4 OpenAI Embeddings.
Fetched with GPT-4 OpenAI Embeddings.


In [33]:
# Smoke test: send one trivial prompt and print the raw message returned by the chat model.
joke_reply = llm.invoke("Tell me a joke.")
print(joke_reply)

content='Why did the tomato turn red?\n\nBecause it saw the salad dressing!' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 13, 'total_tokens': 27, 'completion_tokens_details': None}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-9a2b0a5b-998e-44f6-a0de-c42ebba89349-0'


In [34]:
#embeddings_model.embed_query("Tell me a joke.")

### Connect to Azure AI Search

In [35]:
AZURE_COGNITIVE_SEARCH_SERVICE_NAME     = os.getenv("AZURE_COGNITIVE_SEARCH_SERVICE_NAME")
AZURE_COGNITIVE_SEARCH_API_KEY          = os.getenv("AZURE_COGNITIVE_SEARCH_API_KEY")
AZURE_COGNITIVE_SEARCH_INDEX_NAME       = os.getenv("AZURE_COGNITIVE_SEARCH_INDEX_NAME")

# The endpoint is derived from the service name. If the variable is missing the
# f-string would silently produce "https://None.search.windows.net", so stop
# here with a message that names the real problem.
if not AZURE_COGNITIVE_SEARCH_SERVICE_NAME:
    raise RuntimeError(
        "Missing required environment variable: AZURE_COGNITIVE_SEARCH_SERVICE_NAME"
    )

AZURE_COGNITIVE_SEARCH_SERVICE_ENDPOINT =  f"https://{AZURE_COGNITIVE_SEARCH_SERVICE_NAME}.search.windows.net"


print("Initializing Azure Search.")

# Vector store client; queries are embedded with embeddings_model.embed_query.
Azure_Client = AzureSearch(
                                 azure_search_endpoint = AZURE_COGNITIVE_SEARCH_SERVICE_ENDPOINT,
                                 azure_search_key      = AZURE_COGNITIVE_SEARCH_API_KEY,
                                 index_name            = AZURE_COGNITIVE_SEARCH_INDEX_NAME,
                                 embedding_function    = embeddings_model.embed_query ,
                          )

print("AzureSearch client initialized successfully.")

Initializing Azure Search.
AzureSearch client initialized successfully.


### Load PDF into Azure AI Search

In [36]:
# from langchain.docstore.document import Document

# # Define the paths for all regulations
# regulation_files = [
#     {"regulation_name": "Reg AA", "path": r"C:\GenAI\CART-Azure-IAAS\regulations\Reg_AA (Unfair or Deceptive Acts or Practices)\archive\Regulation_AA.pdf", "file_name": "Regulation_AA.pdf"},
#     {"regulation_name": "Reg B", "path": r"C:\GenAI\CART-Azure-IAAS\regulations\Reg_B (Equal Credit Opportunity Act - ECOA)\fair_lend_reg_b.pdf", "file_name": "fair_lend_reg_b.pdf"},
#     {"regulation_name": "Reg C", "path": r"C:\GenAI\CART-Azure-IAAS\regulations\Reg_C (Home Mortgage Disclosure Act - HMDA)\cfpb_hmda_small-entity-compliance-guide.pdf", "file_name": "cfpb_hmda_small-entity-compliance-guide.pdf"},
#     {"regulation_name": "Reg C", "path": r"C:\GenAI\CART-Azure-IAAS\regulations\Reg_C (Home Mortgage Disclosure Act - HMDA)\CFR-2024-title12-vol8-part1003.pdf", "file_name": "CFR-2024-title12-vol8-part1003.pdf"}
# ]

# # Loop through each regulation and process the files
# for file_info in regulation_files:
#     print(f"Processing {file_info['regulation_name']} - {file_info['file_name']}...")

#     # Load the PDF for the current regulation
#     reg_loader = PyPDFLoader(file_info["path"])
#     reg_documents = reg_loader.load()

#     # Split the documents into chunks
#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
#     reg_text_chunks = text_splitter.split_documents(reg_documents)

#     # Add metadata to each chunk
#     for i, chunk in enumerate(reg_text_chunks):
#         chunk.metadata = {
#             "source": file_info["path"],
#             "document_type": "Regulation",
#             "regulation_name": file_info["regulation_name"],  # Use regulation name as metadata
#             "file_name": file_info["file_name"],
#             "chunk_index": i  # Track chunk index
#         }

#     # Convert the documents to the format AzureSearch expects
#     azure_documents = [
#         Document(page_content=doc.page_content, metadata=doc.metadata) for doc in reg_text_chunks
#     ]

#     # Upload the chunks to Azure Cognitive Search
#     print(f"Uploading {len(azure_documents)} chunks for {file_info['file_name']} to Azure Cognitive Search...")
#     Azure_Client.add_documents(azure_documents)

# print("All regulation documents uploaded to Azure Cognitive Search.")


### Prompt Templates

In [37]:
# Prompt templates, one per regulation. Each takes two placeholders:
#   {context}   - regulation text retrieved from the vector store
#   {complaint} - the consumer complaint to classify
# Doubled braces ({{ ... }}) are escapes: they render as literal braces so the
# model sees the exact dictionary shape it is asked to return.
Reg_AA_prompt_template = """
                                    You are an expert in CFPB regulations. Given data is the context of Regulation AA.
                                    Context:{context}
                                    Does the following complaint fall under Regulation AA?
                                    Complaint:"{complaint}"       
            
                                    Answer with 'Yes' or 'No'. 

                                    Provide an explanation. Make sure the explanation ends in four or less than four sentences.
                                    Explanation:
                                    
                                    Please return the answer in the following dictionary format:
                                    {{"Answer": "Yes" or "No", "Explanation": "Your explanation here."}}

                                    Do not return anything except the dictionary.
                                """

# Regulation B (Equal Credit Opportunity Act) variant.
Reg_B_prompt_template =      """
                                You are an expert in CFPB regulations. Given data is the context of Regulation B, Equal Credit Opportunity Act - ECOA.
                                Context:{context}
                                 Does the following complaint fall under Regulation B, Equal Credit Opportunity Act - ECOA? 
                                Complaint:"{complaint}"       
                
                                Answer with 'Yes' or 'No'. 
                
                                Provide an explanation. The explanation should be based on the context. Make sure the explanation ends in four or less than four sentences.
                                Explanation:
                                
                                Please return the answer in the following dictionary format:
                                {{"Answer": "Yes" or "No", "Explanation": "Your explanation here."}}

                                Do not return anything except the dictionary.
                             """

# Regulation C (Home Mortgage Disclosure Act) variant.
Reg_C_prompt_template = """
                                You are an expert in CFPB regulations. Given data is the context of Regulation C, Home Mortgage Disclosure Act - HMDA.
                                Context:{context}
                                Does the following complaint fall under Regulation C, Home Mortgage Disclosure Act - HMDA? 
                                Complaint:"{complaint}"       
        
                                Answer with 'Yes' or 'No'. 
        
                                Provide an explanation. The explanation should be based on the context. Make sure the explanation ends in four or less than four sentences.
                                Explanation:
                                
                                Please return the answer in the following dictionary format:
                                {{"Answer": "Yes" or "No", "Explanation": "Your explanation here."}}

                                Do not return anything except the dictionary. Strictly a dictionary.
                             """

# Wrap each raw template string in a PromptTemplate. All three templates take
# the same two inputs: the retrieved context and the complaint text.
Reg_AA_PROMPT = PromptTemplate(
    input_variables=["context", "complaint"],
    template=Reg_AA_prompt_template,
)
Reg_B_PROMPT = PromptTemplate(
    input_variables=["context", "complaint"],
    template=Reg_B_prompt_template,
)
Reg_C_PROMPT = PromptTemplate(
    input_variables=["context", "complaint"],
    template=Reg_C_prompt_template,
)


### Create a chain and a retriever

In [38]:
def _join_page_contents(documents):
    """Return the text of the retrieved chunks as one string, separated by blank lines.

    Without this step the retriever's list of Document objects is formatted into
    the prompt as its Python repr (metadata included) rather than as plain text.
    """
    return "\n\n".join(document.page_content for document in documents)


def _build_regulation_chain(retriever, prompt):
    """Build the retrieve -> prompt -> llm pipeline shared by every regulation.

    The chain's input (the complaint text) is sent to the retriever to fetch
    context and is also passed through unchanged as the `complaint` variable.
    """
    return (
        {"context": retriever | _join_page_contents, "complaint": RunnablePassthrough()}
        | prompt
        | llm
    )


# NOTE(review): LangChain's AzureSearch search methods appear to name the OData
# filter argument `filters`, not `filter`. Confirm that the `filter` key below is
# actually applied and that `regulation_name` is a filterable field in the index;
# otherwise all three retrievers may return the same unfiltered chunks.

# Reg AA Retriever and Chain
Reg_AA_Retriever = Azure_Client.as_retriever(search_type="similarity", search_kwargs={"filter": "regulation_name eq 'Reg AA'"})
Reg_AA_chain = _build_regulation_chain(Reg_AA_Retriever, Reg_AA_PROMPT)

# Reg B Retriever and Chain
Reg_B_Retriever = Azure_Client.as_retriever(search_type="similarity", search_kwargs={"filter": "regulation_name eq 'Reg B'"})
Reg_B_chain = _build_regulation_chain(Reg_B_Retriever, Reg_B_PROMPT)

# Reg C Retriever and Chain
Reg_C_Retriever = Azure_Client.as_retriever(search_type="similarity", search_kwargs={"filter": "regulation_name eq 'Reg C'"})
Reg_C_chain = _build_regulation_chain(Reg_C_Retriever, Reg_C_PROMPT)

### Classifying complaints

In [39]:
# ---------------------------------------------------------------------------
# Connecting to SQL Server
# ---------------------------------------------------------------------------
print("Connecting to SQL Server.")

server   = 'DESKTOP-VONKKUH'                   # SQL Server host, e.g. 'localhost\SQLEXPRESS'
database = 'CART'                              # target database, e.g. 'CART_DB'
driver   = '{ODBC Driver 17 for SQL Server}'   # this ODBC driver must be installed locally

# Trusted_Connection=yes: no user name or password is put in the connection string.
connection_string = (
    f'DRIVER={driver};'
    f'SERVER={server};'
    f'DATABASE={database};'
    'Trusted_Connection=yes;'
)
conn = pyodbc.connect(connection_string)

print("Established  Connection to the CART Database in SQL Server.")

Connecting to SQL Server.
Established  Connection to the CART Database in SQL Server.


In [40]:
# Sample consumer complaint used to exercise the regulation chains below.
# Adjacent string literals are concatenated into one single-line string.
complaint_text = (
    "I never agreed to information sharing with a company "
    "I never agreed or signed a legal binding contract with XXXXXXXX XXXX "
    "I never signed a contract with this company "
    "My information was shared by Navy federal without my permission "
    "XXXX stole my information "
    "They do not have my permission to report inaccurate accounts with balances "
    "Thats a charge off "
    "And late payments is violated my consumer rights"
)
complaint_text

'I never agreed to information sharing with a company I never agreed or signed a legal binding contract with XXXXXXXX XXXX I never signed a contract with this company My information was shared by Navy federal without my permission XXXX stole my information They do not have my permission to report inaccurate accounts with balances Thats a charge off And late payments is violated my consumer rights'

In [41]:
def _parse_llm_dictionary(raw_reply):
    """Parse the model's reply into a dict, tolerating text around the JSON object.

    Chat models sometimes wrap the requested dictionary in a Markdown code fence
    or add a sentence before/after it. If the reply as a whole is not valid
    JSON, retry on the span from the first '{' to the last '}'.

    Raises:
        json.JSONDecodeError: if no parseable JSON object is found.
        ValueError: if the parsed JSON is not an object (dict).
    """
    text = raw_reply.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise  # nothing that looks like an object; surface the original error
        parsed = json.loads(text[start:end + 1])
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object from the model, got {type(parsed).__name__}")
    return parsed


def classify_complaint(complaint_text,chain):
    """Run one regulation chain on a complaint and decode its verdict.

    Args:
        complaint_text: The complaint text passed to ``chain.invoke``.
        chain: Object whose ``invoke(text)`` returns a message exposing the
            model's reply as a ``.content`` string.

    Returns:
        A tuple ``(answer, regulation_explanation)``. ``answer`` is 1 for
        "yes", 0 for "no" (case and surrounding whitespace are ignored) and
        None for any other or missing value. ``regulation_explanation`` is the
        reply's "Explanation" value, or None when the key is absent.

    Raises:
        json.JSONDecodeError: if the reply contains no parseable JSON object.
        ValueError: if the reply parses to something other than a JSON object.
    """
    reply = _parse_llm_dictionary(chain.invoke(complaint_text).content)

    # str() guards against a non-string Answer (e.g. a JSON boolean), which
    # would otherwise fail on .lower().
    verdict = str(reply.get("Answer", "")).strip().lower()
    answer = {"yes": 1, "no": 0}.get(verdict)

    regulation_explanation = reply.get("Explanation")
    return answer, regulation_explanation

In [42]:
# Classify the sample complaint against Regulation AA and show the parsed reply.
reg_aa_message = Reg_AA_chain.invoke(complaint_text)
response = json.loads(reg_aa_message.content)
response



{'Answer': 'Yes',
 'Explanation': "The complaint involves issues related to information sharing, inaccurate reporting, and violation of consumer rights, which are covered under Regulation AA's standards for unfair or deceptive acts or practices."}

In [43]:
# Classify the sample complaint against Regulation B (ECOA) and show the parsed reply.
reg_b_message = Reg_B_chain.invoke(complaint_text)
response = json.loads(reg_b_message.content)
response


{'Answer': 'No',
 'Explanation': 'The complaint does not specifically relate to any violation of Regulation B, Equal Credit Opportunity Act - ECOA. It appears to be more focused on issues related to information sharing and inaccurate reporting of accounts, which may fall under other regulations or laws.'}

In [44]:
# Classify the sample complaint against Regulation C (HMDA) and show the parsed reply.
reg_c_message = Reg_C_chain.invoke(complaint_text)
response = json.loads(reg_c_message.content)
response

{'Answer': 'No',
 'Explanation': 'The complaint does not pertain to Regulation C, Home Mortgage Disclosure Act - HMDA. It is related to information sharing and inaccurate reporting of accounts, which falls under other regulations such as Fair Credit Reporting Act (FCRA) and Fair Debt Collection Practices Act (FDCPA).'}