### Import Necessary Packages

In [1]:
import openai
import os
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.llms import AzureOpenAI

from langchain.vectorstores import AzureSearch
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
import azure.identity


import pyodbc
import json

import warnings
warnings.filterwarnings('ignore')

### Connect to LLM and Embeddings

In [2]:
from langchain_openai import AzureOpenAIEmbeddings
from langchain_openai import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI

load_dotenv() # Load environment variables from the .env file
deployment_name                       = "CART"
embedding_deployment_name             = os.getenv("embedding_deployment_name") 
AZURE_OPENAI_API_TYPE                 = os.getenv("AZURE_OPENAI_API_TYPE")
AZURE_OPENAI_API_KEY                  = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT                 = os.getenv("AZURE_OPENAI_API_BASE")
AZURE_OPENAI_API_VERSION              = os.getenv("AZURE_OPENAI_API_VERSION_CHAT")
AZURE_OPENAI_API_VERSION_EMBEDDING    = os.getenv("AZURE_OPENAI_API_VERSION_EMBEDDING")

os.environ["OPENAI_API_VERSION"]      = AZURE_OPENAI_API_VERSION
os.environ["AZURE_OPENAI_ENDPOINT"]   = AZURE_OPENAI_ENDPOINT
os.environ["AZURE_OPENAI_API_KEY"]    = AZURE_OPENAI_API_KEY

print("Establishing connection with GPT-4 Turbo OpenAI LLM.")
llm = AzureChatOpenAI(
                        deployment_name = deployment_name,
                        temperature=0.0
                    )
print("Established connection with GPT-4 Turbo OpenAI LLM.")

print("Fetching GPT-4 OpenAI Embeddings.")
embeddings_model = AzureOpenAIEmbeddings(
                                            model          = "text-embedding-ada-002",
                                            azure_endpoint = AZURE_OPENAI_ENDPOINT,
                                            api_key        = AZURE_OPENAI_API_KEY,
                                            openai_api_version = AZURE_OPENAI_API_VERSION_EMBEDDING
                                        )
print("Fetched with GPT-4 OpenAI Embeddings.")

Establishing connection with GPT-4 Turbo OpenAI LLM.
Established connection with GPT-4 Turbo OpenAI LLM.
Fetching GPT-4 OpenAI Embeddings.
Fetched with GPT-4 OpenAI Embeddings.


In [3]:
print(llm.invoke("Tell me a joke."))

content='Why did the tomato turn red?\n\nBecause it saw the salad dressing!' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 13, 'total_tokens': 27, 'completion_tokens_details': None}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-972bff2c-73ce-43f3-aa4e-b46f144b8300-0'


In [4]:
#embeddings_model.embed_query("Tell me a joke.")

### Connect to Azure AI Search

In [5]:
AZURE_COGNITIVE_SEARCH_SERVICE_NAME     = os.getenv("AZURE_COGNITIVE_SEARCH_SERVICE_NAME")
AZURE_COGNITIVE_SEARCH_API_KEY          = os.getenv("AZURE_COGNITIVE_SEARCH_API_KEY")
AZURE_COGNITIVE_SEARCH_INDEX_NAME       = os.getenv("AZURE_COGNITIVE_SEARCH_INDEX_NAME")
AZURE_COGNITIVE_SEARCH_SERVICE_ENDPOINT =  f"https://{AZURE_COGNITIVE_SEARCH_SERVICE_NAME}.search.windows.net"


print("Initializing Azure Search.")

Azure_Client = AzureSearch(
                                 azure_search_endpoint = AZURE_COGNITIVE_SEARCH_SERVICE_ENDPOINT,
                                 azure_search_key      = AZURE_COGNITIVE_SEARCH_API_KEY,
                                 index_name            = AZURE_COGNITIVE_SEARCH_INDEX_NAME,
                                 embedding_function    = embeddings_model.embed_query ,
                          )

print("AzureSearch client initialized successfully.")

Initializing Azure Search.
AzureSearch client initialized successfully.


### Load PDF into Azure AI Search

In [6]:
Reg_AA_Path = r"C:\GenAI\CART-Azure-IAAS\regulations\Reg_AA (Unfair or Deceptive Acts or Practices)\archive\Regulation_AA.pdf"

Reg_AA_Loader = PyPDFLoader(Reg_AA_Path, extract_images=True)
Reg_AA_Documents = Reg_AA_Loader.load()

# Splitting the documents into chunks
Text_Splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
Reg_AA_Text = Text_Splitter.split_documents(Reg_AA_Documents)


In [7]:
# Assuming the previous code has already established the connection to Azure Cognitive Search and split the PDF into chunks

from langchain.docstore.document import Document

# Assuming Reg_AA_Text already contains the split documents
# Add metadata to each chunk
for i, chunk in enumerate(Reg_AA_Text):
    # Example of metadata: you can add custom fields like source, page number, etc.
    chunk.metadata = {
        "source": Reg_AA_Path,
        "document_type": "Regulation",
        "regulation_name": "Reg AA",
        "chunk_index": i  # Add a chunk index to track chunks
    }

# Insert the chunks (with metadata) into the Azure Cognitive Search index
print("Uploading document chunks to Azure Cognitive Search...")

# Convert the documents to the format AzureSearch expects, which is typically a list of `Document` objects
azure_documents = [
    Document(page_content=doc.page_content, metadata=doc.metadata) for doc in Reg_AA_Text
]

# Add the documents to the Azure Search index
Azure_Client.add_documents(azure_documents)

print(f"Successfully uploaded {len(azure_documents)} chunks to Azure Cognitive Search.")

Uploading document chunks to Azure Cognitive Search...
Successfully uploaded 61 chunks to Azure Cognitive Search.


In [8]:
docs = Azure_Client.similarity_search(
    query="What is RegAA about?",
    k=3,
    search_type="similarity",
)
print(docs[1].page_content)

or practice has occurred and whether further supervisory or enforcement actions are appropriate.  
General Guidanc e 
Based on the results of the risk assessment of  the entity, e xaminers should review for potential 
unfair, deceptive, or abusive acts or practices , taking into account an entity’s  marketing 
programs , product and service mix, customer base , and other factor s, as appropriate.  Even if the 
risk assessment has not identified potential unfair, deceptive , or abusive acts or practices, 
examiner s should be alert throughout an examination for situations that warrant review.  
1. Document Review  
a. To initially identify potential areas of UDAAP concerns, o btain and review copies of the 
following to the extent relevant to the examination:  
b. Training materia ls. 
c. Lists of products and services, including descriptions, fee structure , disclosures, notices, 
agreements, and periodic and account statements .


### Prompt Templates

In [9]:
Reg_AA_prompt_template = """
                                    You are an expert in CFPB regulations. Given data is the context of Regulation AA.
                                    Context:{context}
                                    Does the following complaint fall under Regulation AA?
                                    Complaint:"{complaint}"       
            
                                    Answer with 'Yes' or 'No'. 

                                    Provide an explanation. Make sure the explanation ends in four or less that four sentences.
                                    Explanation:
                                    
                                    Please return the answer in the following dictionary format:
                                    {{"Answer": "Yes" or "No", "Explanation": "Your explanation here."}}
                                """

Reg_AA_PROMPT      = PromptTemplate(template=Reg_AA_prompt_template, input_variables=["context", "complaint"])

### Create a chain and a retriever

In [10]:
# Initialize the Retriever
Reg_AA_Retriever = Azure_Client.as_retriever()

# Creating a chain
REG_AA_chain = (
                    {"context": Reg_AA_Retriever,"complaint": RunnablePassthrough()}
                    | Reg_AA_PROMPT
                    | llm
                )

### Classifying complaints

In [11]:
############################################################# Connecting to SQL Server ################################################################
print("Connecting to SQL Server.")
server    = 'DESKTOP-VONKKUH'  # e.g., 'localhost\SQLEXPRESS'
database  = 'CART'  # e.g., 'CART_DB'
driver    = '{ODBC Driver 17 for SQL Server}'  # Ensure you have the correct ODBC driver installed
conn      = pyodbc.connect(f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes;')
print("Established  Connection to the CART Database in SQL Server.")

Connecting to SQL Server.
Established  Connection to the CART Database in SQL Server.


In [12]:
complaint_text = "I never agreed to information sharing with a company I never agreed or signed a legal binding contract with XXXXXXXX XXXX I never signed a contract with this company My information was shared by Navy federal without my permission XXXX stole my information They do not have my permission to report inaccurate accounts with balances Thats a charge off And late payments is violated my consumer rights"
complaint_text

'I never agreed to information sharing with a company I never agreed or signed a legal binding contract with XXXXXXXX XXXX I never signed a contract with this company My information was shared by Navy federal without my permission XXXX stole my information They do not have my permission to report inaccurate accounts with balances Thats a charge off And late payments is violated my consumer rights'

In [13]:
def classify_complaint(complaint_text,chain):
    response = chain.invoke(complaint_text).content
    response = json.loads(response)
    if response['Answer'].lower() == "yes":
        answer = 1
    elif response['Answer'].lower() == "no":
        answer = 0
    else:
        answer = None
    regulation_explanation = response["Explanation"]
    return answer, regulation_explanation

chain = REG_AA_chain


In [20]:
response = chain.invoke(complaint_text).content
response = json.loads(response)
response

{'Answer': 'No',
 'Explanation': 'The complaint does not directly relate to any of the specific standards outlined in Regulation AA. It appears to be more focused on issues related to information sharing and inaccurate reporting, which may fall under other regulations or laws.'}

In [14]:
answer, regulation_explanation = classify_complaint(complaint_text,chain)


In [15]:
answer

0

In [16]:
regulation_explanation

'The complaint does not directly relate to any of the specific standards outlined in Regulation AA. It appears to be more focused on issues related to information sharing and inaccurate reporting, which may fall under other regulations or laws.'

In [17]:
print(response)

NameError: name 'response' is not defined

In [16]:
explanation = str(response) 
explanation = explanation.replace("\n\nExplanation:",'')
explanation

"content='Answer: Yes\\n\\nExplanation: The complaint involves issues related to information sharing without clear justification, inaccurate reporting of accounts with balances, and violation of consumer rights, which are all covered under Regulation AA.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 1383, 'total_tokens': 1422, 'completion_tokens_details': None}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-cecf2552-4dcc-41ba-835f-8b01f24d5e2e-0'"