In [1]:
import os
import openai
import sys
from langchain.schema import Document
from typing import List, Dict

# Required to bypass some issues with ChromaDB and python3.11:
# import pysqlite3, then move its entry in sys.modules to the key 'sqlite3', so any
# later `import sqlite3` in this kernel resolves to pysqlite3 rather than the
# standard-library module.
# NOTE(review): presumably this has to run before chromadb is first imported - confirm.
__import__('pysqlite3')
# `sys` is already imported at the top of this cell; the repeated import is harmless.
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# Allow nested asyncio loops
# (patches asyncio via nest_asyncio so an already-running loop can be re-entered)
import nest_asyncio
nest_asyncio.apply()

In [2]:
from openai import AzureOpenAI
    
# Azure OpenAI client used by the `evaluate` judge below.
# Key and endpoint are read from the environment; nothing is hard-coded here.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version="2024-02-01",
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
)

def evaluate(prediction: str, expected: str, model: str = "gpt-4") -> None:
    """Ask an LLM judge whether ``prediction`` and ``expected`` mean the same thing.

    The judge's one-line verdict (EQUAL or DIFFERENT plus a short reason) is
    printed; nothing is returned, so a trailing ``evaluate(...)`` in a cell does
    not produce an extra ``Out[n]`` value.

    Args:
        prediction: Answer produced by the system under test.
        expected: Reference answer to compare against.
        model: Model/deployment name passed to the chat-completions call.
            Defaults to "gpt-4", the value that was previously hard-coded.
    """
    # Prompt text is kept verbatim (including its spelling) so the judge sees
    # exactly the same instructions as before.
    system_prompt = (
        "You are a judge that determins if two sentences mean basically the same. "
        "It does not matter they use different words. "
        "Evaluate just the main intention and numbers. "
        "Answer EQUAL or DIFFERENT with one sentence explaining why. "
        "The first sentece is delimited by ####, the second sentence is delimited by $$$$"
    )
    response = client.chat.completions.create(
        model=model,
        # temperature=0 keeps the verdict as repeatable as possible across
        # notebook re-runs; a judge should not be sampled creatively.
        temperature=0,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"####{prediction}####  $$$${expected}$$$$"},
        ])
    print(response.choices[0].message.content)

### Loading and splitting PDF data

In [3]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
# RecursiveCharacterTextSplitter
def load_pdf(file_path: str) -> List[Document]:
    """Load the PDF at ``file_path`` with PyPDFLoader and return the resulting documents."""
    return PyPDFLoader(file_path).load()

# Source PDFs to index. The original cell listed this same file twice, which
# would only add duplicate chunks to the vector store, so it is loaded once.
pdf_paths = ["../doc/vfcon072758.pdf"]

# load_pdf is a plain function (it has no `.load` attribute) and returns a list
# of Documents per file; `extend` keeps pdf_docs a flat list of Documents, which
# is what split_documents iterates over (`append` would nest lists).
pdf_docs = []
for pdf_path in pdf_paths:
    pdf_docs.extend(load_pdf(pdf_path))

text_splitter = CharacterTextSplitter()
# Alternative splitter kept for experimentation:
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500, length_function=len)

pdf_segments = text_splitter.split_documents(pdf_docs)
print(f'Number of PDF segments is  {len(pdf_segments)}')

Number of PDF segments is  5


### Preparing a very simple Vector Store

In [4]:
from langchain.vectorstores import Chroma
from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings


# Everything that gets indexed; at the moment that is just the PDF segments.
all_segments = pdf_segments

# Embedding model served from an Azure OpenAI deployment.
# Key and endpoint are read from the environment.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

print(f'Adding {len(all_segments)} segments to the vector store')

# No persist_directory is passed here. To keep the index on disk, pass e.g.
# persist_directory='./chroma_db' (the original commented-out code also called
# vectordb.persist() afterwards - presumably needed on older Chroma versions).
vectordb = Chroma.from_documents(documents=all_segments, embedding=embeddings)


Adding 5 segments to the vector store


## RetrievalQA

In [5]:
from langchain.chains import RetrievalQA
from langchain_openai import AzureChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain

# Chat model that answers the questions.
# The key is read from AZURE_OPENAI_API_KEY, the same variable the judge client
# and the embeddings use; the previously used AZURE_OPENAI_KEY name is kept as a
# fallback so environments that only define the old name keep working.
llm = AzureChatOpenAI(
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY", os.getenv("AZURE_OPENAI_KEY")),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    deployment_name="gpt-35-turbo",
    temperature=0.2,
    openai_api_version="2023-05-15")


# Retrieval QA chain over the vector store.
# The retriever runs a similarity search and hands back a single segment (k=1).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(
        search_type="similarity",
        search_kwargs=dict(k=1),
    ),
    chain_type="stuff",
)

def qa(query):
    """Run ``query`` through ``qa_chain`` and return only the answer text.

    Uses ``invoke`` rather than calling the chain object directly: calling a
    chain is deprecated in LangChain and emits a deprecation warning.

    Args:
        query: The question to answer.

    Returns:
        The value stored under the ``'result'`` key of the chain's output.
    """
    return qa_chain.invoke({"query": query})['result']


### Let's test it!

In [6]:
# Speed-limit question: print the chain's answer, then let the judge compare it
# with the reference sentence.
result = qa(query="What are the speed limits for Unlimited Lite?")
print(result)
evaluate(
    prediction=result,
    expected="The speed limit for Vodafone Unlimited Lite is 2Mbps",
)

  warn_deprecated(


The speed limit for Vodafone Unlimited Lite is 2Mbps.
EQUAL. The sentences communicate the same thing: the speed limit of Vodafone Unlimited Lite is 2Mbps.


In [7]:
# Video-calling price question, judged against the reference sentence.
result = qa(query="What is the cost of video calling to UK mobiles?")
print(result)
evaluate(
    prediction=result,
    expected="The cost of video calling to UK mobiles is 55p per minute.",
)

The cost of video calling to UK mobiles is 55p per minute.
EQUAL - Both sentences convey the same message about the cost of video calling to UK mobiles being 55p per minute.


In [8]:
# EU text-message price question, judged against the reference sentence.
result = qa(query="How much is sending a message to EU?")
print(result)
evaluate(
    prediction=result,
    expected="Sending a standard text message to EU destinations costs 19p per message for Pay monthly and SIM only plans",
)

Sending a standard text message to EU destinations costs 19p per message for Pay monthly and SIM only plans.
EQUAL - Both sentences convey the same information about the cost of sending a standard text message to EU destinations for Pay monthly and SIM only plans.


In [13]:
# Contact-number question about disputing the end of an agreement.
# Right answer: Phone 191 from your Vodafone phone, or 0333 3040 191 (from UK
# landlines or other mobiles).
result = qa(query="I disagree about when I have to end my agreement. Which number I can call?")
print(result)
evaluate(
    prediction=result,
    expected="You can contact Vodafone customer service by calling 191 from your Vodafone phone or 0333 3040 191 from UK landlines or other mobiles.They will be able to assist you with any questions or concerns regarding the termination of your agreement.",
)

You can contact Vodafone customer service by calling 191 from your Vodafone phone or 0333 3040 191 from UK landlines or other mobiles. They will be able to assist you with any questions or concerns about ending your agreement.
EQUAL - Both sentences are instructing to contact Vodafone customer service using the same numbers for assistance with any queries or concerns about wrapping up your contract.


In [21]:
# Price of calling an 09 number, judged against the reference sentence.
result = qa(query="I am making a call to a number starting with 09. What is the cost?")
print(result)
evaluate(
    prediction=result,
    expected="The cost of making a call to a number starting with 09 is 65p per minute for Vodafone's Access Charge, plus the Service Charge set by the service or organization you are calling.",
)

The cost of making a call to a number starting with 09 is 65p per minute for Vodafone's Access Charge, plus the Service Charge set by the service or organization you are calling.
EQUAL, because both sentences convey the exact same information regarding the cost of making a call to a number starting with 09 for Vodafone's Access Charge.


### Tricky ones


In [11]:
# Under-specified question: the reference answer is an admission of not knowing
# the user's allowance, so a concrete answer from the chain should not match it.
result = qa(query="What is included in my allowance?")
print(result)
evaluate(
    prediction=result,
    expected="I don't know what is your current allowance!",
)

Your allowance includes calls to any mobile network within the UK, standard UK landlines (starting 01, 02, 03), voicemail, standard text messages, and data usage. It may also include a monthly roaming allowance for calls to any mobile network in the UK or in our Roam-free destinations.
DIFFERENT because the first sentence is describing what the allowance includes, while the second sentence is a statement of not knowing what the current allowance is.


In [9]:
# Differently worded EU text-message question; the reference answer is the
# per-message price.
result = qa(query="How much is sending a message to abroad, to the EU?")
print(result)
evaluate(
    prediction=result,
    expected="Sending a standard text message to EU destinations costs 19p per message for Pay monthly and SIM only plans",
)

Sending a text message to EU destinations from the UK costs 19p per minute for Pay monthly and SIM only plans.
DIFFERENT because the first sentence charges per minute, while the second sentence charges per message.
