In [9]:
import os
import openai
import sys
from langchain.schema import Document
from typing import List, Dict

# Workaround for ChromaDB issues on Python 3.11: load `pysqlite3` and register
# it in sys.modules under the name `sqlite3`, so that any later
# `import sqlite3` resolves to it instead of the standard-library module.
import sys
__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

# Locate the nearest .env file and load its entries into the process
# environment; later cells read their settings through os.getenv().
from dotenv import find_dotenv, load_dotenv
_ = load_dotenv(find_dotenv())

In [10]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter

# Patch asyncio so that event loops may be nested (the loader below drives
# its own loop while fetching).
import nest_asyncio

nest_asyncio.apply()

# Pages that make up the knowledge base for this notebook.
urls = [
    "https://www.vodafone.co.uk/business/sme-business/small-business-advice/top-5-file-sharing-tools-to-secure-your-business",
    "https://www.vodafone.co.uk/mobile/extras#abroadminpaym",
    "https://www.vodafone.co.uk/mobile/global-roaming#destinations",
]

# Fetch all pages in one call; `data` holds the scraped results.
# NOTE(review): presumably one parsed page per URL, in the same order - confirm.
loader = WebBaseLoader()
data = loader.scrape_all(urls)


Fetching pages:   0%|          | 0/3 [00:00<?, ?it/s]

Fetching pages: 100%|##########| 3/3 [00:00<00:00,  5.25it/s]


### Splitting text

In [11]:
# https://blog.langchain.dev/a-chunk-by-any-other-name/
# Header tags to split on, each mapped to the metadata field that will carry
# the text of the enclosing header for a chunk.
headers_to_split_on = [
    ("h1", "article_h1_main"),
    ("h2", "article_h2_subsection"),
    ("h3", "article_h3_subsection"),
    ("h4", "article_h4_subsection"),
]

html_splitter = HTMLHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    # combine elements with the same metadata by setting return_each_element to False (default)
    return_each_element=False,
)

# Split every scraped page on its own instead of calling str() on the whole
# list: stringifying the list hands the splitter a single blob that contains
# the list brackets/separators and several HTML documents glued together, so
# page boundaries are lost and chunks cannot be traced back to a page.
# Each chunk is tagged with the URL it came from under the "source" key.
docs = []
for url, page in zip(urls, data):
    page_docs = html_splitter.split_text(str(page))
    for doc in page_docs:
        doc.metadata["source"] = url
    docs.extend(page_docs)

### Preparing a very simple Vector Store

In [12]:
from langchain.vectorstores import Chroma
from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings

# Azure OpenAI embedding client. The API key and endpoint come from the
# environment; the deployment name is fixed here.
# Note: the following cell builds `embeddings` again with an explicit `model`,
# so that later definition is the one the vector store ends up using.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

In [13]:
from langchain.vectorstores import Chroma
from langchain_openai.embeddings.azure import AzureOpenAIEmbeddings



import hashlib

# Azure OpenAI embedding client; key and endpoint are read from the
# environment, deployment and model names are fixed.
embeddings = AzureOpenAIEmbeddings(
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    azure_deployment="text-embedding-ada-002",
    model="text-embedding-ada-002")


persist_directory = './chroma_db'
number_of_docs = len(docs)
print(number_of_docs)

# Give every chunk a deterministic id derived from its text and metadata.
# Without explicit ids each run of this cell stores the same chunks again in
# the persisted collection, so retrieval starts returning duplicate copies.
# With stable ids a re-run overwrites the existing entries instead.
# Chunks that are exact duplicates of each other collapse to a single entry,
# because the store rejects repeated ids within one insert.
# NOTE: entries written by earlier runs without explicit ids are not removed;
# delete the persist directory once to start from a clean collection.
docs_by_id = {}
for doc in docs:
    fingerprint = doc.page_content + "\x00" + repr(sorted(doc.metadata.items()))
    doc_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    docs_by_id.setdefault(doc_id, doc)

# save to disk
vectordb = Chroma.from_documents(
    documents=list(docs_by_id.values()),
    ids=list(docs_by_id),
    persist_directory=persist_directory,
    embedding=embeddings)

vectordb.persist()


319


## RetrievalQA

In [14]:
from langchain.chains import RetrievalQA
from langchain_openai import AzureChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain

# Chat model used to answer questions. The API key is read from
# AZURE_OPENAI_API_KEY, the same variable the embedding client uses, so both
# clients are configured from one setting (this cell previously read
# AZURE_OPENAI_KEY, which no other cell references).
llm = AzureChatOpenAI(
    openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    deployment_name="gpt-35-turbo",
    temperature=0.7,
    openai_api_version="2023-05-15")


# Retrieval QA chain: documents come from the vector store's retriever and
# are combined with the "stuff" chain type before being sent to the model.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever()
)



### Let's test it!

In [15]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "Best solution to store losts and lots of pictures"})

{'query': 'Best solution to store losts and lots of pictures',
 'result': 'Based on the provided context, the best solution to store lots and lots of pictures would not be SecureSafe. It only offers a mere 100MB of storage space in the free version, which is not suitable for storing a large number of pictures. Additionally, the upgraded version with 100GB of storage space is quite expensive compared to other options like Microsoft OneDrive. Therefore, I would recommend considering other cloud storage options that provide a larger amount of storage space at a more affordable price.'}

In [16]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "Which provider gives the most of the space free of charge?"})

{'query': 'Which provider gives the most of the space free of charge?',
 'result': 'Google gives the most storage space free of charge, offering 15 GB when you create a Google account.'}

In [18]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "How many destinations can I call with 8-Day European Roaming Pass?"})

{'query': 'How many destinations can I call with 8-Day European Roaming Pass?',
 'result': 'You can use your plan allowance in 47 destinations with the 8-Day European Roaming Pass.'}

In [19]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "In which zone is andorra?"})

{'query': 'In which zone is andorra?', 'result': 'Andorra is in Zone D.'}

In [20]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "How much does it cost my roaming pass?"})

{'query': 'How much does it cost my roaming pass?',
 'result': 'The cost of a roaming pass depends on the duration of the pass. The 8-day pass is £12 and the 15-day pass is £17.'}

In [22]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "What pass your recommend for a weekend in France?"})

{'query': 'What pass  your recommend for a weekend in France?',
 'result': 'Based on the information provided, it seems that the 8-day European Roaming pass would not be suitable for a weekend trip to France since it is valid for 8 days. However, without more specific information about available options, it is difficult to recommend a specific pass. It would be best to check with your mobile service provider for any specific weekend passes or options for roaming in France.'}

#### Let's find difficult questions

In [23]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "I am travelling to france this weekend. What can I do to save money?"})

{'query': 'I am travelling to france this weekend. What can I do to save money?',
 'result': 'When travelling to France, there are several things you can do to save money:\n\n1. Research and book accommodations in advance: Look for budget-friendly accommodations such as hostels or guesthouses. Booking in advance can help you find better deals.\n\n2. Use public transportation: Instead of relying on taxis or rental cars, use public transportation like buses or trains. They are usually cheaper and can help you save on transportation costs.\n\n3. Eat like a local: Explore local markets and street food stalls to try affordable and delicious meals. Avoid eating at touristy restaurants, as they tend to be more expensive.\n\n4. Take advantage of free attractions and activities: Many museums, parks, and landmarks in France offer free admission or discounted rates on certain days. Research the attractions you want to visit and plan your visit accordingly to save money.\n\n5. Use a local SIM card

In [17]:
# Use invoke() rather than calling the chain object directly (the direct call
# is deprecated); the question is passed under the chain's "query" key.
qa_chain.invoke({"query": "How much does it cost to call andorra?"})

{'query': 'How much does it cost to call andorra?',
 'result': "I'm sorry, but I don't have access to specific pricing information. It would be best to check with your service provider to determine the cost of calling Andorra."}