<a href="https://colab.research.google.com/github/kyalan/DataScienceFactory/blob/master/20230409_Chatbot_ChatGPT_PoC_insert_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing packages

In [None]:
!pip install langchain
!pip install openai
!pip install chromadb
!pip install tiktoken

# Import packages and set the OpenAI API key

In [2]:
# Import necessary packages
from getpass import getpass
from pathlib import Path
import os
import openai

# Never hardcode the key in the notebook. Reuse OPENAI_API_KEY when the
# environment already provides it; otherwise prompt for it without echoing,
# so the secret is not saved in the notebook source or its outputs.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')
openai.api_key = os.getenv("OPENAI_API_KEY")

# Show which API endpoint the openai client is configured to call.
print(f'api base = {openai.api_base}')


api base = https://api.openai.com/v1


# Loading Data via LangChain
Ref: [CSV Loader](https://python.langchain.com/en/latest/modules/document_loaders/examples/csv.html)

Ref: [Retrieval Question/Answering](https://python.langchain.com/en/latest/modules/chains/index_examples/vector_db_qa.html)

In [3]:
from langchain.document_loaders.csv_loader import CSVLoader

# Path of the CSV to load, kept as the same literal string that is handed to
# the loader so the `source` metadata of each document is unchanged.
CSV_PATH = './df_vessel_mini.csv'

# Fail fast with an actionable message when the file is missing. Swallowing
# the error here would leave `data` undefined and make every later cell fail
# with an unrelated NameError.
if not Path(CSV_PATH).is_file():
    raise FileNotFoundError(
        'Have you uploaded df_vessel_mini.csv to your corresponding folder?'
    )

# Any other loading problem (e.g. a malformed CSV) is left to propagate with
# its original traceback instead of being hidden.
loader = CSVLoader(file_path=CSV_PATH)
data = loader.load()

In [5]:
# Peek at the first five loaded documents that will be handed to the LLM
data[:5]

[Document(page_content='vessel_key: 200\nvessel_name: Australian Express\nstatus: archived\nimo_number: 8813609\nowner: Vroon BV\nvessel_type: Container Vessel\nvessel_category: dry\nshipyard_text: SHIN KURUSHIMA\ndate_of_delivery: 1989-1-1\ndate_of_takeover: 1998-1-12\ntechgroup: Tech D1\nflag: Republic of Panama, USA\nsupdt_name: \nmisc_engine: MITSUBISHI UEC\nsummer_draft: 8.235\nsummer_dwt: 14867\ninternational_grt: 9949\ninternational_nrt: 5492\nparis2_vessel_key: 78\nmcr_kw: \nmcr_rpm: \nemission_type: ', metadata={'source': './df_vessel_mini.csv', 'row': 0}),
 Document(page_content='vessel_key: 800\nvessel_name: Chembulk Shanghai\nstatus: archived\nimo_number: 9223916\nowner: Saito Shipping Co. Ltd.\nvessel_type: Chemical Tanker\nvessel_category: tanker\nshipyard_text: FUKUOKA SHIPBUILDING CO.,LTD.\ndate_of_delivery: 2000-1-1\ndate_of_takeover: 2000-9-25\ntechgroup: Tech T1\nflag: Republic of Panama, USA\nsupdt_name: \nmisc_engine: AKASAKA UEC\nsummer_draft: 9.56\nsummer_dwt: 19

In [6]:
# How large is the data
# Note: sys.getsizeof is shallow - it reports the bytes used by the `data`
# container object itself, not by the documents it references.
import sys
sys.getsizeof(data)

14360

### Constructing Q&A Bot

In [7]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

In [10]:
# Split the loaded documents into chunks (chunk_size=1000, no overlap
# between consecutive chunks).
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(data)

# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)



In [11]:
# Wire the Chroma retriever and an OpenAI LLM into a retrieval QA chain
# (chain type "stuff").
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

### Getting responses from the Q&A Bot, with OpenAI

In [16]:
# Ask the chain about one vessel. The question text is what gets sent to the
# retriever and the LLM, so it is spelled correctly ("deadweight").
query = "What is the vessel key and deadweight of Explorer Asia?"
result = qa.run(query)

In [17]:
# Display the answer string returned by the chain for `query`
result

' The vessel key for Explorer Asia is 5267 and its deadweight is 81093.9.'

In [18]:
# Follow-up question about the same vessel; the cell displays the answer.
owner_query = "What is the owner of Explorer Asia?"
qa.run(owner_query)

' The owner of Explorer Asia is Caravel Group Limited.'