<div>

</div>

# Building a Website Chatbot with LangChain, Pinecone, and Webᵀ Crawl

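This notebook builds a question-answering chatbot over a crawled website: Webᵀ Crawl supplies the page chunks, OpenAI embeds them, Pinecone stores the vectors, and LangChain connects retrieval to the chat model.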

In [None]:
!pip install openai pinecone-client langchain tiktoken

In [None]:
import json
import os
import uuid
import zipfile

import openai
import pinecone
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Pinecone

In [None]:
# Name of the Pinecone index that will hold the embedded website chunks.
index_name = "pokemon-index"

# Read credentials and the Pinecone environment from environment variables.
# When a variable is unset the literal placeholder / previous default is used,
# so replace the placeholders with real values or export the variables first.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or "OPENAI_API_KEY"
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY') or "PINECONE_API_KEY"
PINECONE_ENVIRONMENT = os.getenv('PINECONE_ENVIRONMENT') or "us-central1-gcp"

pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)

# Create the index only on first run so re-running this cell is harmless.
# NOTE(review): dimension=1536 presumably matches the vector size produced by
# the OpenAIEmbeddings model used below -- confirm if the embedding model changes.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(name=index_name, metric="cosine", dimension=1536)

index = pinecone.Index(index_name)

In [None]:
# Place the downloaded zip archive in the current working directory before
# running this cell; it must be named "<download_id>.zip".
download_id = "67c88ae5-4e4b-48dd-b2a1-c946fe8087b7"

# Unpack the archive into a folder named after the download id.
archive = zipfile.ZipFile(f"{download_id}.zip", mode='r')
with archive:
    archive.extractall(path=download_id)

In [None]:
# Embedding client authenticated with the OpenAI key configured above.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Wrap the already-created Pinecone index as a LangChain vector store that
# uses `embeddings` for both indexing and querying.
docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [None]:
# Load all of the chunks into Pinecone.
# Every *.json file in the extracted folder is read for a "url" value and a
# "chunks" list; each chunk becomes one Document tagged with its source url.
for filename in sorted(os.listdir(download_id)):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(download_id, filename)

    # Decode explicitly as UTF-8 so non-ASCII page text does not depend on the
    # platform's default encoding, and close the file before any network call.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            web_json = json.load(file)
        page_url = web_json["url"]
        page_chunks = web_json["chunks"]
    except json.JSONDecodeError as e:
        print(f"Error loading {filename}: {e}")
        continue
    except KeyError as e:
        # A file without the expected keys is reported and skipped rather than
        # aborting the whole load.
        print(f"Error loading {filename}: missing key {e}")
        continue

    chunk_list = [
        Document(page_content=x, metadata={"url": page_url})
        for x in page_chunks
    ]
    if not chunk_list:
        # Nothing to embed for this page.
        continue

    # Derive stable ids from the url and chunk position so re-running this
    # cell overwrites the same vectors instead of adding duplicates.
    chunk_ids = [
        str(uuid.uuid5(uuid.NAMESPACE_URL, f"{page_url}#{i}"))
        for i in range(len(chunk_list))
    ]
    docsearch.add_documents(chunk_list, ids=chunk_ids)

## Let's Test Our Model

In [None]:
# Chat model that will write the answers.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)

# Question-answering chain that pulls context from the Pinecone-backed store
# through its retriever and hands it to `llm` using the "stuff" chain type.
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [None]:
# Ask the retrieval-backed chain a question; the returned answer is the cell output.
query = "\nWhat is Scovillain?\n"
qa.run(query)

'Scovillain is a Grass/Fire type Pokémon introduced in Generation 9. It is known as the Spicy Pepper Pokémon. Scovillain evolves from Capsakid and has the abilities Chlorophyll, Insomnia, and Moody (hidden ability). It has a height of 0.9 m (2\'11") and weighs 15.0 kg (33.1 lbs). Scovillain has a base experience of 170 and a catch rate of 75. Its base stats are as follows: HP 65, Attack 108, Defense 65, Special Attack 108, Special Defense 65, and Speed 75.'

In [None]:
# Follow-up question for the retrieval-backed chain. The Pokémon's name is
# spelled the same way as in the other queries so the comparison is like-for-like.
query = """
What does Scovillain evolve into?
"""
qa.run(query)

'Scovillain evolves from Capsakid, but it does not evolve further.'

### Now, let's compare it with the base model (no retrieval).

In [None]:
# Baseline: send the same question straight to the chat model, with no
# retrieved context, to compare against the answer from the QA chain.
x = """
What is Scovillain?
"""
response = openai.ChatCompletion.create(
    model='gpt-3.5-turbo-0613',
    temperature=0,
    messages=[{"role": "user", "content": x}],
    # Pass the key explicitly: no visible cell sets `openai.api_key`, so
    # without this the call only works when OPENAI_API_KEY is exported in the
    # environment.
    api_key=OPENAI_API_KEY,
)
# The reply text of the first (only) choice is the cell output.
response['choices'][0]['message']['content']

'Scovillain is a term coined to describe someone who enjoys and seeks out extremely spicy or hot foods. It is a combination of the words "Scoville," which is a measurement of the heat or spiciness of chili peppers, and "villain," which refers to someone who enjoys or takes pleasure in something that may be considered extreme or challenging. A Scovillain is someone who actively seeks out and enjoys the intense heat and spiciness of foods, often pushing their tolerance levels to the limit.'