In [1]:
# Import libraries

import json
import openai
import pinecone
import serpapi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Pinecone as LangchainPinecone

In [2]:
import os
from dotenv import load_dotenv
# Must run before the later cells that read PINECONE_API_KEY / SERPAPI_API_KEY
# through os.getenv(...); the call's return value is shown as the cell output.
load_dotenv()  # Loads the environment variables from a .env file

True

In [3]:
import os
from pinecone import Pinecone

# Pull the Pinecone credential from the environment; an unset or empty value
# is treated as a configuration error and stops the notebook here.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if pinecone_api_key is None or pinecone_api_key == "":
    raise ValueError("PINECONE_API_KEY not found. Check your .env file or environment variables.")

# Client object that the following cells use to reach the index.
pc = Pinecone(api_key=pinecone_api_key)

In [4]:

# Get a handle to the "langchainvectors" index through the `pc` client.
# NOTE(review): nothing in this cell creates an index or makes a visible
# round-trip to the service, so the message below is printed unconditionally —
# confirm the index already exists in the Pinecone project.
index = pc.Index("langchainvectors")

print("Pinecone index created and connected successfully!")

Pinecone index created and connected successfully!


In [5]:
# Initialize the Sentence Transformer model for embedding generation.
# The same `model` object encodes both the uploaded content and the user
# queries in the cells below, so the two live in the same vector space.
# NOTE(review): the Pinecone index dimension must match this model's output
# size — confirm against the index configuration.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [7]:
# Generate embeddings for content and upload them to Pinecone
def upload_embeddings_to_pinecone(json_file, batch_size=100):
    """Embed every entry of a JSON file and upsert the vectors into Pinecone.

    Entries are encoded and upserted in batches rather than one network
    round-trip per entry. Each vector is stored under its position in the
    file (as a string id), with the source text kept in the ``text`` metadata
    field. Because ids are positional, re-running against the same file
    overwrites the same records.

    Args:
        json_file: Path to a UTF-8 encoded JSON file. Assumed to contain a
            top-level list of strings — TODO confirm against the data file.
        batch_size: Number of entries encoded and upserted per request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    with open(json_file, 'r', encoding='utf-8') as file:
        content = json.load(file)

    # Materialise once so the entries can be sliced into batches.
    texts = list(content)
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # Encoding a list returns one embedding row per input, in input order.
        embeddings = model.encode(batch).tolist()
        vectors = [
            (str(start + offset), embedding, {"text": text})
            for offset, (text, embedding) in enumerate(zip(batch, embeddings))
        ]
        index.upsert(vectors)
    print("Embeddings uploaded to Pinecone index: langchainvectors")

upload_embeddings_to_pinecone("changi_jewel_content.json")


Embeddings uploaded to Pinecone index: langchainvectors


In [10]:
# Read the SerpAPI credential from the environment and fail fast when it is
# unset or empty.
serpapi_key = os.environ.get("SERPAPI_API_KEY")
if serpapi_key is None or serpapi_key == "":
    raise ValueError("SERPAPI_API_KEY not found. Check your .env file or environment variables.")


In [12]:
def get_answer(query, min_score=None):
    """Answer a question from the Pinecone index, falling back to a web search.

    Args:
        query: The user's question as plain text.
        min_score: Optional relevance cut-off. When given, the best Pinecone
            match is used only if its ``score`` is at least this value.
            ``None`` (the default) accepts whatever match comes back.
            NOTE(review): a sensible value depends on the similarity metric
            the index was created with — confirm before setting one.

    Returns:
        The ``text`` metadata of the best match, or the result of
        ``search_web(query)`` when there is no match, the match is below
        ``min_score``, or the match carries no usable text.
    """
    # Generate embedding for the query
    query_embedding = model.encode(query).tolist()

    # Query Pinecone for the single nearest neighbour
    results = index.query(vector=query_embedding, top_k=1, include_metadata=True)

    matches = results["matches"]
    if matches:
        best = matches[0]
        # A nearest neighbour is returned however poor the similarity is, so
        # the score check is the only way to reject an irrelevant hit.
        relevant = min_score is None or best["score"] >= min_score
        try:
            text = best["metadata"]["text"]
        except (KeyError, TypeError):
            # Record stored without metadata or without a "text" field.
            text = None
        if relevant and text:
            return text

    # Nothing usable in the index: fall back to web search
    return search_web(query)

def search_web(query, timeout=10):
    """Return the snippet of the top SerpAPI search result for ``query``.

    The request is made with the standard library's ``urllib`` because
    ``requests`` is not imported anywhere in this notebook.

    Args:
        query: Search text sent as the ``q`` parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``snippet`` of the first organic result, or an apology message
        when the request fails, the response is not valid JSON, or the top
        result has no snippet.
    """
    # Local imports keep this cell self-contained.
    from urllib.parse import urlencode
    from urllib.request import urlopen

    fallback = "Sorry, I couldn't find an answer online either."
    params = {
        "q": query,
        "api_key": serpapi_key,
        "num": 1  # Get only the top result
    }
    url = "https://serpapi.com/search?" + urlencode(params)

    try:
        with urlopen(url, timeout=timeout) as response:
            if response.status != 200:
                return fallback
            data = json.load(response)
    except (OSError, ValueError):
        # OSError covers HTTP error statuses, connection failures and
        # timeouts; ValueError covers a body that is not valid JSON.
        return fallback

    organic_results = data.get("organic_results") if isinstance(data, dict) else None
    if organic_results:
        # Not every organic result carries a snippet.
        snippet = organic_results[0].get("snippet")
        if snippet:
            return snippet

    return fallback

# Chatbot interaction: read questions until the user types 'exit'.
print("Chatbot is ready! Type 'exit' to quit.")
while True:
    try:
        # Strip so that " exit " and stray whitespace behave as expected.
        user_query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop cleanly
        # instead of ending the cell with a traceback.
        break
    if not user_query:
        # Nothing to answer; do not spend an embedding/query on blank input.
        continue
    if user_query.lower() == 'exit':
        break
    response = get_answer(user_query)
    print(f"Bot: {response}")

Chatbot is ready! Type 'exit' to quit.
Bot: All about onsens & ryokans
