# Pinecone

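This notebook loads documents from a Google Cloud Storage bucket, splits and embeds them with OpenAI, indexes them in the Pinecone vector database, answers a question over that index with a retrieval QA chain, and logs the question/answer pair back to Cloud Storage as CSV.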

In [None]:
import configparser
import io
import os

import pandas as pd
import pinecone
from google.cloud import storage
from langchain.chains import RetrievalQA
from langchain.document_loaders import GCSDirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [None]:
# Respect credentials that are already configured in the environment; only fall
# back to the local service-account key file when none are set. This keeps the
# original behaviour on the authoring machine while letting the notebook run
# elsewhere without editing a hard-coded absolute path.
if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/teekyboy/code/gcp/chatbot-t1-firebase.json'

In [None]:
# Parse the INI settings file located one directory above this notebook.
# `read` returns the list of files it managed to parse, which the cell displays.
config = configparser.ConfigParser()
config.read(['../config.ini'])

In [None]:
# Secrets come from the [api_key] section of the parsed config.
OPENAI_API_KEY = config.get(section='api_key', option='openai')
PINECONE_API_KEY = config.get(section='api_key', option='pinecone')

# Pinecone environment and index name come from the [env] and [index] sections.
PINECONE_ENV = config.get(section='env', option='pinecone')
PINECONE_INDEX = config.get(section='index', option='pinecone')

In [None]:
# Load every object under the data/input prefix of the GCS bucket.
loader = GCSDirectoryLoader(
    project_name="chatbot",
    bucket="chatbot-t1.appspot.com",
    prefix="data/input",
)
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=200, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)
docs = text_splitter.split_documents(documents)

# Embedding model shared by the indexing and retrieval cells below.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Initialise the Pinecone client with the key and environment read from config.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

index_name = PINECONE_INDEX

# Build the vector store from the split documents in the configured index.
db = Pinecone.from_documents(
    docs,
    embeddings,
    index_name=index_name,
)

In [None]:
# Attach to the index that already exists under `index_name`, reusing the same
# embedding model; this rebinds `db` to the existing-index vector store.
db = Pinecone.from_existing_index(
    index_name,
    embeddings,
)

In [None]:
# Completion model used by the QA chain to generate answers.
llm = OpenAI(
    temperature=0.5,
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Retrieval QA chain: the vector store supplies context, the LLM writes the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

In [None]:
# Question to put to the retrieval QA chain.
query = "What was Apple's most profitable product? give me some details."

# Run the chain and keep the result so it can be logged later.
answer = qa.run(query)

# Bare expression so the notebook displays the answer as the cell output.
answer

In [None]:
def log_question_answer(query, answer):
    """Append one question/answer pair to the CSV log in Google Cloud Storage.

    The log is the object ``data/output/questions_answers.csv`` in the
    ``chatbot-t1.appspot.com`` bucket, with the columns ``question`` and
    ``answer``. When the object already exists and has content, its rows are
    kept and the new row is added at the end; otherwise the log is started
    with just the new row. The update is a download-modify-upload cycle, so it
    is not an atomic append.

    Args:
        query: The question text, written to the ``question`` column.
        answer: The answer text, written to the ``answer`` column.
    """
    prefix = 'data/output/'
    log_file = f'{prefix}questions_answers.csv'
    bucket_name = 'chatbot-t1.appspot.com'

    new_row = pd.DataFrame({'question': [query], 'answer': [answer]})

    # Initialize Google Cloud Storage client and locate the log object
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = storage.Blob(log_file, bucket)

    # A missing object and a zero-byte object are both treated as "no log yet";
    # read_csv would raise EmptyDataError on empty content.
    existing_csv = blob.download_as_text() if blob.exists() else ''

    if existing_csv.strip():
        # pandas has no top-level StringIO, so wrap the text with io.StringIO.
        # Read every cell as a plain string and disable NA detection so logged
        # text such as "N/A" or "None" survives the round trip unchanged.
        existing_df = pd.read_csv(
            io.StringIO(existing_csv),
            dtype=str,
            keep_default_na=False,
        )
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        log_df = pd.concat([existing_df, new_row], ignore_index=True)
    else:
        log_df = new_row

    # Upload the updated data to Google Cloud Storage
    blob.upload_from_string(log_df.to_csv(index=False), content_type='text/csv')


In [None]:
# Persist the question and the chain's answer to the CSV log in Cloud Storage.
log_question_answer(query=query, answer=answer)