### How to save OpenAI embeddings in a Pinecone vector database

Prepare the API keys and other secrets.

In [1]:
import os
# Make sure the three settings the notebook reads later are present in the
# environment. Values that are already set are left untouched; anything
# missing is requested interactively so that no credential is ever written
# into the notebook source.
import getpass

_REQUIRED_SETTINGS = {
    "PINECONE_ENVIRONMENT": input,       # not a secret, so echoing is fine
    "PINECONE_API_KEY": getpass.getpass,  # secrets are read without echo
    "OPENAI_API_KEY": getpass.getpass,
}

for _name, _ask in _REQUIRED_SETTINGS.items():
    if not os.environ.get(_name):
        os.environ[_name] = _ask(f"{_name}: ")

In [2]:
from langchain.document_loaders import DirectoryLoader,TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.chat_models import ChatOpenAI as OpenAI

#### Load the text file and split it into chunks to stay within the token limit

In [3]:
# Build the splitter first, then load the essay and cut it into chunks.
# chunk_size is a target, not a hard cap: the recorded run of this cell
# reported chunks of 1149 and 1456 characters.
char_text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)

loader = TextLoader('./homelessness.txt')
docs = loader.load()

doc_texts = char_text_splitter.split_documents(docs)
doc_texts

Created a chunk of size 1149, which is longer than the specified 1000
Created a chunk of size 1456, which is longer than the specified 1000


[Document(page_content='Homelessness is a complex social issue with many interrelated causes and effects. In this essay,\nwe will explore some of the most common causes of homelessness, as well as the short-term and\nlong-term effects that homelessness can have on individuals, families, and communities.', metadata={'source': './homelessness.txt'}),
 Document(page_content='Causes of Homelessness:\n1. Lack of Affordable Housing: The most significant cause of homelessness is a lack of affordable \n   housing. As housing prices rise and wages stagnate, many low-income individuals and families cannot \n   afford to rent or buy a home, forcing them to live in substandard conditions or become homeless.\n2. Poverty: Poverty is another significant cause of homelessness. Individuals living in poverty \n   often have limited access to education, healthcare, and job opportunities, making it difficult to\n   achieve financial stability and secure housing.\n3. Mental Illness: Mental illness, particu

#### Preparing pinecone db

In [4]:
from langchain.vectorstores import pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

  from tqdm.autonotebook import tqdm


In [5]:
# Embedding model used for both the stored chunks and the query.
# The Pinecone index inspected below reports dimension 1536, so any
# replacement model must produce vectors of that size.
EMBEDDING_MODEL = "text-embedding-ada-002"

# Pass the constant explicitly so that changing EMBEDDING_MODEL actually
# changes the model in use (previously the constant was defined but unused).
embeddings = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

In [6]:
# Name of the Pinecone index used throughout the rest of the notebook.
index_name = "homelessness"

# Configure the Pinecone client from the environment variables set earlier.
pinecone.init(
    environment=os.environ["PINECONE_ENVIRONMENT"],
    api_key=os.environ["PINECONE_API_KEY"],
)


In [7]:
# Open a handle to the index and display its current statistics
# (dimension, fullness, vector counts per namespace).
index = pinecone.Index(index_name=index_name)
index_stats = index.describe_index_stats()
index_stats

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 20}},
 'total_vector_count': 20}

### push vector data into pinecone

In [8]:
# Embed every chunk and upsert it into the Pinecone index.
import hashlib

chunk_texts = [d.page_content for d in doc_texts]

# Derive each vector id from the chunk's content. Without explicit ids a new
# id is generated on every call, so re-running this cell would keep adding
# duplicate vectors; with content-derived ids a re-run overwrites in place.
chunk_ids = [hashlib.sha256(t.encode("utf-8")).hexdigest() for t in chunk_texts]

# Keep each chunk's metadata (e.g. its `source`) alongside the vector.
# Copies are passed so the Document objects in doc_texts are not modified
# if the vector store adds its own keys to these dicts.
chunk_metadatas = [dict(d.metadata) for d in doc_texts]

doc_store = Pinecone.from_texts(
    chunk_texts,
    embedding=embeddings,
    metadatas=chunk_metadatas,
    ids=chunk_ids,
    index_name=index_name,
)

#### Query from doc_store

Find the chunks in doc_store that are most similar to the query (by cosine similarity).

In [9]:
# Retrieve the stored chunks closest to the question.
# NOTE(review): the recorded run of this cell raised KeyError: 'text'.
# Presumably some vectors already in the index (it held 20 before the push)
# were stored without a 'text' metadata field -- confirm against the index.
query = "what is life?"
docs = doc_store.similarity_search(query=query)
docs

KeyError: 'text'

Answer the query using the retrieved contexts.

In [None]:
from langchain.chains.question_answering import load_qa_chain
# `OpenAI` is this notebook's alias for ChatOpenAI (see the imports cell).
# temperature=0 is used for the answer generation.
llm = OpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    temperature=0,
)

# "stuff" chain: the retrieved documents are passed to the model together
# with the question.
qa_chain = load_qa_chain(llm, chain_type="stuff")

qa_chain.run(question=query, input_documents=docs)