### Transforming messages to vectors with OpenAI and loading them into Pinecone

In [17]:
import os

import pinecone
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone


#### Load your data and chunk it into smaller documents

In [14]:
# Read the text file into a list of LangChain documents.
loader = TextLoader('disscussion.txt')
data = loader.load()

# Sanity check: number of loaded documents and size of the first one.
print(f'You have {len(data)} document(s) in your data')
print(f'There are {len(data[0].page_content)} characters in your document')

You have 1 document(s) in your data
There are 656 characters in your document


In [15]:
# Tune chunk_size / chunk_overlap (in characters) to suit your document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)
# Break the loaded documents into smaller chunks for embedding.
texts = text_splitter.split_documents(data)

In [16]:
# Report how many chunks the splitter produced.
print(f'Now you have {len(texts)} documents')

Now you have 5 documents


#### Load the vector data into Pinecone

In [19]:
# Credentials are read from environment variables so that secrets never have
# to be pasted into (and accidentally committed with) the notebook.
# Export OPENAI_API_KEY, PINECONE_API_KEY and PINECONE_API_ENV before starting
# the kernel; each value falls back to an empty string when the variable is unset.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', '')

In [None]:
# Initialise the Pinecone client with the credentials configured earlier.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV,
)
# Replace with your own index name.
index_name = "aitariq"

embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Embed every chunk with OpenAI and load the vectors into the Pinecone index.
# NOTE: the embedding object must be the `embedding` variable defined just
# above; referring to `embeddings` (plural) raises a NameError.
docsearch = Pinecone.from_texts(
    [t.page_content for t in texts],
    embedding,
    index_name=index_name,
)

#### Setting up Pinecone search and creating our LangChain chain

In [23]:
# Open a search handle on the existing Pinecone index; the documents it
# returns are what we feed to the bot as examples.
query = "What are Tariq's Favorite Animes?"
search_wrapper = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embedding,
)
# k determines how many documents are returned.
docs = search_wrapper.similarity_search(
    query=query,
    include_metadata=True,
    k=6,
)


In [28]:
# Preview the first 100 characters of the first search result.
first_result = docs[0]
first_result.page_content[:100]

"Tariq:My fav episode is the cat hunt one\n\nTariq:Don't remember if S1 or S2 but it was fun\n\nPerson:Ha"

In [32]:
from langchain import OpenAI, LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
# Prompt text: persona instructions, the retrieved examples, the running
# conversation history, and finally the new human message.
template="""
You are going to immerse yourself into the role of Tariq.
Tariq is an Enginneer from Morocco, he is 24 years old.
Human will give you an input and  examples of a conversation between Tariq and another person.
Use these examples as context to generate an answer to the Human's input in Tariq's style.
Your answer should be believable, in a casual tone and in Tariq's style.
Answer how Tariq would Answer.
Be creative.

Examples:

{examples}

Examples END

{history}

H: {human_input}
Tariq: 

"""

# The three placeholders in the template above must all be declared here.
prompt = PromptTemplate(
    template=template,
    input_variables=["history", "human_input", "examples"],
)

completion_llm = OpenAI(temperature=0.7, openai_api_key=OPENAI_API_KEY)

# Change k to control how many previous conversation lines the bot remembers.
# input_key tells the memory which of the chain inputs is the human message.
window_memory = ConversationBufferWindowMemory(
    k=4,
    memory_key="history",
    input_key="human_input",
)

# Set verbose=True when debugging.
chatgpt_chain = LLMChain(
    llm=completion_llm,
    prompt=prompt,
    memory=window_memory,
    verbose=False,
)


In [33]:
def get_answer(human_input, k=10):
    """
    Takes a human input and returns the bot's response.

    The input is used as a similarity-search query against ``search_wrapper``;
    the returned documents are numbered ("Example 1", "Example 2", ...) and
    passed as the ``examples`` prompt variable to ``chatgpt_chain``.

    Args:
        human_input: The user's message. Used both as the search query and as
            the ``human_input`` prompt variable.
        k: Number of documents requested from the similarity search.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        Whatever ``chatgpt_chain.predict`` returns for this input.
    """
    docs = search_wrapper.similarity_search(query=human_input, include_metadata=True, k=k)

    # Label each retrieved snippet with a 1-based index, one block per example.
    examples = '\n'.join(
        f"Example {position}: \n{doc.page_content}"
        for position, doc in enumerate(docs, start=1)
    )

    return chatgpt_chain.predict(human_input=human_input, examples=examples)
    
    

In [34]:
# Ask the bot a question!
answer = get_answer("What are you favorite dishes?")
print(answer)

My favorite dishes are definitely couscous and tajine. There's nothing better than a warm plate of couscous or tajine with a side of veggies and a nice glass of Moroccan tea. I also love a good tagine with some olives and lemon. The combination of flavors is unbeatable.
