## Pinecone config

In [None]:
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os

In [None]:
# Load settings (presumably from a local .env file) into the process
# environment, then read the credentials and endpoints used by this notebook.
# Each constant is None when the corresponding variable is not set.
load_dotenv()

AZURE_KEY = os.environ.get("AZURE_KEY")
AZURE_GPT4MINI_ENDPOINT = os.environ.get("AZURE_GPT4MINI_ENDPOINT")
AZURE_EMBEDINGS_ENDPOINT = os.environ.get("AZURE_EMBEDINGS_ENDPOINT")
LLM_MODEL = os.environ.get("LLM_MODEL")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [None]:
# Name of the Pinecone index and of the namespace the documents are stored in.
index_name = "epambot"
namespace = "epambot"

# Pinecone client used for index management; the key comes from the
# environment-loading cell above.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Uncomment to drop the index and start from scratch.
# pc.delete_index(index_name)

# Only create the index when it is missing, so this cell can be re-run
# without failing on an already existing index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must equal the embedding vector size
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

## Azure config

In [None]:
# Azure OpenAI chat client, configured from the constants read from the
# environment (AZURE_KEY / AZURE_GPT4MINI_ENDPOINT).
# NOTE(review): `AzureOpenAI` is not imported in the visible part of this
# notebook -- presumably `from openai import AzureOpenAI`; confirm and add it
# to the import cell.
client_azure = AzureOpenAI(
    api_key=AZURE_KEY,
    # api_version="2024-02-01",
    api_version="2023-03-15-preview",  # this one works for gpt-4o
    azure_endpoint=AZURE_GPT4MINI_ENDPOINT,
)

In [None]:
def get_completion_from_messages_azure(messages,
                                       model=LLM_MODEL,
                                       temperature=0.1,
                                       max_tokens=4096):
    """Send a chat conversation to Azure OpenAI and return the reply text.

    Args:
        messages: List of chat messages, each a dict with 'role' and
            'content' keys.
        model: Model/deployment name passed to the API. Defaults to the
            LLM_MODEL value read from the environment.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        The content of the first choice in the response.
    """
    response = client_azure.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content

In [None]:
# Minimal conversation for a manual smoke test of the chat helper:
# one system message followed by one user message.
messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("system", "no context"),
        ("user", "how are you?"),
    )
]

# Uncomment to actually hit the endpoint.
# get_completion_from_messages_azure(messages)

# Read LangChain documents from JSON

In [None]:
from langchain_openai import AzureOpenAIEmbeddings

# Azure deployment used for embeddings. Alternative kept for reference:
# model_name = "text-embedding-3-small"
model_name = "text-embedding-ada-002"

# Embedding client, configured from the constants read from the environment
# (AZURE_KEY / AZURE_EMBEDINGS_ENDPOINT).
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=model_name,
    # openai_api_version="2023-05-15",
    api_key=AZURE_KEY,
    azure_endpoint=AZURE_EMBEDINGS_ENDPOINT,
)

In [None]:
from langchain.schema import Document
import time
import os
import json
from langchain_pinecone import PineconeVectorStore

# Directory with the pre-chunked pages. Each *.json file is read as a list of
# objects that have a "content" string and a "questions" list of strings.
path_to_json = 'jsons/'

# Running total of chunks converted so far; printed as import progress.
counter = 0

# Connect to the index once, up front. This keeps `docsearch` defined for the
# cells below even when the directory contains no JSON files, and avoids
# constructing a new vector store for every file.
docsearch = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace,
)

for file_name in os.listdir(path_to_json):
    if not file_name.endswith('.json'):
        continue

    # Explicit encoding so parsing does not depend on the platform default.
    with open(os.path.join(path_to_json, file_name), encoding='utf-8') as json_file:
        chunks = json.load(json_file)

    # One Document per chunk; the chunk's questions are flattened into a
    # single space-separated metadata string.
    one_page_splits = []
    for chunk in chunks:
        questions = ' '.join(chunk["questions"])
        one_page_splits.append(
            Document(page_content=chunk["content"], metadata={"questions": questions})
        )
        counter += 1
        print(counter)

    # Nothing to embed or upload for a file with no chunks.
    if one_page_splits:
        docsearch.add_documents(one_page_splits)

print(f"Finished importing {counter} articles.")

## Search for the objects: Basic search to check data

In [None]:
# Sanity check of the imported data: run one similarity search against the
# vector store populated above.
query = "what kind of rulling exactly do I get if I came to the netherlands in february 2024?"
docs = docsearch.similarity_search(query)

# Text of the first hit on its own line, followed by the full list of hits.
print(docs[0].page_content, docs, sep="\n")

In [None]:
# Glue the retrieved passages into one context string. Passages are taken in
# reverse order of `docs`, so the last hit comes first.
context = "".join(doc.page_content for doc in reversed(docs))

# Prompt preparation. The instruction is built without the stray closing quote
# and trailing newline/indentation that would otherwise be sent to the model.
delimiter = "####"
prompt = (
    "Act as a person who relocated to The Netherlands and seeking for the answers. "
    f"Give the answer to the question within provided context: {query}."
)
messages = [
    # The retrieved context is supplied as the system message.
    {'role': 'system', 'content': context},
    # The instruction is wrapped in delimiters on both sides.
    {'role': 'user', 'content': f"{delimiter}{prompt}{delimiter}"},
]

get_completion_from_messages_azure(messages)

## Maximal Marginal Relevance MMR

In [None]:
# Maximal Marginal Relevance (MMR) is a method for avoiding redundancy while retrieving items relevant to a query.
# Instead of merely retrieving the most relevant items (which can often be very similar to each other),
# MMR balances relevance and diversity in the items retrieved.

In [None]:
# Retrieve with the "mmr" search type for the current `query`, then print
# each returned document under a numbered heading.
retriever = docsearch.as_retriever(search_type="mmr")
matched_docs = retriever.invoke(query)
for position, match in enumerate(matched_docs):
    print(f"\n## Document {position}\n", match.page_content, sep="\n")

## Test request


In [None]:
# Alternative index/namespace kept for reference:
# index_name = "pleadcop"
# namespace = "pleadcop"

# Handle onto the existing index/namespace, using the same embedding client
# that was used for the import.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace,
)

In [None]:
# Inputs for the hypothesis query below: the AARRR funnel stage and the
# success metric the hypothesis should improve.
aarrr_funnel_stage, success_metric = (
    "acquisition",
    "Site Visit to Sign-up Conversion",
)

In [None]:
# Search query built from the funnel stage and success metric chosen above.
query = (
    "Which hypothesis suit best AARRR stage: "
    f"{aarrr_funnel_stage} and success metric: {success_metric}."
)

# Fetch the 3 most relevant documents for the query.
result = vectorstore.similarity_search(query, k=3)

# Concatenate the page contents, last hit first, with no separator.
context = "".join(hit.page_content for hit in reversed(result))

# Bare expression so the notebook displays the assembled context.
context

In [None]:
# Prompt preparation. The instruction is assembled as a single clean sentence:
# no stray closing quote and no embedded indentation whitespace reaches the
# model, and the missing "as" in "Act as a product manager" is restored.
delimiter = "####"
prompt = (
    "Act as a product manager and using the information from the context, "
    "provide a best possible product hypothesis which will "
    f"improve {success_metric} at the {aarrr_funnel_stage} stage"
)
messages = [
    # The retrieved context is supplied as the system message.
    {'role': 'system', 'content': context},
    # The instruction is wrapped in delimiters on both sides.
    {'role': 'user', 'content': f"{delimiter}{prompt}{delimiter}"},
]

In [None]:
# Send the prepared conversation to the chat model; as the last expression in
# the cell, the reply is displayed by the notebook.
get_completion_from_messages_azure(messages=messages)