
# <span style="color:red">INSTANTIATION OF THE LLM AND THE EMBEDDING MODEL</span>

In [1]:
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA 
# from langchain_openai import OpenAIEmbeddings

from pinecone import Pinecone, ServerlessSpec

import os
import dotenv

from langchain_groq import ChatGroq
import getpass

  from tqdm.autonotebook import tqdm


In [2]:
# chat = ChatOllama(model="llama3",)

# Load variables from a local .env file (if one exists) before building the
# client. Elsewhere in this notebook dotenv.load_dotenv() is only called in the
# Pinecone cell, which runs after this one, so on a fresh kernel credentials
# stored only in .env would not be loaded yet at this point.
# NOTE(review): no api_key argument is passed below, so ChatGroq presumably
# takes the key from the environment (GROQ_API_KEY) - confirm.
dotenv.load_dotenv()

# Groq-hosted chat model used as the LLM for the question-answering chain.
chat = ChatGroq(
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)


In [3]:
### EMBEDDINGS

## Embedding model served by Ollama (nomic-embed-text)
embed_model = OllamaEmbeddings(model="nomic-embed-text")

# Sanity check: embed a short word and print the length of the resulting vector.
# The Pinecone index created later in this notebook uses the same number as its
# dimension.
sample_vector = embed_model.embed_query('hola')
print(len(sample_vector))

768


In [7]:
## READ THE DIRECTORY AND LOAD THE FILE
from langchain.document_loaders import PyPDFDirectoryLoader
# read documents
def read_doc(directory):
    """Load the PDF files found under ``directory``.

    Args:
        directory: Path of the folder handed to ``PyPDFDirectoryLoader``.

    Returns:
        The result of ``PyPDFDirectoryLoader(directory).load()``; the saved
        output of this cell shows a list of ``Document`` objects carrying
        ``source`` and ``page`` metadata.
    """
    return PyPDFDirectoryLoader(directory).load()
# Folder that holds the CV PDFs. Deliberately NOT named `dir`: assigning to
# that name at notebook level would hide the builtin dir() for every later cell.
cv_directory = './CV2'
doc = read_doc(cv_directory)

# `total` is the name the chunking cell reads; it is the same list as `doc`.
total = doc
total



[Document(metadata={'source': 'CV2\\Cv2.pdf', 'page': 0}, page_content='Curriculum Vitae  \n \n Nicolás Cacheda                                \nEstado Civil: Soltero  \nDIR.: Pje. Petunias 60, alto jardín botánico.    \nTeléfono : (0294)  15-4-332816  \nE-mail:  n.cacheda@gmail.com  \nDNI: 34721887  \nCuil: 20 -34721887 -2 \nNacionalidad: Argentina  \nFecha de nacimiento: 12/12/1989  \n \n \n \nFORMACION \nACADEMICA  \n \n \n \n \n \nIDIOMAS  \n \n  \nINFORMATICA  \n \n \n \n \n \n \n \n \n \nCURSOS / OTROS  \n \n \n \n \n \n \n \n \n \n \n \n \nEXPERIENCIA LABORAL  \n \n  \n- Técnico en automatización de sistemas y sistemas de control.  \nEscuela Co operativa Técnica Los Andes  | 2008 | Bariloche | Río Negro  \n- Licenciatura en Psicología  de la Universidad  de Buenos Aires sin \nfinalizar.  \n- Ingles avanzado. Hablado y escrito  \n \n \nFormación en:  \n- Autocad                -    Programación en Python                   \n- Solidworks            -    Herramientas Microsoft Offi

In [8]:

def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split documents into smaller chunks.

    Args:
        docs: Documents to split; passed straight to ``split_documents``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``.

    Returns:
        The value returned by ``split_documents`` (the saved output of this
        cell shows a ``list``).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

# Chunk the loaded CV pages; these settings override the function defaults.
documents = chunk_data(
    total,
    chunk_size=1500,
    chunk_overlap=50,
)
# documents_cv=chunk_data(docs=doc_cv,chunk_size=3000, chunk_overlap=50)

# Displayed as the cell result: the container type that came back.
type(documents)

list


# <span style="color:red">LOAD THE DOCUMENTS AND VECTORS INTO THE PINECONE DB</span>

In [9]:
## CONNECT WITH PINECONE DATABASE
from pinecone import Pinecone, ServerlessSpec

# Read PINECONE_API_KEY (and the optional PINECONE_CLOUD / PINECONE_REGION
# overrides) from a local .env file into the process environment.
dotenv.load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# Connect to the Pinecone DB
pc = Pinecone(api_key=PINECONE_API_KEY)
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)
index_name = 'nico'

# Every run starts from an empty index: drop any index left over from a
# previous run before creating it again.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)
    print("index {} borrado".format(index_name))

# Because an existing index is always deleted just above, the index never
# exists at this point, so it is created unconditionally.
# NOTE(review): this relies on delete_index() returning only once the index is
# gone (its default timeout behaviour) - confirm for the installed client.
pc.create_index(
    index_name,
    dimension=768,  # must equal the embedding length; nomic-embed-text printed 768 above
    metric='cosine',
    spec=spec,
)
# Reported only after create_index() returned without raising.
print("index creado con el nombre: {}".format(index_name))

index creado con el nombre: nico


In [10]:
## UPSERT THE VECTORS INTO THE PINECONE DATABASE

import time
from langchain_pinecone import PineconeVectorStore

# Target index and namespace for the upsert.
index_name = 'nico'
namespace = "espacio"

# Embed every chunk with embed_model and store the vectors in the index.
docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    embedding=embed_model,
    index_name=index_name,
    namespace=namespace,
)
print(f"upserted values to {index_name} index")

# Pause for one second before the next cell runs.
time.sleep(1)



upserted values to nico index



# <span style="color:red">RETRIEVE AND SEARCH IN THE CREATED PINECONE DATABASE</span>

In [11]:

# Rebuild the Pinecone client and restate which index / namespace the
# retrieval cells below work against.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name, namespace = 'nico', "espacio"


In [12]:
# Vector store bound to the given index and namespace; queries are embedded
# with embed_model.
vectorstore = PineconeVectorStore(
    embedding=embed_model,
    index_name=index_name,
    namespace=namespace,
)

# Retriever view of the same store, built with the default settings.
retriever = vectorstore.as_retriever()

In [14]:
# Ask the vector store for the single closest chunk to a sample question.
query = "Donde trabaja Nicolas"
vectorstore.similarity_search(query=query, k=1)

[Document(id='07c2116f-9d68-43c3-b653-f57d94d2b1b0', metadata={'page': 0.0, 'source': 'CV2\\Cv2.pdf'}, page_content='Curriculum Vitae  \n \n Nicolás Cacheda                                \nEstado Civil: Soltero  \nDIR.: Pje. Petunias 60, alto jardín botánico.    \nTeléfono : (0294)  15-4-332816  \nE-mail:  n.cacheda@gmail.com  \nDNI: 34721887  \nCuil: 20 -34721887 -2 \nNacionalidad: Argentina  \nFecha de nacimiento: 12/12/1989  \n \n \n \nFORMACION \nACADEMICA  \n \n \n \n \n \nIDIOMAS  \n \n  \nINFORMATICA  \n \n \n \n \n \n \n \n \n \nCURSOS / OTROS  \n \n \n \n \n \n \n \n \n \n \n \n \nEXPERIENCIA LABORAL  \n \n  \n- Técnico en automatización de sistemas y sistemas de control.  \nEscuela Co operativa Técnica Los Andes  | 2008 | Bariloche | Río Negro  \n- Licenciatura en Psicología  de la Universidad  de Buenos Aires sin \nfinalizar.  \n- Ingles avanzado. Hablado y escrito  \n \n \nFormación en:  \n- Autocad                -    Programación en Python                   \n- Solidwork

In [16]:

query = "Que experiencia tiene Nicolas"

# Question-answering chain: the Groq chat model answers using whatever the
# vector-store retriever returns for the query.
qa = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# The chain's response is a dict; its 'result' entry holds the answer text.
result = qa.invoke({"query": query})

print(result['result'])

Based on the provided CV, Nicolás Cacheda has experience in automation of systems and control, having worked as a technician in this field. He has also studied Licenciatura in Psychology and has advanced knowledge of English. He has experience with various software such as AutoCAD, Python, Solidworks, Microsoft Office tools, Visual Basic, Protel, Smart Plant Instrumentation, CATIA V5, and ENOVIA.

He has completed a supervised internship in RACAL, a metal carpentry company, and has experience in designing and constructing orthopedic elements for disabled people using CNC. He has also worked as a projectist in a textile company, taken a temporary job in INGELSUD for automation of a hydroelectric plant, and worked as a dibujante in ISB (engineering in means of elevation). Currently, he is working as a service provider for the CAREM project through the CNEA-UNSAM Specific Agreement.


In [17]:
# Show the whole response dict: the original 'query' plus the generated 'result'.
print(result)

{'query': 'Que experiencia tiene Nicolas', 'result': 'Based on the provided CV, Nicolás Cacheda has experience in automation of systems and control, having worked as a technician in this field. He has also studied Licenciatura in Psychology and has advanced knowledge of English. He has experience with various software such as AutoCAD, Python, Solidworks, Microsoft Office tools, Visual Basic, Protel, Smart Plant Instrumentation, CATIA V5, and ENOVIA.\n\nHe has completed a supervised internship in RACAL, a metal carpentry company, and has experience in designing and constructing orthopedic elements for disabled people using CNC. He has also worked as a projectist in a textile company, taken a temporary job in INGELSUD for automation of a hydroelectric plant, and worked as a dibujante in ISB (engineering in means of elevation). Currently, he is working as a service provider for the CAREM project through the CNEA-UNSAM Specific Agreement.'}
