
# <span style="color:red">INSTANTIATION OF THE LLM MODEL AND THE EMBEDDING</span>

In [1]:
from langchain_ollama import OllamaEmbeddings
from langchain_ollama.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA 
from langchain_openai import OpenAIEmbeddings

from pinecone import Pinecone, ServerlessSpec

import os
import dotenv

from langchain_groq import ChatGroq
import getpass

In [2]:
# chat = ChatOllama(model="llama3",)

chat = ChatGroq(
    model="mixtral-8x7b-32768",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)


In [3]:
### EMBEDDINGS

## Embedding Techinque of OPENAI
embed_model = OllamaEmbeddings(model="nomic-embed-text")

print(len(embed_model.embed_query('hola')))

768


In [5]:
## READ THE DIRECTORY AND LOAD THE FILE
from langchain.document_loaders import PyPDFDirectoryLoader
# read documents
def read_doc(directory):
    file_loader=PyPDFDirectoryLoader(directory)
    documents = file_loader.load()
    return documents
dir='./'
doc=read_doc(dir)

total=doc
total



[Document(metadata={'source': 'CV.pdf', 'page': 0}, page_content='Página  1  \n  \nJUAN PABLO SCHAMUN  \nIngeniero Industrial  \n \nDATOS PESONALES  Teléfono:  0291 -155714529  \nE-mail: juanpsch@gmail.com  \nCiudad: Bariloche  \nEDUCACIÓN  Posgrado: Master en tecnología energética para el desarrollo \nsostenible – Universidad Politécnica de Valencia – Valencia, \nEspaña. (finalizado en Enero de 2012)  \nUniversitario: Ingeniería Industrial  - Universidad Nacional del Sur . \nFecha de graduación 19 de Octubre de 2009  \nSecundario:  (1997 -2001) Bachiller - orientación Economía y Gestión de las \nOrganizaciones - Colegio del Solar – Bahía Blanca  \nIDIOMAS  Inglés:  Curso completo en la Asociación Bahiense de Cultura Inglesa – \nNivel avanzado . Exámenes internacionales:  \n● Preliminary English Text ( PET)  \n● First Certificate Examination ( FCE) \n● Certificate in Advance English ( CAE ) \nOtorgados por la Universidad de Cambridge.  \nAlemán:  Cursado hasta 5to nivel en el la Socied

In [6]:

def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    doc=text_splitter.split_documents(docs)
    return doc

documents=chunk_data(docs=total,chunk_size=1500, chunk_overlap=50)
# documents_cv=chunk_data(docs=doc_cv,chunk_size=3000, chunk_overlap=50)

type(documents)

list


# <span style="color:red">LOAD THE DOCUMENTS AND VECTORS TO PINESTORE DB</span>

In [91]:
## CONNECT WITH PINECONE DATABASE
from pinecone import Pinecone, ServerlessSpec
dotenv.load_dotenv()
PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")

#Connect to DB Pinecone
pc=Pinecone(api_key=PINECONE_API_KEY)
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)
index_name = 'jptest'

if index_name in pc.list_indexes().names():
  pc.delete_index(index_name)
  print("index {} borrado".format(index_name))

# check if index already exists (it shouldn't if this is first time)
if index_name not in pc.list_indexes().names():
    # if does not exist, create index
    print("index creado con el nombre: {}".format(index_name))
    pc.create_index(
        index_name,
        dimension=768,  # dimensionality of text-embedding models/embedding-001
        metric='cosine',
        spec=spec
        )
else:
    print("el index con el nombre {} ya estaba creado".format(index_name))

index creado con el nombre: jptest


In [8]:
## UPSERT THE VECTORS IN TO THE PINECONE DATABASE

import time
from langchain_pinecone import PineconeVectorStore
namespace = "espacio"

index_name = 'jptest'

docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    index_name=index_name,
    embedding=embed_model, 
    namespace=namespace
)
print("upserted values to {} index".format(index_name))

time.sleep(1)



upserted values to jptest index



# <span style="color:red">RETRIEVE AND SEARCH INTO THE CREATED PINECONE DATABASES</span>

In [9]:

PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")
pc=Pinecone(api_key=PINECONE_API_KEY)
index_name = 'jptest'
namespace = "espacio"


In [10]:
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embed_model,
    namespace=namespace,
)

retriever=vectorstore.as_retriever()

In [11]:
query = "Donde trabaja Juan Pablo"
vectorstore.similarity_search(query, k=1)

[Document(metadata={'page': 0.0, 'source': 'documents\\CV.pdf'}, page_content='Página  1  \n  \nJUAN PABLO SCHAMUN  \nIngeniero Industrial  \n \nDATOS PESONALES  Teléfono:  0291 -155714529  \nE-mail: juanpsch@gmail.com  \nCiudad: Bariloche  \nEDUCACIÓN  Posgrado: Master en tecnología energética para el desarrollo \nsostenible – Universidad Politécnica de Valencia – Valencia, \nEspaña. (finalizado en Enero de 2012)  \nUniversitario: Ingeniería Industrial  - Universidad Nacional del Sur . \nFecha de graduación 19 de Octubre de 2009  \nSecundario:  (1997 -2001) Bachiller - orientación Economía y Gestión de las \nOrganizaciones - Colegio del Solar – Bahía Blanca  \nIDIOMAS  Inglés:  Curso completo en la Asociación Bahiense de Cultura Inglesa – \nNivel avanzado . Exámenes internacionales:  \n● Preliminary English Text ( PET)  \n● First Certificate Examination ( FCE) \n● Certificate in Advance English ( CAE ) \nOtorgados por la Universidad de Cambridge.  \nAlemán:  Cursado hasta 5to nivel en

In [12]:

query = "Que experiencia tiene Juan Pablo"


qa = RetrievalQA.from_chain_type(  
    llm=chat,  
    chain_type="stuff",  
    retriever=vectorstore.as_retriever()  
)  


result = qa.invoke(query)

print(result['result'])

Based on the provided information, Juan Pablo Schamun has experience in several areas, including industrial engineering, energy technology, and project management. He has a Master's degree in technological energy for sustainable development from the Polytechnic University of Valencia in Spain. He has also worked as a industrial engineer, business analyst, and contact engineer for ExxonMobil, where he analyzed business information and coordinated plant maintenance activities.

In addition, Juan Pablo has experience as a independent consultant, providing consulting and advisory services in energy efficiency and renewable energy project design. He has also worked as a specialist for the CAREM project at the National Atomic Energy Commission (CNEA) in Argentina, where he was responsible for configuring 3D design tools, managing databases, creating 3D models, and coordinating a team of five people.

Juan Pablo is also proficient in several computer programs and languages, including CATIA V5

In [13]:
print(result)

{'query': 'Que experiencia tiene Juan Pablo', 'result': "Based on the provided information, Juan Pablo Schamun has experience in several areas, including industrial engineering, energy technology, and project management. He has a Master's degree in technological energy for sustainable development from the Polytechnic University of Valencia in Spain. He has also worked as a industrial engineer, business analyst, and contact engineer for ExxonMobil, where he analyzed business information and coordinated plant maintenance activities.\n\nIn addition, Juan Pablo has experience as a independent consultant, providing consulting and advisory services in energy efficiency and renewable energy project design. He has also worked as a specialist for the CAREM project at the National Atomic Energy Commission (CNEA) in Argentina, where he was responsible for configuring 3D design tools, managing databases, creating 3D models, and coordinating a team of five people.\n\nJuan Pablo is also proficient i