![](img\Architecture.jpg)

In [1]:
from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_nomic import NomicEmbeddings
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
import pymongo

In [2]:
load_dotenv()
os.environ["USER_AGENT"] = "MyApp/1.0"
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')
os.environ["LANGCHAIN_API_KEY"] = os.getenv('LANGCHAIN_API_KEY')

### Document Loading

In [3]:
from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://www.corteconstitucional.gov.co/relatoria/2024/T-435-24.htm",
    "https://www.corteconstitucional.gov.co/relatoria/2024/T-440-24.htm",
    "https://www.corteconstitucional.gov.co/relatoria/2024/T-378-24.htm",
    "https://www.corteconstitucional.gov.co/relatoria/2024/SU241-24.htm",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

### Splitting

In [4]:

from langchain_text_splitters import CharacterTextSplitter

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=7500, chunk_overlap=100
)
doc_splits = text_splitter.split_documents(docs_list)

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
for d in doc_splits:
    print("The document is %s tokens" % len(encoding.encode(d.page_content)))

Created a chunk of size 24365, which is longer than the specified 7500
Created a chunk of size 13534, which is longer than the specified 7500
Created a chunk of size 31290, which is longer than the specified 7500
Created a chunk of size 7613, which is longer than the specified 7500
Created a chunk of size 8587, which is longer than the specified 7500
Created a chunk of size 25843, which is longer than the specified 7500
Created a chunk of size 18394, which is longer than the specified 7500
Created a chunk of size 17987, which is longer than the specified 7500
Created a chunk of size 19472, which is longer than the specified 7500


The document is 16 tokens
The document is 19167 tokens
The document is 959 tokens
The document is 10503 tokens
The document is 3998 tokens
The document is 382 tokens
The document is 24220 tokens
The document is 3363 tokens
The document is 5867 tokens
The document is 5957 tokens
The document is 4182 tokens
The document is 722 tokens
The document is 6804 tokens
The document is 4805 tokens
The document is 19884 tokens
The document is 6222 tokens
The document is 304 tokens
The document is 2657 tokens
The document is 14373 tokens
The document is 5648 tokens
The document is 4499 tokens
The document is 4578 tokens
The document is 2404 tokens
The document is 13956 tokens
The document is 14835 tokens
The document is 6298 tokens
The document is 983 tokens


### Conficuración de la conexión con MongoDB Atlas

In [5]:
mongo_uri = "mongodb+srv://olartevivianaa:S0q0d8rQ7jcutEVR@solucioneslegales.pkbwc.mongodb.net/?retryWrites=true&w=majority&appName=SolucionesLegales"
client = pymongo.MongoClient(mongo_uri)
database = client.solucionesLegales
collection_name = "sentencias"
collections = database[collection_name]

try:
    client.admin.command('ping')
    print('Conexión exitosa')
except pymongo.errors.ConnectionFailure as e:
    print('No se pudo conectar a MongoDB: %s' % e)

Conexión exitosa


### Insertar documentos a la base de datos

In [6]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

docs_to_insert = [{"page_content": doc.page_content, "metadata": doc.metadata} for doc in doc_splits]
collections.insert_many(docs_to_insert)

vectorstore = MongoDBAtlasVectorSearch(
    embedding=embeddings,
    collection=collections
)
retriever = vectorstore.as_retriever()

## RAG Chain

### Prompt

In [7]:
template = """Answer the question based only on the following context:
{context}

In the previous documents, there is a summary of 84 rulings by the Constitutional Court on the topic of social media. Could you act as a magistrate of the Constitutional Court and establish the main legal criteria that should be considered to resolve problematic cases concerning individuals' rights? Additionally, establish what is permitted and what should be prohibited to protect individuals, and list the rights that the Constitutional Court has protected in these decisions. In your response, write as an expert in grammar and employ the format of a scientific article suitable for an indexed journal.

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

### LLM API

In [8]:
model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

### Chain

In [9]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

### Question

In [10]:
chain.invoke("What are the types of agent memory?")

'Given the context provided, it is not possible to answer the question regarding the types of agent memory, as the context is focused on the rulings of the Constitutional Court related to social media and does not provide information on agent memory. Agent memory typically refers to the cognitive processes and storage mechanisms that allow an agent, which could be a human or an artificial intelligence system, to retain and recall information. However, without specific details on the topic of agent memory within the provided context, it is not feasible to address the question accurately.\n\nIf you require information on agent memory, please provide relevant context or details that would allow for a more precise response.'