In [1]:
from langchain_chroma import Chroma

In [2]:
import os
from dotenv import load_dotenv

from langchain_openai import OpenAIEmbeddings

from langchain_community.document_loaders import JSONLoader


In [3]:
load_dotenv()

True

In [4]:
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")

In [5]:
embeddings = OpenAIEmbeddings(
    api_key=OPEN_AI_KEY,
    model="text-embedding-3-large",
    base_url="https://openrouter.ai/api/v1",
)

In [6]:
vector_db = Chroma(
    collection_name="PreviousData",
    embedding_function=embeddings,
    persist_directory="Company_Info_VectorDB",
)

In [7]:
vector_db.get()

{'ids': [],
 'embeddings': None,
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': []}

In [10]:
# loading previous_record text data

In [11]:
def metadata_func(record: dict, metadata: dict) -> dict:
    # Extract top-level fields
    metadata["ticket_id"] = record.get("ticket_id")
    metadata["resolution"] = record.get("resolution")
    metadata["creation_time"] = record.get("ticket_creation_time")
    metadata["closure_time"] = record.get("ticket_closure_time")
    
    # Extract nested metadata fields (category and priority)
    # Using .get() with an empty dict {} prevents errors if metadata is missing
    inner_meta = record.get("metadata", {})
    metadata["category"] = inner_meta.get("category")
    metadata["priority"] = inner_meta.get("priority")
    
    return metadata

In [12]:
loader = JSONLoader(
    file_path="previous_record.json",
    jq_schema=".[]",
    content_key="issue",
    metadata_func=metadata_func
)

In [13]:
docs = loader.load()

In [14]:
for doc in docs:
    print(doc)
    break

page_content='Payment deducted but order was not placed' metadata={'source': 'C:\\Users\\write\\Desktop\\ai_chatbot_with_memory_and_tools\\previous_record.json', 'seq_num': 1, 'ticket_id': 'TKT_000001', 'resolution': 'Payment verified and refund initiated to original payment method within 3–5 business days', 'creation_time': '2024-10-12T09:14:22', 'closure_time': '2024-10-12T13:45:10', 'category': 'payment', 'priority': 'high'}


In [15]:
for doc in docs:
    doc.metadata["source"] = "previous_record.json"

In [16]:
for doc in docs:
    print(doc)
    print()

page_content='Payment deducted but order was not placed' metadata={'source': 'previous_record.json', 'seq_num': 1, 'ticket_id': 'TKT_000001', 'resolution': 'Payment verified and refund initiated to original payment method within 3–5 business days', 'creation_time': '2024-10-12T09:14:22', 'closure_time': '2024-10-12T13:45:10', 'category': 'payment', 'priority': 'high'}

page_content='Order marked delivered but customer did not receive it' metadata={'source': 'previous_record.json', 'seq_num': 2, 'ticket_id': 'TKT_000002', 'resolution': 'Delivery investigation opened and replacement order created', 'creation_time': '2024-10-10T16:05:44', 'closure_time': '2024-10-11T11:20:30', 'category': 'delivery', 'priority': 'high'}

page_content='Refund not received even after 7 days' metadata={'source': 'previous_record.json', 'seq_num': 3, 'ticket_id': 'TKT_000003', 'resolution': 'Refund delay escalated to finance team and customer informed', 'creation_time': '2024-10-08T10:01:12', 'closure_time': 

In [19]:
f"loaded {len(docs)} documents"

'loaded 15 documents'

In [20]:
vector_db.add_documents(docs)

['491cd061-26a0-4072-9012-81a0d748e22d',
 '1e05a3b8-ffba-459e-902c-9538e6cab4b6',
 '441f39a0-2058-4231-ac4f-65692d5ec938',
 '8e3fc13d-5936-44e4-90e9-347b2d9c8095',
 '6800373d-e5df-4463-bb81-dc408aee7838',
 'bca2b128-f637-4d1e-a218-f1b42559c900',
 '60cb9ba5-aef8-4007-96fa-18723ea1f23c',
 '2a7e6db2-8465-4663-9c86-a63508a2d029',
 '031a8e63-9943-4566-b619-267a1d3090b4',
 'd8159a23-be41-4c17-a81f-ad5b84c77549',
 'e6532c09-89f2-47f1-904d-7cb125e705ff',
 '46b2534f-1725-4eba-8d5a-940a4fccf02b',
 '39f45f5c-a91b-4fba-af3f-fbbe2708f7fc',
 '7eb430ad-96fe-4160-9a9f-d4ff5ccfe9f0',
 '029f02df-6bb0-48c0-ae21-efa5d54c037c']

In [21]:
vector_db.get(include=["documents", "embeddings"])

{'ids': ['491cd061-26a0-4072-9012-81a0d748e22d',
  '1e05a3b8-ffba-459e-902c-9538e6cab4b6',
  '441f39a0-2058-4231-ac4f-65692d5ec938',
  '8e3fc13d-5936-44e4-90e9-347b2d9c8095',
  '6800373d-e5df-4463-bb81-dc408aee7838',
  'bca2b128-f637-4d1e-a218-f1b42559c900',
  '60cb9ba5-aef8-4007-96fa-18723ea1f23c',
  '2a7e6db2-8465-4663-9c86-a63508a2d029',
  '031a8e63-9943-4566-b619-267a1d3090b4',
  'd8159a23-be41-4c17-a81f-ad5b84c77549',
  'e6532c09-89f2-47f1-904d-7cb125e705ff',
  '46b2534f-1725-4eba-8d5a-940a4fccf02b',
  '39f45f5c-a91b-4fba-af3f-fbbe2708f7fc',
  '7eb430ad-96fe-4160-9a9f-d4ff5ccfe9f0',
  '029f02df-6bb0-48c0-ae21-efa5d54c037c'],
 'embeddings': array([[-0.00730934,  0.03768184,  0.0043503 , ..., -0.01745658,
          0.00709476,  0.00777309],
        [ 0.02410882,  0.03894284,  0.00079462, ...,  0.01491911,
         -0.01046607,  0.00272997],
        [-0.00241064,  0.0204614 , -0.00120371, ..., -0.00164318,
          0.02144375,  0.00206165],
        ...,
        [-0.00616167,  0.0407