## **Install Necessary Packages**

In [None]:
!pip install llama-index-vector-stores-moorcheh
!pip install pandas
!pip install llama-index-readers-file
!pip install llama-index-embeddings-huggingface

Collecting llama-index-embeddings-huggingface
  Using cached llama_index_embeddings_huggingface-0.5.5-py3-none-any.whl.metadata (458 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface)
  Using cached nvidia_cudnn_cu12-9.1.0.7

In [None]:
import logging
import sys
import os
import csv
import pandas as pd
import time
from llama_index.vector_stores.moorcheh import MoorchehVectorStore
from IPython.display import Markdown, display
from typing import Any, Callable, Dict, List, Optional, cast
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Settings,
)
from llama_index.core.base.embeddings.base_sparse import BaseSparseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.schema import BaseNode, MetadataMode, TextNode
from llama_index.core.vector_stores.types import (
    BasePydanticVectorStore,
    MetadataFilters,
    VectorStoreQuery,
    VectorStoreQueryMode,
    VectorStoreQueryResult,
)
from llama_index.core.vector_stores.utils import (
    DEFAULT_TEXT_KEY,
    legacy_metadata_dict_to_node,
    metadata_dict_to_node,
    node_to_metadata_dict,
)
from llama_index.core.vector_stores.types import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
    FilterCondition,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from google.colab import userdata

## **Logging Setup**

In [None]:
# --- Logging Setup ---
# Send INFO-and-above records from all libraries to stdout so they appear in the cell output.
# `force=True` removes any handlers already attached to the root logger (including ones left
# behind by a previous run of this cell) before installing a single stdout handler, so the
# cell is idempotent and every record is printed exactly once.
# Do not additionally call `addHandler(StreamHandler(sys.stdout))`: that duplicates each line.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

## **Preparation Variables**

In [24]:
# --- Tunable inputs and outputs for the run ---
namespace_name = "llamaindex_moorcheh_legal"  # Moorcheh namespace the chunks are stored in
documents_folder = "./documents"  # Folder scanned for source documents
query_csv_path = "queries.csv"  # Path to your CSV file with queries
output_csv_path = "answers.csv"  # Where to save the results
# NOTE(review): `top_k` is not referenced by the cells visible here — presumably the number of
# chunks to retrieve per query; confirm where it is consumed.
top_k = 5

# --- Credentials ---
# The API key is looked up by name through Colab's `userdata` helper rather than hard-coded.
MOORCHEH_API_KEY = userdata.get("MOORCHEH_API_KEY")

# --- Embedding model ---
# Registered on the global LlamaIndex `Settings` object and also kept in `embed_model`
# so later cells can pass it explicitly.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
Settings.embed_model = embed_model

## **Prepare and Chunk the Documents**

In [25]:
def _has_text(doc) -> bool:
    """Return True when `doc` has a `text` attribute holding a non-blank string."""
    text = getattr(doc, "text", None)
    return isinstance(text, str) and bool(text.strip())


# Read everything under `documents_folder`, then keep only the entries with usable text.
documents = list(filter(_has_text, SimpleDirectoryReader(documents_folder).load_data()))

# --- Set chunk size and overlap ---
# Stored on the global LlamaIndex `Settings`; nothing is split in this cell itself.
# NOTE(review): units are presumably tokens — confirm against the LlamaIndex docs.
Settings.chunk_overlap = 20
Settings.chunk_size = 1024

## **Upload the Document Chunks**

In [26]:
# NOTE(review): `__all__` only influences `from <module> import *`; inside a notebook cell it
# looks like a leftover from a package `__init__` — kept unchanged, confirm it can be dropped.
__all__ = ["MoorchehVectorStore"]

# --- Initialize the Moorcheh Vector Store ---
# NOTE(review): a "text" namespace with no vector dimension presumably means Moorcheh handles
# the text representation server-side — confirm against the Moorcheh documentation.
_moorcheh_kwargs = dict(
    api_key=MOORCHEH_API_KEY,
    namespace=namespace_name,
    namespace_type="text",
    vector_dimension=None,
    add_sparse_vector=False,
    batch_size=100,
)
vector_store = MoorchehVectorStore(**_moorcheh_kwargs)

# --- Create a Vector Store Index using the Vector Store and given Documents ---
# The storage context points LlamaIndex at the Moorcheh store; per this section's heading,
# building the index from `documents` is the step that uploads the document chunks.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model,
)

[DEBUG] Initializing MoorchehClient
[DEBUG] Listing namespaces...
[DEBUG] Found namespaces: {'namespaces': [{'namespace_name': 'llamaindex_moorcheh', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:16:27.967Z', 'itemCount': 0}, {'namespace_name': 'llamaindex_moorcheh1', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:22:22.837Z', 'itemCount': 0}, {'namespace_name': 'llamaindex_moorcheh12', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:25:37.576Z', 'itemCount': 0}, {'namespace_name': 'llamaindex_moorcheh13', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:28:12.437Z', 'itemCount': 51}, {'namespace_name': 'llamaindex_moorcheh2', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:23:53.436Z', 'itemCount': 0}, {'namespace_name': 'llamaindex_moorcheh3', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-15T15:30:41.917Z', 'itemCount': 355}, {'namespace_name': 'local-benc

## **Generate Answer**

In [27]:
# --- Generate Response ---
# For every question in `query_csv_path`, request a generative answer through the Moorcheh
# vector store and write one row per successful answer to `output_csv_path`
# (columns: passage_id, query, generated_answers).

ANSWER_MODEL = "anthropic.claude-3-7-sonnet-20250219-v1:0"  # Model id passed as `ai_model`
REQUEST_DELAY_SECONDS = 0.5  # Pause after each request, successful or not

queries_df = pd.read_csv(query_csv_path)  # Load your questions from a CSV file ("query" column)

# Explicit UTF-8 so answers containing non-ASCII characters are written identically on every
# platform; `newline=""` is what the csv module requires for correct line endings.
with open(output_csv_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["passage_id", "query", "generated_answers"])
    writer.writeheader()

    for idx, q in enumerate(queries_df["query"]):  # `idx` is the question's position in the CSV
        print(f"Processing: {q}")  # Show which question is being processed
        try:
            response = vector_store.get_generative_answer(query=q, ai_model=ANSWER_MODEL)
            # Write immediately so the row is not lost if the run is interrupted while pausing.
            writer.writerow(
                {
                    "passage_id": idx,  # Unique ID for this answer
                    "query": q,  # The original question
                    "generated_answers": response,  # The AI-generated answer
                }
            )
        except Exception as e:
            # Best-effort batch: report the failure and continue with the next question.
            # Failed questions get no row in the output file.
            print(f"Error for query '{q}':", e)
        # Throttle after every request — including failed ones — so errors such as rate
        # limiting do not cause the remaining questions to be fired back-to-back.
        time.sleep(REQUEST_DELAY_SECONDS)

Processing: What is copyright law and why does it exist?
Processing: How long does a copyright in sound recording last for (in Canada)?
Processing: What constitutes copyright infringement under Canadian law?
Processing: What are the conditions under which a person can reproduce copyright information without permission (ie. exceptions to infringement)?
Processing: What are moral rights and how do they differ from economic rights?
Processing: If a moral right is violated but not the economic right what legal actions are possible?
Processing: What are some of the possible damages a person must pay if they are found guilty of copyright infringement?
