## **Install Necessary Packages**

In [1]:
!pip install llama-index-vector-stores-moorcheh
!pip install pandas
!pip install llama-index-readers-file



In [13]:
import csv
import sys
import logging
import pandas as pd
import time
from llama_index.vector_stores.moorcheh import MoorchehVectorStore
from llama_index.core import SimpleDirectoryReader, Settings
from google.colab import userdata

## **Logging Setup**

In [10]:
# --- Logging Setup ---
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

## **Initalize the MoorchehClient**

In [11]:
MOORCHEH_API_KEY = userdata.get("MOORCHEH_API_KEY")

namespace_name="llamaindex_moorcheh_geographical"
documents_folder="./documents"
namespace_type="text" # or vector
query_csv_path = "queries.csv" # Path to your CSV file with queries
output_csv_path = "answers.csv" # Where to save the results
top_k = 5

## **Prepare and Chunk the Documents**

In [12]:
documents = SimpleDirectoryReader(documents_folder).load_data()
documents = [doc for doc in documents if hasattr(doc, 'text') and isinstance(doc.text, str) and doc.text.strip()]
output = [{"id": f"chunk_{idx}", "text": doc.text_resource.text} for idx, doc in enumerate(documents)]

# --- Set chunk size and overlap ---
Settings.chunk_size = 1024
Settings.chunk_overlap = 20

## **Upload the Document Chunks**

In [14]:
# --- Initialize the Moorcheh Vector Store ---
__all__ = ["MoorchehVectorStore"]

vector_store = MoorchehVectorStore(
    api_key=MOORCHEH_API_KEY,
    namespace=namespace_name,
    namespace_type=namespace_type,
    vector_dimension=None,
    add_sparse_vector=False,
    batch_size=100,
)

# --- Upload Documents ---
vector_store._client.upload_documents(namespace_name=namespace_name, documents=output)

[DEBUG] Initializing MoorchehClient
[DEBUG] Listing namespaces...
[DEBUG] Found namespaces: {'namespaces': [{'namespace_name': 'llamaindex_moorcheh_geo', 'type': 'text', 'vector_dimension': None, 'createdAt': '2025-07-17T16:10:18.737Z', 'itemCount': 490}]}
[DEBUG] Namespace 'llamaindex_moorcheh_geographical' not found. Creating...
[DEBUG] MoorchehVectorStore initialization complete.


{'status': 'success',
 'message': 'Successfully queued 490 documents for processing.',
 'queued_documents': 490,
 'document_ids': ['chunk_0',
  'chunk_1',
  'chunk_2',
  'chunk_3',
  'chunk_4',
  'chunk_5',
  'chunk_6',
  'chunk_7',
  'chunk_8',
  'chunk_9',
  'chunk_10',
  'chunk_11',
  'chunk_12',
  'chunk_13',
  'chunk_14',
  'chunk_15',
  'chunk_16',
  'chunk_17',
  'chunk_18',
  'chunk_19',
  'chunk_20',
  'chunk_21',
  'chunk_22',
  'chunk_23',
  'chunk_24',
  'chunk_25',
  'chunk_26',
  'chunk_27',
  'chunk_28',
  'chunk_29',
  'chunk_30',
  'chunk_31',
  'chunk_32',
  'chunk_33',
  'chunk_34',
  'chunk_35',
  'chunk_36',
  'chunk_37',
  'chunk_38',
  'chunk_39',
  'chunk_40',
  'chunk_41',
  'chunk_42',
  'chunk_43',
  'chunk_44',
  'chunk_45',
  'chunk_46',
  'chunk_47',
  'chunk_48',
  'chunk_49',
  'chunk_50',
  'chunk_51',
  'chunk_52',
  'chunk_53',
  'chunk_54',
  'chunk_55',
  'chunk_56',
  'chunk_57',
  'chunk_58',
  'chunk_59',
  'chunk_60',
  'chunk_61',
  'chunk_62',

## **Generate Answer**

In [17]:
# --- Generate Response ---
# --- Set Logging to DEBUG for more Detailed Outputs ---

queries_df = pd.read_csv(query_csv_path)

with open(output_csv_path, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["passage_id", "query", "generated_answers"])
    writer.writeheader()

    for idx, q in enumerate(queries_df["query"]):
        print(f"Processing: {q}")
        try:
            response = vector_store.get_generative_answer(query = q, ai_model = "anthropic.claude-3-7-sonnet-20250219-v1:0", llm=None)
            time.sleep(0.5)
            writer.writerow({
                "passage_id": idx,
                "query": q,
                "generated_answers": response
            })
        except Exception as e:
            print(f"Error for query '{q}':", e)


Processing: What are the main challenges in managing water resources in densely populated river basins?
Processing: How have land use patterns shifted in critical agricultural zones over the past two decades?
Processing: How do changes in Amazon vegetation and deforestation affect regional and global carbon fluxes?
Processing: How do changes in land cover and water resources influence patterns of extreme weather events globally?
Processing: Which countries or regions have experienced the most severe deforestation trends in the last two decades?
Processing: What are the primary causes of forest degradation globally and how do they vary by region?
Processing: What strategies are being implemented for forest restoration and reforestation particularly in tropical regions?
Processing: How is remote sensing used to monitor forest loss and land-use changes over time?
