Commands used to set up and start the llama-stack server:

```bash
# Setup: create a local conda environment, install this checkout into it,
# and download the model weights used by the server.
source ~/miniconda3/bin/activate
conda create --prefix ./envs python=3.10
# Activate the new environment before installing, so `pip install -e .`
# targets ./envs rather than whichever environment is currently active.
conda activate ./envs
pip install -e .
llama download --model-id Llama3.2-11B-Vision-Instruct
llama download --model-id Llama-Guard-3-1B

# Server settings; the run command below reads the port and model names from here.
export LLAMA_STACK_PORT=5001
export INFERENCE_MODEL=meta-llama/Llama-3.2-11B-Vision-Instruct
export INFERENCE_PORT=8000
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

# Start server
source ~/miniconda3/bin/activate
conda activate ./envs
# NOTE(review): `track` is not defined anywhere in this file; presumably a local
# wrapper command. Confirm it exists on the target machine or drop it.
llama stack build --template meta-reference-gpu --image-type conda && track llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
  --port "$LLAMA_STACK_PORT" \
  --env INFERENCE_MODEL="$INFERENCE_MODEL" \
  --env SAFETY_MODEL="$SAFETY_MODEL"
```

In [6]:
# Deps
# Install the `datasets` package, which the next cell imports to load NarrativeQA.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install datasets

Note: you may need to restart the kernel to use updated packages.


In [8]:
import datasets

# Load the dataset registered under the "deepmind/narrativeqa" identifier.
dataset = datasets.load_dataset("deepmind/narrativeqa")

# Show the loaded object as the cell output (its splits, features and row counts).
dataset

  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 32747
    })
    test: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 10557
    })
    validation: Dataset({
        features: ['document', 'question', 'answers'],
        num_rows: 3461
    })
})

In [9]:
import pandas as pd

# Build one row per unique document from the validation split:
#   - `context`   : the text of the document (taken from document["text"])
#   - `questions` : list of every question entry that shares that document text
# The whole transform is a single chain starting from `dataset`, so re-running the
# cell always rebuilds `df` from the source data.
df = (
    pd.DataFrame(dataset["validation"])
    .assign(context=lambda frame: frame["document"].apply(lambda doc: doc["text"]))
    .rename(columns={"question": "questions"})
    .groupby("context")
    .agg({"questions": list})
    .reset_index()
)
# Only the last expression of a cell is rendered, so preview the frame once here.
df.head()

Unnamed: 0,context,questions
0,"<html>\n\n<head>\n<title>""Domino,"" by Richard ...",[{'text': 'Who planned the robbery that was be...
1,<html>\n<head><title>Airplane Script at IMSDb....,"[{'text': 'What is Ted Striker afraid of?', 't..."
2,<html>\n<head><title>All About Steve Script at...,"[{'text': 'What is Mary Horowitz's job?', 'tok..."
3,<html>\n<head><title>American Psycho Script at...,[{'text': 'Who is the first man Bateman muders...
4,"<html>\n<head><title>American, The Script at I...","[{'text': 'What does Jack do for a living?', '..."


In [10]:
import uuid
from llama_stack_client import LlamaStackClient
from llama_stack_client.types.memory_insert_params import Document

# Client for the llama-stack server. Port 5001 is the value assigned to
# LLAMA_STACK_PORT in the server command at the top of this notebook.
client = LlamaStackClient(
    base_url="http://localhost:5001",
)

providers = client.providers.list()
# NOTE(review): `memory_banks_response` is not read by any cell visible here;
# confirm whether this call is still needed.
memory_banks_response = client.memory_banks.list()

# A random UUID suffix gives every execution of this cell its own bank id.
bank_id = f"bank_{uuid.uuid4()}"
# Use the first provider listed under the "memory" key.
provider = providers["memory"][0]
client.memory_banks.register(
    memory_bank_id=bank_id,
    params={
        "embedding_model": "all-MiniLM-L6-v2",
        # 512 is the default used by the agent config: https://github.com/meta-llama/llama-stack/blob/66d8f4ffd126bff668434b314892a99fe854a034/llama_stack/providers/inline/agents/meta_reference/agent_instance.py#L668
        "chunk_size_in_tokens": 512,
    },
    provider_id=provider.provider_id,
)
# Display the generated bank id as the cell output.
bank_id

'bank_a7708879-1453-4d97-905d-368918ba428a'

In [11]:
# One plain-text Document per row of `df`: the row's index label (as a string)
# becomes the document id and the row's `context` value becomes the content.
documents = [
    Document(
        document_id=str(row_label),
        content=text,
        mime_type="text/plain",
        metadata={},
    )
    for row_label, text in df["context"].items()
]

In [12]:
from tqdm import tqdm

# Insert the documents into the memory bank one request at a time, in list order,
# with a progress bar over the whole list.
for document in tqdm(documents):
    client.memory.insert(bank_id=bank_id, documents=[document])

  7%|█████▍                                                                        | 8/115 [06:29<1:26:44, 48.64s/it]


InternalServerError: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}

The corresponding error in the llama-stack server log is:

```
  error: database is locked
  __ttl__: 604800
23:07:00.213 [END] MemoryRouter.insert_documents  [StatusCode.OK] (9640.61ms)
  class: MemoryRouter
  method: insert_documents
  type: async
  args: ()
  error: database is locked
  __ttl__: 604800
23:07:00.241 [END] /alpha/memory/insert  [StatusCode.OK] (9684.66ms)
  __root__: True
  location: server
  __ttl__: 604800
INFO:     127.0.0.1:40752 - "POST /alpha/memory/insert HTTP/1.1" 200 OK
```