### Install dependencies

In [57]:
pip install -r requirements.txt

[33mDEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Imports

In [18]:
import os
import json
import meilisearch

from dotenv import load_dotenv # remove if not using dotenv
from langchain.vectorstores import Meilisearch
from langchain.embeddings import OllamaEmbeddings
from langchain.document_loaders import JSONLoader

load_dotenv() # remove if not using dotenv

# exit if missing env vars
if "MEILI_HTTP_ADDR" not in os.environ:
    raise Exception("Missing MEILI_HTTP_ADDR env var")
if "MEILI_MASTER_KEY" not in os.environ:
    raise Exception("Missing MEILI_MASTER_KEY env var")

### Meilisearch configuration

In [4]:
client = meilisearch.Client(
    url=os.environ.get("MEILI_HTTP_ADDR"),
    api_key=os.environ.get("MEILI_MASTER_KEY"),
)

In [5]:
client.health()

{'status': 'available'}

#### Enable vector search

In [6]:
%%bash
curl \
  -X PATCH 'http://localhost:7700/experimental-features/' \
  -H 'Content-Type: application/json'  \
  -H 'Authorization: Bearer ollama'  \
  --data-binary '{
    "vectorStore": true
  }'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   114  100    85  100    29   5990   2043 --:--:-- --:--:-- --:--:-- 11400


{"scoreDetails":false,"vectorStore":true,"metrics":false,"exportPuffinReports":false}

### Pull mistral model into Ollama container

In [42]:
%%bash
curl http://localhost:11434/api/pull -d '{
  "name": "mistral"
}'

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed


{"status":"pulling manifest"}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling e8a35b5937a5","digest":"sha256:e8a35b5937a5e6d5c35d1f2a15f161e07eefe5e5bb0a3cdd42998ee79b057730","total":4109853248}
{"status":"pulling

100  199k    0  199k    0    23   2190      0 --:--:--  0:01:33 --:--:--   327


{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15e14003d337","total":30}
{"status":"pulling ed11eda7790d","digest":"sha256:ed11eda7790d05b49395598a42b155812b17e263214292f7b87d15

### Confirm model is available in ollama server

In [46]:
%%bash
curl http://localhost:11434/api/tags

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed


100   572  100   572    0     0  71224      0 --:--:-- --:--:-- --:--:--  186k


{"models":[{"name":"mistral:latest","modified_at":"2024-01-17T05:26:48.488838612Z","size":4109865159,"digest":"61e88e884507ba5e06c49b40e6226884b2a16e872382c2b44a42f2d119d804a5","details":{"format":"gguf","family":"llama","families":["llama"],"parameter_size":"7B","quantization_level":"Q4_0"}},{"name":"zephyr:latest","modified_at":"2023-12-19T01:32:50.072888421Z","size":4108917840,"digest":"03af36d860cca7429f4ba5dff423111834424cb4513fbd85ec46d841762946ef","details":{"format":"gguf","family":"llama","families":null,"parameter_size":"7B","quantization_level":"Q4_0"}}]}

#### Use OllamaEmbeddings from Langchain

In [47]:
embeddings = OllamaEmbeddings(model="mistral", base_url="http://localhost:11434")

### Load JSON documents

In [48]:
loader = JSONLoader(
    file_path="./movies-small.json",
    jq_schema=".[] | {id: .id, overview: .overview, title: .title}",
    text_content=False,
)
documents = loader.load()
print("Loaded {} documents".format(len(documents)))

Loaded 22 documents


### Create Vector store

In [49]:
# Store documents in Meilisearch
vector_store = Meilisearch.from_documents(documents=documents, embedding=embeddings)

print("Started importing documents")

Started importing documents


In [50]:
client.get_indexes({'limit': 3})

{'results': [<meilisearch.index.Index at 0x10bc245e0>],
 'offset': 0,
 'limit': 3,
 'total': 1}

In [27]:
# client.delete_index('langchain-demo')

### Search

In [56]:
query = "any movies related to Pirates"
results = vector_store.similarity_search(
    query=query,
    k=5,
)

# Display results
for result in results:
    doc = json.loads(result.page_content)
    print(doc["id"], doc["title"])

22 Pirates of the Caribbean: The Curse of the Black Pearl
24 Kill Bill: Vol. 1
18 The Fifth Element
19 Metropolis
28 Apocalypse Now
