# Marqo Getting Started

> Run the following from your terminal

In [1]:
# !docker rm -f marqo
# !docker pull marqoai/marqo:latest
# !docker run --name marqo --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest
!docker run --detach --name marqo --privileged -p 8882:8882 --add-host host.docker.internal:host-gateway marqoai/marqo:latest

In [40]:
!docker logs marqo --tail 20

6a80f8e80048: Pulling fs layer
d612b5f8a653: Pulling fs layer
4f4fb700ef54: Waiting
6a80f8e80048: Waiting
d612b5f8a653: Waiting
237b91e13be3: Download complete
471574b8cfce: Download complete
4f4fb700ef54: Verifying Checksum
4f4fb700ef54: Download complete
6a80f8e80048: Verifying Checksum
6a80f8e80048: Download complete
d612b5f8a653: Verifying Checksum
d612b5f8a653: Download complete
e3cfe889ce0a: Verifying Checksum
e3cfe889ce0a: Download complete
e3cfe889ce0a: Pull complete
471574b8cfce: Pull complete
237b91e13be3: Pull complete
f83c242d5958: Verifying Checksum
f83c242d5958: Download complete


In [3]:
pip install marqo

Note: you may need to restart the kernel to use updated packages.


> Restart Kernel function

In [4]:
from IPython.display import display_html
def restartkernel() :
    display_html("<script>Jupyter.notebook.kernel.restart()</script>",raw=True)

In [5]:
restartkernel()

## Back to it

In [6]:
import marqo
mq = marqo.Client(url='http://localhost:8882')

In [7]:
mq.create_index("my-first-index")

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'my-first-index'}

In [8]:
mq.index("my-first-index").add_documents([
    {
        "Title": "The Travels of Marco Polo",
        "Description": "A 13th-century travelogue describing Polo's travels"
    }, 
    {
        "Title": "Extravehicular Mobility Unit (EMU)",
        "Description": "The EMU is a spacesuit that provides environmental protection, "
                       "mobility, life support, and communications for astronauts",
        "_id": "article_591"
    }],
    tensor_fields=["Title", "Description"]
)
        

{'errors': False,
 'processingTimeMs': 361.04777099990315,
 'index_name': 'my-first-index',
 'items': [{'_id': '710e111f-2854-43e2-8dc5-fd0f16e33c33',
   'result': 'created',
   'status': 201},
  {'_id': 'article_591', 'result': 'created', 'status': 201}]}

In [9]:
results = mq.index("my-first-index").search(
    q="What is the best outfit to wear on the moon?", searchable_attributes=["Title", "Description"]
)

In [10]:
# let's print out the results:
import pprint
pprint.pprint(results)

{'hits': [{'Description': 'The EMU is a spacesuit that provides environmental '
                          'protection, mobility, life support, and '
                          'communications for astronauts',
           'Title': 'Extravehicular Mobility Unit (EMU)',
           '_highlights': {'Description': 'The EMU is a spacesuit that '
                                          'provides environmental protection, '
                                          'mobility, life support, and '
                                          'communications for astronauts'},
           '_id': 'article_591',
           '_score': 0.6193894},
          {'Description': "A 13th-century travelogue describing Polo's travels",
           'Title': 'The Travels of Marco Polo',
           '_highlights': {'Title': 'The Travels of Marco Polo'},
           '_id': '710e111f-2854-43e2-8dc5-fd0f16e33c33',
           '_score': 0.60237324}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 217,
 'query': 'What is the 

## Retrieve a document by id

In [11]:
result = mq.index("my-first-index").get_document(document_id="article_591")
pprint.pprint(result)


{'Description': 'The EMU is a spacesuit that provides environmental '
                'protection, mobility, life support, and communications for '
                'astronauts',
 'Title': 'Extravehicular Mobility Unit (EMU)',
 '_id': 'article_591'}


## Get index stats

In [12]:
results = mq.index("my-first-index").get_stats()
pprint.pprint(results)

{'numberOfDocuments': 2, 'numberOfVectors': 4}


## Lexical search

In [13]:
result = mq.index("my-first-index").search('marco polo', search_method=marqo.SearchMethods.LEXICAL)
pprint.pprint(result)

{'hits': [{'Description': "A 13th-century travelogue describing Polo's travels",
           'Title': 'The Travels of Marco Polo',
           '_highlights': [],
           '_id': '710e111f-2854-43e2-8dc5-fd0f16e33c33',
           '_score': 1.3260207}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 63,
 'query': 'marco polo'}


## Specific field search

In [14]:
result = mq.index("my-first-index").search('adventure', searchable_attributes=['Title'])
pprint.pprint(result)

{'hits': [{'Description': "A 13th-century travelogue describing Polo's travels",
           'Title': 'The Travels of Marco Polo',
           '_highlights': {'Title': 'The Travels of Marco Polo'},
           '_id': '710e111f-2854-43e2-8dc5-fd0f16e33c33',
           '_score': 0.62348425},
          {'Description': 'The EMU is a spacesuit that provides environmental '
                          'protection, mobility, life support, and '
                          'communications for astronauts',
           'Title': 'Extravehicular Mobility Unit (EMU)',
           '_highlights': {'Title': 'Extravehicular Mobility Unit (EMU)'},
           '_id': 'article_591',
           '_score': 0.53939724}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 61,
 'query': 'adventure'}


## Multi-modal and cross modal search

In [15]:
settings = {
    "treat_urls_and_pointers_as_images":True,   # allows us to find an image file and index it 
    "model":"ViT-L/14"
}

In [16]:
response = mq.create_index("my-multimodal-index", **settings)

In [17]:
response = mq.index("my-multimodal-index").add_documents([{
    "My Image": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png",
    "Description": "The hippopotamus, also called the common hippopotamus or river hippopotamus, is a large semiaquatic mammal native to sub-Saharan Africa",
    "_id": "hippo-facts"
}], tensor_fields=["My Image", "Description"])

In [18]:
results = mq.index("my-multimodal-index").search('animal')
pprint.pprint(results)

{'hits': [{'Description': 'The hippopotamus, also called the common '
                          'hippopotamus or river hippopotamus, is a large '
                          'semiaquatic mammal native to sub-Saharan Africa',
           'My Image': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
           '_highlights': {'Description': 'The hippopotamus, also called the '
                                          'common hippopotamus or river '
                                          'hippopotamus, is a large '
                                          'semiaquatic mammal native to '
                                          'sub-Saharan Africa'},
           '_id': 'hippo-facts',
           '_score': 0.82948834}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 278,
 'query': 'animal'}


In [19]:
results = mq.index("my-multimodal-index").search('animal',  searchable_attributes=['My Image'])
pprint.pprint(results)

{'hits': [{'Description': 'The hippopotamus, also called the common '
                          'hippopotamus or river hippopotamus, is a large '
                          'semiaquatic mammal native to sub-Saharan Africa',
           'My Image': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
           '_highlights': {'My Image': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'},
           '_id': 'hippo-facts',
           '_score': 0.57958966}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 172,
 'query': 'animal'}


## Searching using an image

In [20]:
results = mq.index("my-multimodal-index").search('https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png')
pprint.pprint(results)

{'hits': [{'Description': 'The hippopotamus, also called the common '
                          'hippopotamus or river hippopotamus, is a large '
                          'semiaquatic mammal native to sub-Saharan Africa',
           'My Image': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png',
           '_highlights': {'My Image': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png'},
           '_id': 'hippo-facts',
           '_score': 0.8334509}],
 'limit': 10,
 'offset': 0,
 'processingTimeMs': 1568,
 'query': 'https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_statue.png'}


## Searching using weights in queries

In [21]:
import marqo
import pprint
mq = marqo.Client(url="http://localhost:8882")
mq.create_index("my-weighted-query-index")


{'acknowledged': True,
 'shards_acknowledged': True,
 'index': 'my-weighted-query-index'}

In [22]:
mq.index("my-weighted-query-index").add_documents(
    [
        {
            "Title": "Smartphone",
            "Description": "A smartphone is a portable computer device that combines mobile telephone "
            "functions and computing functions into one unit.",
        },
        {
            "Title": "Telephone",
            "Description": "A telephone is a telecommunications device that permits two or more users to"
            "conduct a conversation when they are too far apart to be easily heard directly.",
        },
        {
            "Title": "Thylacine",
            "Description": "The thylacine, also commonly known as the Tasmanian tiger or Tasmanian wolf, "
            "is an extinct carnivorous marsupial."
            "The last known of its species died in 1936.",
        },
    ],
    tensor_fields=["Title", "Description"]
)

{'errors': False,
 'processingTimeMs': 322.9877530000067,
 'index_name': 'my-weighted-query-index',
 'items': [{'_id': '7debd37d-2f5b-45d9-8849-944e168786e4',
   'result': 'created',
   'status': 201},
  {'_id': 'a32133f6-8003-4e9d-ab12-617e41ef16e5',
   'result': 'created',
   'status': 201},
  {'_id': '93a034a5-b686-445c-9a3b-ac52a04aa79f',
   'result': 'created',
   'status': 201}]}

In [23]:
# initially we ask for a type of communications device which is popular in the 21st century
query = {
    # a weighting of 1.1 gives this query slightly more importance
    "I need to buy a communications device, what should I get?": 1.1,
    # a weighting of 1 gives this query a neutral importance
    "Technology that became prevelant in the 21st century": 1.0,
}

In [24]:
results = mq.index("my-weighted-query-index").search(
    q=query, searchable_attributes=["Title", "Description"]
)

print("Query 1:")
pprint.pprint(results)

Query 1:
{'hits': [{'Description': 'A smartphone is a portable computer device that '
                          'combines mobile telephone functions and computing '
                          'functions into one unit.',
           'Title': 'Smartphone',
           '_highlights': {'Title': 'Smartphone'},
           '_id': '7debd37d-2f5b-45d9-8849-944e168786e4',
           '_score': 0.7332566},
          {'Description': 'A telephone is a telecommunications device that '
                          'permits two or more users toconduct a conversation '
                          'when they are too far apart to be easily heard '
                          'directly.',
           'Title': 'Telephone',
           '_highlights': {'Title': 'Telephone'},
           '_id': 'a32133f6-8003-4e9d-ab12-617e41ef16e5',
           '_score': 0.7094748},
          {'Description': 'The thylacine, also commonly known as the Tasmanian '
                          'tiger or Tasmanian wolf, is an extinct carnivorous 

In [25]:
# now we ask for a type of communications which predates the 21st century
query = {
    # a weighting of 1 gives this query a neutral importance
    "I need to buy a communications device, what should I get?": 1.0,
    # a weighting of -1 gives this query a negation effect
    "Technology that became prevelant in the 21st century": -1.0,
}

results = mq.index("my-weighted-query-index").search(
    q=query, searchable_attributes=["Title", "Description"]
)

print("\nQuery 2:")
pprint.pprint(results)


Query 2:
{'hits': [{'Description': 'A telephone is a telecommunications device that '
                          'permits two or more users toconduct a conversation '
                          'when they are too far apart to be easily heard '
                          'directly.',
           'Title': 'Telephone',
           '_highlights': {'Description': 'A telephone is a telecommunications '
                                          'device that permits two or more '
                                          'users toconduct a conversation when '
                                          'they are too far apart to be easily '
                                          'heard directly.'},
           '_id': 'a32133f6-8003-4e9d-ab12-617e41ef16e5',
           '_score': 0.6079516},
          {'Description': 'A smartphone is a portable computer device that '
                          'combines mobile telephone functions and computing '
                          'functions into one unit.',
  

## Creating and searching indexes with multimodal combination fields

In [26]:
import marqo
import pprint

mq = marqo.Client(url="http://localhost:8882")

settings = {"treat_urls_and_pointers_as_images": True, "model": "ViT-L/14"}

In [27]:
mq.create_index("my-first-multimodal-index", **settings)

{'acknowledged': True,
 'shards_acknowledged': True,
 'index': 'my-first-multimodal-index'}

In [28]:
mq.index("my-first-multimodal-index").add_documents(
    [
        {
            "Title": "Flying Plane",
            "captioned_image": {
                "caption": "An image of a passenger plane flying in front of the moon.",
                "image": "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg",
            },
        },
        {
            "Title": "Red Bus",
            "captioned_image": {
                "caption": "A red double decker London bus traveling to Aldwych",
                "image": "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg",
            },
        },
        {
            "Title": "Horse Jumping",
            "captioned_image": {
                "caption": "A person riding a horse over a jump in a competition.",
                "image": "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg",
            },
        },
    ],
    # Create the mappings, here we define our captioned_image mapping 
    # which weights the image more heavily than the caption - these pairs 
    # will be represented by a single vector in the index
    mappings={
        "captioned_image": {
            "type": "multimodal_combination",
            "weights": {
                "caption": 0.3,
                "image": 0.7,
            },
        }
    },
    # We specify which fields to create vectors for. 
    # Note that captioned_image is treated as a single field.
    tensor_fields=["Title", "captioned_image"]
)

{'errors': False,
 'processingTimeMs': 3663.3276999998543,
 'index_name': 'my-first-multimodal-index',
 'items': [{'_id': '69f2197c-6b4b-4726-8703-d11a23094821',
   'result': 'created',
   'status': 201},
  {'_id': '32ea62f9-2058-4bb0-baea-59e45adb092a',
   'result': 'created',
   'status': 201},
  {'_id': 'e34be7fa-8e02-4ff6-8bee-fc2c9ea67668',
   'result': 'created',
   'status': 201}]}

In [29]:
# Search this index with a simple text query
results = mq.index("my-first-multimodal-index").search(
    q="Give me some images of vehicles and modes of transport. I am especially interested in air travel and commercial aeroplanes.",
    searchable_attributes=["captioned_image"],
)

In [30]:
print("Query 1:")
pprint.pprint(results)

Query 1:
{'hits': [{'Title': 'Flying Plane',
           '_highlights': {'captioned_image': '{"caption": "An image of a '
                                              'passenger plane flying in front '
                                              'of the moon.", "image": '
                                              '"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"}'},
           '_id': '69f2197c-6b4b-4726-8703-d11a23094821',
           '_score': 0.6611588,
           'captioned_image': {'caption': 'An image of a passenger plane '
                                          'flying in front of the moon.',
                               'image': 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg'}},
          {'Title': 'Red Bus',
           '_highlights': {'captioned_image': '{"caption": "A red double '
                                              'decker London bus traveling to '
  

In [31]:
# search the index with a query that uses weighted components
results = mq.index("my-first-multimodal-index").search(
    q={
        "What are some vehicles and modes of transport?": 1.0,
        "Aeroplanes and other things that fly": -1.0,
    },
    searchable_attributes=["captioned_image"],
)
print("\nQuery 2:")
pprint.pprint(results)


Query 2:
{'hits': [{'Title': 'Red Bus',
           '_highlights': {'captioned_image': '{"caption": "A red double '
                                              'decker London bus traveling to '
                                              'Aldwych", "image": '
                                              '"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg"}'},
           '_id': '32ea62f9-2058-4bb0-baea-59e45adb092a',
           '_score': 0.5551582,
           'captioned_image': {'caption': 'A red double decker London bus '
                                          'traveling to Aldwych',
                               'image': 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image4.jpg'}},
          {'Title': 'Horse Jumping',
           '_highlights': {'captioned_image': '{"caption": "A person riding a '
                                              'horse over a jump in a '
                 

In [32]:
results = mq.index("my-first-multimodal-index").search(
    q={"Animals of the Perissodactyla order": -1.0},
    searchable_attributes=["captioned_image"],
)
print("\nQuery 3:")
pprint.pprint(results)


Query 3:
{'hits': [{'Title': 'Flying Plane',
           '_highlights': {'captioned_image': '{"caption": "An image of a '
                                              'passenger plane flying in front '
                                              'of the moon.", "image": '
                                              '"https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg"}'},
           '_id': '69f2197c-6b4b-4726-8703-d11a23094821',
           '_score': 0.4383292,
           'captioned_image': {'caption': 'An image of a passenger plane '
                                          'flying in front of the moon.',
                               'image': 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image2.jpg'}},
          {'Title': 'Red Bus',
           '_highlights': {'captioned_image': '{"caption": "A red double '
                                              'decker London bus traveling to '
 

## Delete documents

In [33]:
results = mq.index("my-first-index").delete_documents(ids=["article_591", "article_602"])
pprint.pprint(results)

{'details': {'deletedDocuments': 1, 'receivedDocumentIds': 2},
 'duration': 'PT0.080698S',
 'finishedAt': '2023-08-18T11:29:35.348405Z',
 'index_name': 'my-first-index',
 'startedAt': '2023-08-18T11:29:35.267707Z',
 'status': 'succeeded',
 'type': 'documentDeletion'}


## Delete index

In [34]:
results = mq.index("my-first-index").delete()
pprint.pprint(results)

{'acknowledged': True}


## Cleanup

In [35]:
results = mq.index("my-first-multimodal-index").delete()
pprint.pprint(results)

{'acknowledged': True}


In [36]:
results = mq.index("my-weighted-query-index").delete()
pprint.pprint(results)

{'acknowledged': True}


In [37]:
results = mq.index("my-multimodal-index").delete()
pprint.pprint(results)

{'acknowledged': True}


# Fin.