In [None]:
# default_exp pod.client
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Pod Client

In [None]:
# export
from pymemri.data.basic import *
from pymemri.data.schema import *
from pymemri.data.itembase import Edge, ItemBase, Item
from pymemri.imports import *
from hashlib import sha256
from pymemri.pod.db import DB
from pymemri.pod.utils import *
from pymemri.plugin.schema import *
from pymemri.test_utils import get_ci_variables
from pymemri.pod.api import PodAPI, PodError, DEFAULT_POD_ADDRESS, POD_VERSION

from typing import List, Union
import uuid
import urllib
from datetime import datetime

from threading import Thread

In [None]:
# export
class PodClient:
    # Mapping from python type to schema type
    # TODO move to data.schema once schema is refactored
    TYPE_TO_SCHEMA = {
        bool: "Bool",
        str: "Text",
        int: "Integer",
        float: "Real",
        datetime: "DateTime",
    }

    def __init__(
        self,
        url=DEFAULT_POD_ADDRESS,
        version=POD_VERSION,
        database_key=None,
        owner_key=None,
        auth_json=None,
        register_base_schema=True,
        verbose=False,
    ):
        """Set up the API connection, the local item cache and the class registry.

        Args:
            url: Address of the pod, forwarded to `PodAPI`.
            version: Pod API version, forwarded to `PodAPI`.
            database_key: Database key; a random key is generated when omitted.
            owner_key: Owner key; a random key is generated when omitted.
            auth_json: Forwarded unchanged to `PodAPI`.
            register_base_schema: When True (default), the base schemas are
                registered in the pod right after connecting.
            verbose: Stored on the client and forwarded to `PodAPI`.
        """
        self.verbose = verbose
        self.database_key = (
            database_key if database_key is not None else self.generate_random_key()
        )
        self.owner_key = (
            owner_key if owner_key is not None else self.generate_random_key()
        )
        self.api = PodAPI(
            database_key=self.database_key,
            owner_key=self.owner_key,
            url=url,
            version=version,
            auth_json=auth_json,
            verbose=verbose,
        )
        self.api.test_connection()
        self.local_db = DB()
        self.registered_classes = dict()
        # Honour the flag: previously the base schemas were registered
        # unconditionally, so callers could not opt out.
        if register_base_schema:
            self.register_base_schemas()

    @classmethod
    def from_local_keys(cls, path=DEFAULT_POD_KEY_PATH, **kwargs):
        """Build a client from the locally stored database and owner keys.

        Remaining keyword arguments are passed on to the constructor.
        """
        # NOTE(review): `path` is accepted but not forwarded to `read_pod_key`;
        # confirm whether the helper should receive it.
        stored_keys = {
            "database_key": read_pod_key("database_key"),
            "owner_key": read_pod_key("owner_key"),
        }
        return cls(**stored_keys, **kwargs)

    @staticmethod
    def generate_random_key():
        return "".join([str(random.randint(0, 9)) for i in range(64)])

    def register_base_schemas(self):
        """Register the schemas the client itself relies on.

        Raises:
            AssertionError: if `add_to_schema` reports a failure.
        """
        # Keep the side-effecting call outside of `assert`: assert statements
        # are removed under `python -O`, which would skip the registration.
        registered = self.add_to_schema(PluginRun, CVUStoredDefinition, Account, Photo)
        if not registered:
            raise AssertionError("Could not register the base schemas in the pod")

    def add_to_db(self, node):
        existing = self.local_db.get(node.id)
        if existing is None and node.id is not None:
            self.local_db.add(node)

    def reset_local_db(self):
        """Replace the local item cache with a new, empty `DB` instance."""
        self.local_db = DB()

    def get_create_dict(self, node):
        properties = node.to_json()
        properties = {k: v for k, v in properties.items() if v != []}
        return properties

    def create(self, node):
        create_dict = self.get_create_dict(node)
        try:
            result = self.api.create_item(create_dict)
            node.id = result
            self.add_to_db(node)
            node.on_sync_to_pod(self)
            return True
        except Exception as e:
            print(e)
            return False

    def create_photo(self, photo, asyncFlag=True):
        file = photo.file[0]
    
        # create the photo
        items_edges_success = self.bulk_action(
            create_items=[photo, file], create_edges=photo.get_edges("file")
        )
        if not items_edges_success:
            raise ValueError("Could not create file or photo item")
            
        return self._upload_image(photo.data, asyncFlag=asyncFlag)

    def _property_dicts_from_instance(self, node):
        create_items = []
        attributes = node.to_json()
        for k, v in attributes.items():
            if type(v) not in self.TYPE_TO_SCHEMA:
                raise ValueError(f"Could not add property {k} with type {type(v)}")
            value_type = self.TYPE_TO_SCHEMA[type(v)]

            create_items.append(
                {
                    "type": "ItemPropertySchema",
                    "itemType": attributes["type"],
                    "propertyName": k,
                    "valueType": value_type,
                }
            )
        return create_items

    def _property_dicts_from_type(self, item):
        create_items = []
        for property, p_type in item.get_property_types().items():
            p_type = self.TYPE_TO_SCHEMA[p_type]
            create_items.append(
                {
                    "type": "ItemPropertySchema",
                    "itemType": item.__name__,
                    "propertyName": property,
                    "valueType": p_type,
                }
            )
        return create_items

    def add_to_schema(self, *items: List[Union[object, type]]):
        create_items = []

        for item in items:
            if isinstance(item, type):
                property_dicts = self._property_dicts_from_type(item)
            else:
                property_dicts = self._property_dicts_from_instance(item)
                item = type(item)
            create_items.extend(property_dicts)
            self.registered_classes[item.__name__] = item

        try:
            self.api.bulk(create_items=create_items)
            return True
        except Exception as e:
            print(e)
            return False

    def _upload_image(self, img, asyncFlag=True, callback=None):
        if isinstance(img, np.ndarray):
            return self.upload_file(img.tobytes(), asyncFlag=asyncFlag, callback=callback)
        elif isinstance(img, bytes):
            return self.upload_file(img, asyncFlag=asyncFlag, callback=callback)
        else:
            raise ValueError(f"Unknown image data type {type(img)}")

    def upload_file(self, file, asyncFlag=True, callback=None):
        if asyncFlag:
            return self.upload_file_async(file, callback=callback)
        try:
            self.api.upload_file(file)
            return True
        except PodError as e:
            # 409 = CONFLICT, file already exists
            if e.status == 409:
                return True
            return False

    def upload_file_async(self, file, callback=None):
        def thread_fn(file, callback):
            result = self.upload_file(file, asyncFlag=False)
            if callback:
                callback(result)
        Thread(target=thread_fn, args=(file, callback)).start()
        return True

    def get_file(self, sha):
        """Return the result of `self.api.get_file` for the given `sha`."""
        return self.api.get_file(sha)

    def get_photo(self, id, size=640):
        photo = self.get(id)
        self._load_photo_data(photo, size=size)
        return photo

    def _load_photo_data(self, photo, size=None):
        if len(photo.file) > 0 and photo.data is None:
            file = self.get_file(photo.file[0].sha256)
            if file is None:
                print(
                    f"Could not load data of {photo} attached file item does not have data in pod"
                )
                return
            photo.data = file
        else:
            print(f"could not load data of {photo}, no file attached")

    def create_if_external_id_not_exists(self, node):
        if not self.external_id_exists(node):
            self.create(node)

    def external_id_exists(self, node):
        if node.externalId is None:
            return False
        existing = self.search({"externalId": node.externalId})
        return len(existing) > 0

    def create_edges(self, edges):
        """Create `edges` through `bulk_action` and return its result."""
        return self.bulk_action(create_edges=edges)

    def delete_items(self, items):
        """Delete `items` through `bulk_action` and return its result."""
        return self.bulk_action(delete_items=items)
    def delete_all(self):
        """Delete every item returned by `get_all_items`."""
        # NOTE(review): `get_all_items` in this class raises NotImplementedError,
        # so this call fails unless that method is overridden.
        items = self.get_all_items()
        self.delete_items(items)

    @staticmethod
    def gather_batch(items, start_idx, start_size=0, max_size=5000000):
        idx = start_idx
        total_size = start_size
        batch_items = []
        for i, x in enumerate(items):
            if i < idx:
                continue
            elif len(str(x)) > max_size:
                idx = i + 1
                print("Could not add item: Item exceeds max item size")
            elif total_size + len(str(x)) < max_size:
                batch_items.append(x)
                total_size += len(str(x))
                idx = i + 1
            else:
                break
        return batch_items, idx, total_size

    def bulk_action(
        self, create_items=None, update_items=None, create_edges=None, delete_items=None, partial_update=True
    ):
        """Create, update and delete items and create edges via the bulk API, in size-bounded batches.

        Args:
            create_items: Items to create; items without an id get a random uuid hex id.
            update_items: Items to update, serialized with `get_update_dict`.
            create_edges: Edges to create, serialized with `get_create_edge_dict`.
            delete_items: Items to delete; items without an id are skipped.
            partial_update: Forwarded to `get_update_dict`.

        Returns:
            True when every batch was written, False as soon as a batch raises
            `PodError`.
        """
        all_items = []
        if create_items:
            all_items += create_items
        if update_items:
            all_items += update_items
        for item in all_items:
            item._set_client(self)

        # we need to set the id to not lose the reference
        if create_items is not None:
            for c in create_items:
                if c.id is None:
                    c.id = uuid.uuid4().hex

        # From here on the four arguments hold serialized payloads, not item objects.
        create_items = (
            [self.get_create_dict(i) for i in create_items]
            if create_items is not None
            else []
        )
        update_items = (
            [self.get_update_dict(i, partial_update=partial_update) for i in update_items]
            if update_items is not None
            else []
        )
        create_edges = (
            [self.get_create_edge_dict(i) for i in create_edges]
            if create_edges is not None
            else []
        )
        # Note: skip delete_items without id, as items that are not in pod cannot be deleted
        delete_items = (
            [item.id for item in delete_items if item.id is not None]
            if delete_items is not None
            else []
        )

        n_total = len(create_items + update_items + create_edges + delete_items)
        n = 0

        # Per-list cursors: index of the next payload that still has to be sent.
        i_ci, i_ui, i_ce, i_di = 0, 0, 0, 0
        while not (
            i_ci == len(create_items)
            and i_ui == len(update_items)
            and i_ce == len(create_edges)
            and i_di == len(delete_items)
        ):
            # batch_size is threaded through the calls so all lists share one size budget.
            batch_size = 0
            create_items_batch, i_ci, batch_size = self.gather_batch(
                create_items, i_ci, start_size=batch_size
            )
            update_items_batch, i_ui, batch_size = self.gather_batch(
                update_items, i_ui, start_size=batch_size
            )
            delete_items_batch, i_di, batch_size = self.gather_batch(
                delete_items, i_di, start_size=batch_size
            )
            # Edges are only batched once every create_items payload has been gathered.
            if i_ci == len(create_items):
                create_edges_batch, i_ce, batch_size = self.gather_batch(
                    create_edges, i_ce, start_size=batch_size
                )
            else:
                create_edges_batch = []
            n_batch = len(
                create_items_batch
                + update_items_batch
                + create_edges_batch
                + delete_items_batch
            )
            n += n_batch
            print(f"BULK: Writing {n}/{n_total} items/edges")

            try:
                # The return value is stored but not used afterwards.
                result = self.api.bulk(
                    create_items_batch,
                    update_items_batch,
                    create_edges_batch,
                    delete_items_batch,
                )
            except PodError as e:
                print(e)
                print("could not complete bulk action, aborting")
                return False
        print(f"Completed Bulk action, written {n} items/edges")
        
        # Only reached when no batch failed: notify created and updated items.
        for item in all_items:
            item.on_sync_to_pod(self)
        return True
    
    def get_create_edge_dict(self, edge):
        return {"_source": edge.source.id, "_target": edge.target.id, "_name": edge._type}

    def create_edge(self, edge):
        edge_dict = self.get_create_edge_dict(edge)

        try:
            self.api.create_edge(edge_dict)
            return True
        except PodError as e:
            print(e)
            return False

    def get(self, id, expanded=True, include_deleted=False):
        if not expanded:
            res = self._get_item_with_properties(id)
        else:
            res = self._get_item_expanded(id)
        if res is None:
            raise ValueError(f"Item with id {id} does not exist")
        elif res.deleted and not include_deleted:
            print(f"Item with id {id} has been deleted")
            return
        return res

    def get_all_items(self):
        """Not implemented; always raises `NotImplementedError`."""
        raise NotImplementedError()

    def filter_deleted(self, items):
        return [i for i in items if not i.deleted == True]

    def _get_item_expanded(self, id, include_deleted=False):
        item = self.get(id, expanded=False, include_deleted=include_deleted)
        edges = self.get_edges(id)
        for e in edges:
            item.add_edge(e["name"], e["item"])
        return item

    def get_edges(self, id):
        try:
            result = self.api.get_edges(id)
            for d in result:
                d["item"] = self.item_from_json(d["item"])
            return result
        except PodError as e:
            print(e)
            return

    def _get_item_with_properties(self, id):
        try:
            result = self.api.get_item(str(id))
            if not len(result):
                return
            return self.item_from_json(result[0])
        except PodError as e:
            print(e)
            return

    def get_update_dict(self, node, partial_update=True):
        properties = node.to_json(dates=False)
        properties.pop("type", None)
        properties.pop("deleted", None)
        properties = {k: v for k, v in properties.items() if k=="id" or k in node._updated_properties}
        return properties

    def update_item(self, node, partial_update=True):
        data = self.get_update_dict(node, partial_update=partial_update)
        try:
            self.api.update_item(data)
            node.on_sync_to_pod(self)
            return True
        except PodError as e:
            print(e)
            return False

    def exists(self, id):
        try:
            result = self.api.get_item(str(id))
            if isinstance(result, list) and len(result) > 0:
                return True
            return False
        except PodError as e:
            print(e)
            return False

    def search_paginate(self, fields_data, limit=50, include_edges=True, add_to_local_db: bool = True):
        if "ids" in fields_data:
            raise NotImplementedError("Searching by multiple IDs is not implemented for paginated search.")
        
        extra_fields = {"[[edges]]": {}} if include_edges else {}
        query = {**fields_data, **extra_fields}
        
        try:
            for page in self.api.search_paginate(query, limit):
                result = [self._item_from_search(item, add_to_local_db=add_to_local_db) for item in page]
                yield self.filter_deleted(result)
        except PodError as e:
            print(e)

    def search(self, fields_data, include_edges: bool = True, add_to_local_db: bool = True):
        extra_fields = {"[[edges]]": {}} if include_edges else {}
        query = {**fields_data, **extra_fields}
        
        # Special key "ids" for searching a list of ids.
        # Requires /bulk search instead of /search.
        if "ids" in query:
            ids = query.pop("ids")
            bulk_query = [{"id": uid, **query} for uid in ids]
            try:
                result = self.api.bulk(search=bulk_query)["search"]
            except PodError as e:
                print(e)
    
            result = [item for sublist in result for item in sublist]
        else:
            try:
                result = self.api.search(query)
            except PodError as e:
                print(e)

        result = [self._item_from_search(item, add_to_local_db=add_to_local_db) for item in result]
        return self.filter_deleted(result)

    def _item_from_search(self, item_json: dict, add_to_local_db: bool = True):
        # search returns different fields w.r.t. edges compared to `get` api,
        # different method to keep `self.get` clean.
        item = self.item_from_json(item_json)

        for edge_json in item_json.get("[[edges]]", []):
            edge_name = edge_json["_edge"]
            try:
                edge_item = self.item_from_json(edge_json["_item"])
                item.add_edge(edge_name, edge_item)
            except Exception as e:
                print(f"Could not attach edge {edge_json['_item']} to {item}")
                print(e)
                continue
        return item

    def search_last_added(self, type=None, with_prop=None, with_val=None):
        query = {"_limit": 1, "_sortOrder": "Desc"}
        if type is not None:
            query["type"] = type
        if with_prop is not None:
            query[f"{with_prop}=="] = with_val
        return self.search(query)[0]

    def item_from_json(self, json, add_client_ref: bool = True, from_pod: bool = True):
        """Turn an item dict into an item object, reusing the locally cached instance if any.

        Args:
            json: Item dict; must contain "type", may contain "pluginClass"
                and "pluginPackage".
            add_client_ref: When True, the client is set on the resulting item.
            from_pod: When True, the item is marked as present in the pod and
                its set of updated properties is cleared.

        Returns:
            The cached instance (refreshed with the new property values) when
            the local db knows the id, otherwise the newly constructed item.
        """
        plugin_class = json.get("pluginClass", None)
        plugin_package = json.get("pluginPackage", None)

        constructor = get_constructor(
            json["type"],
            plugin_class,
            plugin_package=plugin_package,
            extra=self.registered_classes,
        )
        new_item = constructor.from_json(json)
        existing = self.local_db.get(new_item.id)
        # TODO: cleanup
        if existing is not None:
            # Move the edges over only when the cached item has none loaded
            # yet and the new one does; re-point them at the cached instance.
            if not existing.is_expanded() and new_item.is_expanded():
                for edge_name in new_item.get_all_edge_names():
                    edges = new_item.get_edges(edge_name)
                    for e in edges:
                        e.source = existing
                    existing.__setattr__(edge_name, edges)

            # Property values from the new data always overwrite the cached ones.
            for prop_name in new_item.get_property_names():
                existing.__setattr__(prop_name, new_item.__getattribute__(prop_name))
            result = existing
        else:
            result = new_item

        # Plain `if` instead of a conditional expression used only for its side effect.
        if add_client_ref:
            result._set_client(self)
        if from_pod:
            result._in_pod = True
            result._updated_properties = set()
        return result

    def get_properties(self, expanded):
        """Return a shallow copy of `expanded` with the `ALL_EDGES` entry removed, if present."""
        without_edges = copy(expanded)
        if ALL_EDGES not in without_edges:
            return without_edges
        del without_edges[ALL_EDGES]
        return without_edges

    def send_email(self, to, subject="", body=""):
        try:
            self.api.send_email(to, subject, body)
            print(f"succesfully sent email to {to}")
            return True
        except PodError as e:
            print(e)
            return False

Pymemri communicates with the pod via the `PodClient`. The PodClient requires you to provide a [database key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials) and an [owner key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials). During development, you don't have to worry about these keys, you can just omit the keys when initializing the `PodClient`, which creates a new user by defining random keys.

If you want to use the same keys for different `PodClient` instances, you can store a random key pair locally with the `store_keys` CLI, and create a new client with `PodClient.from_local_keys()`. When you are using the app, setting the keys in the pod, and passing them when calling a plugin is handled for you by the app itself.

In [None]:
client = PodClient()
client.registered_classes["Photo"]

ValueError: <class 'pymemri.data.itembase.ItemBase'> is not in list

In [None]:
# hide
success = client.api.test_connection()
assert success

## Creating Items and Edges

Now that we have access to the pod, we can create items here and upload them to the pod. All items are defined in the schema of the pod. To create an item in the pod, you have to add the schema first. Schemas can be added as follows

In [None]:
from pymemri.data.schema import EmailMessage, Address, PhoneNumber
succes = client.add_to_schema(EmailMessage, Address, PhoneNumber)

ValueError: <class 'pymemri.data.itembase.ItemBase'> is not in list

In [None]:
# hide
assert succes

We can now create an item with one of the above item definitions. As a side-effect, our item will be assigned an id.

In [None]:
email_item = EmailMessage.from_data(content="example content field")
client.create(email_item)
print(email_item.id)

a94ef10fab243e30ba88ff856c7e1464


The types of items in the pod are not limited to the definitions in the pymemri schema. We can easily define our own types, or overwrite existing item definitions, with the same `add_to_schema` method.

Note that all keyword arguments need to be added to the `properties` class variable to let the pod know what the properties of our item are. Additionally, properties in the Pod are statically typed, and their types are inferred from the type annotations of our `__init__` method.

In [None]:
# export
class Dog(Item):
    """Example of a user-defined item type with four typed properties."""

    # Every keyword argument of __init__ has to be listed here as a property.
    properties = Item.properties + ["name", "age", "bites", "weight"]

    def __init__(self, name: str=None, age: int=None, bites: bool=False, weight: float=None, **kwargs):
        """Store the dog's properties; remaining keyword arguments go to `Item.__init__`."""
        super().__init__(**kwargs)
        self.name = name
        self.age = age
        self.bites = bites
        self.weight = weight

In [None]:
client.add_to_schema(Dog)
dog2 = Dog(name="bob", age=3, weight=33.2)
client.create(dog2);

In [None]:
# hide
client.reset_local_db()
dog_from_db = client.get(dog2.id)

assert dog_from_db.name == "bob"
assert dog_from_db.age == 3
assert dog_from_db.weight == 33.2

We can connect items using edges. Let's create another item, a person, and connect the email and the person.

In [None]:
person_item = Person.from_data(firstName="Alice", lastName="X")
succes = client.add_to_schema(person_item)

In [None]:
# hide
assert succes

In [None]:
person_item = Person.from_data(firstName="Alice", lastName="X")
item_succes = client.create(person_item)
edge = Edge(email_item, person_item, "sender")
edge_succes = client.create_edge(edge)

print(client.get_edges(email_item.id))

[{'item': Person (#c0050fe0b4cde00fa97f7bbab7a54c7f), 'name': 'sender'}]


In [None]:
# hide
assert item_succes
assert edge_succes

If we use the normal `client.get` (without `expanded=False`), we also get items directly connected to the Item.

In [None]:
email_from_db = client.get(email_item.id) 
print(email_from_db.sender)

[Person (#c0050fe0b4cde00fa97f7bbab7a54c7f)]


In [None]:
# hide
assert isinstance(email_from_db.sender[0], Person)

# Fetching and updating Items

## Normal Items

We can use the client to fetch data from the database. This is in particular useful for indexers, which often use data in the database as input for their models. The simplest form  of querying the database is by querying items in the pod by their id (unique identifier).

In [None]:
person_item = Person.from_data(firstName="Alice")
client.create(person_item)

# Retrieve person from Pod
person_from_db = client.get(person_item.id, expanded=False)

In [None]:
# hide
assert person_from_db is not None
assert person_from_db == person_item
assert person_from_db.id is not None

# item._in_pod check
other_person = Person(firstName="test")
other_person._in_pod = False
client.create(other_person)
assert other_person._in_pod

Apart from creating items, we might want to update existing ones:

In [None]:
person_item.lastName = "Awesome"
client.update_item(person_item)
person_from_db = client.get(person_item.id, expanded=False)
print(person_from_db.lastName)

Awesome


In [None]:
# hide
assert person_from_db.lastName == "Awesome"

In [None]:
# hide
assert person_from_db._in_pod

# Test partial updating
assert len(person_from_db._updated_properties) == 0

# Change property
person_from_db.displayName = "AliceAwesome"
assert "displayName" in person_from_db._updated_properties

client.update_item(person_from_db)
assert len(person_from_db._updated_properties) == 0

# Change non-property
person_from_db.non_property = "test"
assert len(person_from_db._updated_properties) == 0

# Empty update
print(list(client.get_update_dict(person_from_db).keys()) == ["id"])
assert client.update_item(person_from_db)

True


When we don't know the ids of the items we want to fetch, we can also search by property. We can use this for instance when we want to query all items from a particular type to perform some indexing on. We can get all `Person` Items from the db by:

## Search

the `PodClient` can search through the pod with the `search` or `search_paginate` methods, which return the results of a search as a list or generator respectively. Search uses the same arguments as the Pod search API, which can be found [here](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#post-v4owner_keysearch).

To display how search works, we first add a few new items

In [None]:
person_item2 = Person.from_data(firstName="Bob")
person_account = Account(service="testService")
client.create(person_item2)
client.create(person_account)

person_item2.add_edge("account", person_account)
client.create_edges(person_item2.get_edges("account"));

BULK: Writing 1/1 items/edges
Completed Bulk action, written 1 items/edges


In [None]:
# hide
client.reset_local_db()

In [None]:
# Search for all Persons in the pod
all_people = client.search({"type": "Person"}, include_edges=True)
print("Number of results:", len(all_people))

Number of results: 4


In [None]:
# hide
assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0
assert any([len(p.account) for p in all_people])
assert all([item._in_pod for item in all_people])

In [None]:
# hide
# search without returning edges
all_people = client.search({"type": "Person"}, include_edges=False)
assert len(all_people)
# `assert([...])` only checked that the list was non-empty, which is always
# true here; check every person explicitly instead.
assert all(len(person.get_all_edges()) == 0 for person in all_people)

In [None]:
# hide
assert len(all_people)

In [None]:
# hide
# Search with edges
all_people = client.search({"type": "Person"}, include_edges=True)

assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0
assert any([len(p.account) for p in all_people])

In [None]:
# Search by IDs
ids = [person.id for person in all_people]
result = client.search({"ids": ids})
assert [item.id for item in result] == ids

To handle large volumes of Items, the `PodClient.search_paginate` method can search through the pod and return a generator which yields batches of items. This method uses the same search arguments as the `search` method:

In [None]:
# Create 100 accounts to search
client.bulk_action(
    create_items=[
        Account(identifier=str(i), service="paginate_test") for i in range(100)
    ]
)

generator = client.search_paginate({"type": "Account", "service": "paginate_test"}, limit=10)
for page in generator:
    # process accounts
    pass

BULK: Writing 100/100 items/edges
Completed Bulk action, written 100 items/edges


In [None]:
# hide
# Test pagination
accounts = client.search({"type": "Account", "service": "paginate_test"})

generator = client.search_paginate({"type": "Account", "service": "paginate_test"}, limit=10)
accounts_paginated = []
for page in generator:
    accounts_paginated.extend(page)

assert len(accounts_paginated) == 100
assert [a.id for a in accounts] == [a.id for a in accounts_paginated]
assert all(a._in_pod for a in accounts_paginated)

In [None]:
# hide
# Search, edge cases

result = client.search({"type": "Account", "service": "NonExistentService"})
assert result == []

paginator = client.search_paginate({"type": "Account", "service": "NonExistentService"})

# The old try/except passed silently whenever `next` returned a page, even a
# non-empty one. Accept an exhausted generator or an empty page, nothing else.
first_page = next(paginator, [])
assert first_page == []

## Search last added items

In [None]:
person_item2 = Person.from_data(firstName="Last Person")
client.create(person_item2)
last_added = client.search_last_added(type="Person")

In [None]:
# hide
assert last_added.firstName == "Last Person"

In the near future, Pod will support searching by user defined properties as well. This will allow for the following. **warning, this is currently not supported**

```client.search_last_added(type="Person", with_prop="ImportedBy", with_val="EmailImporter")```

## Uploading & downloading files

### File API

To work with files like Photos or Videos, the `PodClient` has a separate file api. This api works by posting a blob to the `upload_file` endpoint, and creating an Item with a property with the same sha256 as the sha used in the endpoint.

For example, we can upload a photo with the file API as follows:

In [None]:
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = Photo.from_np(x)
file = photo.file[0]
succes = client.create(file)
succes2 = client._upload_image(photo.data)

cb = lambda res: print(res)
succes3 = client._upload_image(photo.data, asyncFlag=True, callback=cb)

TypeError: super(type, obj): obj must be an instance or subtype of type

In [None]:
# hide
assert succes
assert succes2

data = client.get_file(file.sha256)
photo.data = data

arr = photo.to_np()
assert (arr == x).all()

Exception in thread Thread-5:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-3-382a129c03c6>", line 168, in thread_fn
  File "/home/locobel/Documents/memri/pymemri/pymemri/pod/api.py", line 182, in upload_file
    raise PodError(result.status_code, result.text)
pymemri.pod.api.PodError: 409 Failure: File already exists


### Photo API

The PodClient implements an easier API for photos separately, which uses the same file API under the hood

In [None]:
print(client.registered_classes["Photo"])

# client.add_to_schema(Photo)

x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = Photo.from_np(x)
client.create_photo(photo);

photo.file

<class 'pymemri.data.photo.Photo'>
BULK: Writing 3/3 items/edges
Completed Bulk action, written 3 items/edges


[File (#c99b9f7fddee414aa08f0edcfc4e0876)]

In [None]:
# hide
assert photo._in_pod
res = client.get_photo(photo.id)
print(res.id)
res.file[0].sha256
assert (res.to_np() == x).all()

67127f61161c4ec690b3e5db475a9ce0


Some photos come as bytes, for example when downloading them from a third party service. We can use `photo.from_bytes` to initialize these photos:

In [None]:
byte_photo = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xe1\x00\x00\x00\xe1\x08\x03\x00\x00\x00\tm"H\x00\x00\x003PLTE\x04\x02\x04\x00\x00\x00\xa0\xa0\xa0\xa3\xa3\xa3\xaa\xaa\xaa\xb4\xb4\xb4\xbd\xbe\xbd\xbb\xbc\xbb\xde\xde\xde\x9b\x9a\x9b\xfe\xfe\xfe\xf2\xf3\xf2\xe5\xe6\xe5\xd8\xd9\xd8\xd1\xd1\xd1\xc9\xca\xc9\xae\xae\xae\x80k\x98\xfc\x00\x00\x01TIDATx\x9c\xed\xdd;r\xc2P\x00\x04A!\x90\x84\xfc\x01\xee\x7fZ\x138\xb1\x13S\xceF\xaf\xfb\x06\x93o\xd5No\xef\x1f\x9f\xb7\xfb}]\xd7my\xba|;\xff4\xff\xdf\xf9O\x97W<\x96W\xac\xbfm\xd7i9\x1d\xdb\xfe,\x9c\x8e\xec4+\xac{\x16^\x14\xb6)\xecS\xd8\xa7\xb0Oa\x9f\xc2\xbe!\n\xcf\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14\xce\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd87\xc4bHa\x9c\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x86xaQ\x18\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd87D\xe1\xe3\xf0\x85\x8b\xc26\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14\xae\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14n\xa7c\xdb\xa7\xeb>\x1f\xd9~\xfb\x02\xee\x7f\r\xe5\xe1h\x04"\x00\x00\x00\x00IEND\xaeB`\x82'
photo = Photo.from_bytes(byte_photo)
client.create_photo(photo);

BULK: Writing 3/3 items/edges
Completed Bulk action, written 3 items/edges


In [None]:
# hide
# Test on a new client to prevent caching
new_client = PodClient(database_key=client.database_key, owner_key=client.owner_key)
res = new_client.get_photo(photo.id, size=225)
assert res.data == photo.data

## Bulk API

Adding each item separately to the pod with the `create` method can take a lot of time. For this reason, using the bulk API is faster and more convenient in most cases. Here we show how to create items and edges; updating and deleting are also possible.

In [None]:
# Create 100 Dogs to add to the pod, and two edges to a new person
dogs = [Dog(name=f"dog number {i}") for i in range(100)]
person = Person(firstName="Alice")
edge1 = Edge(dogs[0], person, "label")
edge2 = Edge(dogs[1], person, "label")

# Simultaneously add the dogs, person, and edges with the bulk API
success = client.bulk_action(create_items=dogs + [person], create_edges=[edge1, edge2])

BULK: Writing 103/103 items/edges
Completed Bulk action, written 103 items/edges


In [None]:
# hide
dogs = client.search({"type": "Dog"})
dogs_with_edge = [item for item in dogs if len(item.get_all_edges())]

print(len(dogs_with_edge))
assert len(dogs_with_edge) == 2
for d in dogs_with_edge:
    assert len(d.label) > 0

2


In [None]:
# hide
# test bulk delete and update
person.firstName = "Bob"
to_delete = [dogs[0]]
to_update = [person]

client.bulk_action(delete_items=to_delete, update_items=to_update)
dogs_with_edge = [
    item for item in client.search({"type": "Dog"}) if item.name.startswith("dog number 0") or item.name.startswith("dog number 1 ")
]

assert len(dogs_with_edge) == 1

dog = dogs_with_edge[0]
assert dog.label[0].firstName == "Bob"

BULK: Writing 2/2 items/edges
Completed Bulk action, written 2 items/edges


# Sending emails -

In [None]:
# hide
# skip
to = "myemail@gmail.com"
client.send_email(to=to, subject="test", body="test2")

succesfully sent email to myemail@gmail.com


True

# Create items that do not exist in the Pod

The `PodClient` can deduplicate items with the same externalId with the `create_if_external_id_not_exists` method.

In [None]:
person_item = Person(firstName="Eve", externalId="gmail_1")
person_item2 = Person(firstName="Eve2", externalId="gmail_1")

client.create_if_external_id_not_exists(person_item)
client.create_if_external_id_not_exists(person_item2)

existing = client.search({"externalId": "gmail_1"})

In [None]:
# hide
assert len(existing) == 1

# Export -

In [None]:
# hide
from nbdev.export import *
notebook2script()

Converted basic.ipynb.
Converted cvu.utils.ipynb.
Converted data.dataset.ipynb.
Converted data.photo.ipynb.
Converted exporters.exporters.ipynb.
Converted index.ipynb.
Converted itembase.ipynb.
Converted plugin.authenticators.credentials.ipynb.
Converted plugin.authenticators.oauth.ipynb.
Converted plugin.listeners.ipynb.
Converted plugin.pluginbase.ipynb.
Converted plugin.states.ipynb.
Converted plugins.authenticators.password.ipynb.
Converted pod.api.ipynb.
Converted pod.client.ipynb.
Converted pod.db.ipynb.
Converted pod.utils.ipynb.
Converted template.config.ipynb.
Converted template.formatter.ipynb.
Converted test_schema.ipynb.
Converted test_utils.ipynb.
