In [None]:
%load_ext autoreload
%autoreload 2
# default_exp pod.client

# Pod Client

In [None]:
# export
from pyintegrators.data.itembase import Edge, ItemBase
from pyintegrators.indexers.facerecognition.photo import resize
from pyintegrators.data.schema import *
from pyintegrators.imports import *
from hashlib import sha256

In [None]:
# export
# Address of the pod that PodClient talks to when no `url` argument is given.
DEFAULT_POD_ADDRESS = "http://localhost:3030"
# Default API version; PodClient places it in the request URL as f"{url}/{version}/{owner_key}".
POD_VERSION = "v3"

In [None]:
# export
class PodClient:
    """Thin HTTP client for a pod.

    Every request goes to ``f"{url}/{version}/{owner_key}/<endpoint>"``. Most methods
    follow the same convention: on a non-200 response or a `requests` error the
    response/error is printed and a falsy value (``False`` or ``None``) is returned
    instead of raising.
    """

    def __init__(self, url=DEFAULT_POD_ADDRESS, version=POD_VERSION, database_key=None, owner_key=None):
        """Set up the connection settings.

        Args:
            url: base address of the pod.
            version: API version segment used in the request URL.
            database_key: key sent with each request; a random one is generated when omitted.
            owner_key: key placed in the request URL; a random one is generated when omitted.
        """
        self.url = url
        # Store the version that was actually requested (this used to be pinned
        # to POD_VERSION, which disagreed with base_url for non-default versions).
        self.version = version
        self.test_connection(verbose=False)
        self.database_key = database_key if database_key is not None else self.generate_random_key()
        self.owner_key = owner_key if owner_key is not None else self.generate_random_key()
        self.base_url = f"{url}/{version}/{self.owner_key}"
        self.auth_json = {"type": "ClientAuth", "databaseKey": self.database_key}

    @staticmethod
    def generate_random_key():
        """Return a string of 64 random decimal digits."""
        # NOTE(review): `random` is not a cryptographically secure source; fine for
        # throw-away development keys, consider `secrets` for anything else.
        return "".join([str(random.randint(0, 9)) for i in range(64)])

    def test_connection(self, verbose=True):
        """Return True if a GET request to the pod url succeeds, False otherwise."""
        try:
            requests.get(self.url)
            if verbose: print("Succesfully connected to pod")
            return True
        except requests.exceptions.RequestException:
            print("Could no connect to backend")
            return False

    def create(self, node):
        """Create `node` in the pod and register it in the local ItemBase db.

        For `Photo` items the attached file is created and uploaded first. On
        success `node.id` is set to the value returned by the pod.

        Returns:
            True on success, False otherwise.
        """
        if isinstance(node, Photo) and not self.create_photo_file(node): return False

        try:
            properties = self.get_properties_json(node)
            properties = {k: v for k, v in properties.items() if v != []}
            body = {"auth": self.auth_json, "payload": properties}

            result = requests.post(f"{self.base_url}/create_item", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return False

            node.id = result.json()
            ItemBase.add_to_db(node)
            return True
        except requests.exceptions.RequestException as e:
            print(e)
            return False

    def add_to_schema(self, node):
        """Register the properties of `node` in the pod schema.

        One ``ItemPropertySchema`` item is posted per property: ``str`` values are
        registered as "Text" and ``int`` values as "Integer". Properties of any
        other type are skipped with a message, because no value type is known for
        them here.

        Returns:
            False as soon as one request fails, True otherwise.
        """
        attributes = self.get_properties_json(node)
        for k, v in attributes.items():
            if isinstance(v, list) or k == "type":
                continue

            if isinstance(v, str):
                value_type = "Text"
            elif isinstance(v, int):
                value_type = "Integer"
            else:
                # Previously this fell through with `value_type` unbound (NameError)
                # or still holding the type of the previous property.
                print(f"Skipping property {k}: no schema value type known for {type(v).__name__}")
                continue

            payload = {"type": "ItemPropertySchema", "itemType": attributes["type"],
                       "propertyName": k, "valueType": value_type}
            body = {"auth": self.auth_json, "payload": payload}

            try:
                result = requests.post(f"{self.base_url}/create_item", json=body)

                if result.status_code != 200:
                    print(result, result.content)
                    return False

                # NOTE(review): this overwrites node.id with the id returned for the
                # schema item. Kept unchanged because existing callers may rely on
                # the node having an id afterwards - confirm whether it is intended.
                node.id = result.json()
                ItemBase.add_to_db(node)

            except requests.exceptions.RequestException as e:
                print(e)
                return False
        return True

    def create_photo_file(self, photo):
        """Create the first file item of `photo` and upload the photo data.

        Returns:
            The result of the upload (True/False).
        """
        file = photo.file[0]
        # NOTE(review): the result of creating the file item is not checked; only
        # the upload result is reported to the caller.
        self.create(file)
        return self.upload_photo(photo.data)

    def upload_photo(self, arr):
        """Upload the raw bytes of the array `arr` (anything with `.tobytes()`)."""
        return self.upload_file(arr.tobytes())

    def upload_file(self, file):
        """Upload the bytes `file` under their sha256 hex digest.

        Returns:
            True on success, False otherwise.
        """
        # TODO: currently this only works for numpy images
        try:
            sha = sha256(file).hexdigest()
            result = requests.post(f"{self.base_url}/upload_file/{self.database_key}/{sha}", data=file)
            if result.status_code != 200:
                print(result, result.content)
                return False
            return True
        except requests.exceptions.RequestException as e:
            print(e)
            return False

    def get_file(self, sha):
        """Return the raw content stored for the sha256 `sha`, or None on failure."""
        # TODO: currently this only works for numpy images
        try:
            body = {"auth": self.auth_json,
                    "payload": {"sha256": sha}}
            result = requests.post(f"{self.base_url}/get_file", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return None
            return result.content
        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def get_photo(self, id, size=640):
        """Fetch the photo item `id` and load its image data, resized to `size`.

        Returns:
            The photo item, or None if the item could not be fetched.
        """
        photo = self.get(id)
        if photo is None:
            # `get` already printed the reason; avoid dereferencing None below.
            return None
        self._load_photo_data(photo, size=size)
        return photo

    def _load_photo_data(self, photo, size=None):
        """Fill `photo.data` from the first attached file, if it is not loaded yet."""
        if photo.data is not None:
            # Data is already present: nothing to load (and nothing to complain about).
            return
        if len(photo.file) == 0:
            print(f"could not load data of {photo}, no file attached")
            return

        file = self.get_file(photo.file[0].sha256)
        if file is None:
            print(f"Could not load data of {photo} attached file item does not have data in pod")
            return
        data = np.frombuffer(file, dtype=np.uint8)
        c = photo.channels
        # Images with at most one channel are stored as 2-D arrays.
        shape = (photo.height, photo.width, c) if c is not None and c > 1 else (photo.height, photo.width)
        data = data.reshape(shape)
        if size is not None: data = resize(data, size)
        photo.data = data

    def create_if_external_id_not_exists(self, node):
        """Create `node` unless an item with the same externalId is found."""
        if not self.external_id_exists(node):
            self.create(node)

    def external_id_exists(self, node):
        """Return True if a search finds an item with the externalId of `node`.

        Returns False when the node has no externalId and also when the search
        itself fails (`search` returns None in that case).
        """
        if node.externalId is None: return False
        existing = self.search({"externalId": node.externalId})
        return existing is not None and len(existing) > 0

    def create_edges(self, edges):
        """Create edges between items in one bulk action.

        Args:
            edges: iterable of Edge objects; each one is sent as
                ``{"_source": <id>, "_target": <id>, "_type": <type>}`` plus optional
                label and sequence. For edges with `reverse` set, an additional
                edge from target to source with type ``"~" + type`` is sent.

        Returns:
            False if any edge lacks a source or target id or the request fails,
            True otherwise.
        """
        create_edges = []
        for e in edges:
            src, target = e.source.id, e.target.id

            if src is None or target is None:
                print(f"Could not create edge {e} missing source or target id")
                return False
            data = {"_source": src, "_target": target, "_type": e._type}
            if e.label is not None: data[LABEL] = e.label
            if e.sequence is not None: data[SEQUENCE] = e.sequence

            if e.reverse:
                data2 = copy(data)
                data2["_source"] = target
                data2["_target"] = src
                data2["_type"] = "~" + data2["_type"]
                create_edges.append(data2)

            create_edges.append(data)

        return self.bulk_action(create_items=[], update_items=[], create_edges=create_edges)

    def delete_items(self, items):
        """Delete the given items (by their ids) in one bulk action."""
        ids = [i.id for i in items]
        return self.bulk_action(delete_items=ids)

    def delete_all(self):
        """Delete every item returned by `get_all_items`.

        Returns:
            The result of the bulk delete, or False if the items could not be listed.
        """
        items = self.get_all_items()
        if items is None:
            # Listing failed (already reported); there is nothing to iterate over.
            return False
        return self.delete_items(items)

    def bulk_action(self, create_items=None, update_items=None, create_edges=None, delete_items=None):
        """Send one bulk_action request; every omitted argument defaults to an empty list.

        Returns:
            True on success, False otherwise.
        """
        create_items = create_items if create_items is not None else []
        update_items = update_items if update_items is not None else []
        create_edges = create_edges if create_edges is not None else []
        delete_items = delete_items if delete_items is not None else []
        # NOTE(review): this request passes "databaseKey" directly while most other
        # endpoints send the "auth" object - confirm against the pod API.
        edges_data = {"databaseKey": self.database_key, "payload": {
                    "createItems": create_items, "updateItems": update_items,
                    "createEdges": create_edges, "deleteItems": delete_items}}
        try:
            result = requests.post(f"{self.base_url}/bulk_action",
                                   json=edges_data)
            if result.status_code != 200:
                if "UNIQUE constraint failed" in str(result.content):
                    print(result.status_code, "Edge already exists")
                else:
                    print(result, result.content)
                return False
            return True
        except requests.exceptions.RequestException as e:
            print(e)
            return False

    def create_edge(self, edge):
        """Create a single edge via the create_edge endpoint.

        Returns:
            True on success, False otherwise.
        """
        payload = {"_source": edge.source.id, "_target": edge.target.id, "_name": edge._type}
        body = {"auth": self.auth_json,
                "payload": payload}

        try:
            result = requests.post(f"{self.base_url}/create_edge", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return False
            return True
        except requests.exceptions.RequestException as e:
            print(e)
            return False

    def get(self, id, expanded=True):
        """Fetch the item `id`, with its outgoing edges when `expanded` is True.

        Returns:
            The item, or None if it could not be fetched or is flagged as deleted.
        """
        if not expanded:
            res = self._get_item_with_properties(id)
        else:
            res = self._get_item_expanded(id)
        if res is None:
            return None

        elif res.deleted == True:
            print(f"Item with id {id} does not exist anymore")
            return None
        else:
            return res

    def get_all_items(self):
        """Return all non-deleted items, or None on failure."""
        try:
            body = {"databaseKey": self.database_key, "payload": None}
            result = requests.post(f"{self.base_url}/get_all_items", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return None

            res = [self.item_from_json(x) for x in result.json()]
            return self.filter_deleted(res)

        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def filter_deleted(self, items):
        """Return the items whose `deleted` attribute is not True."""
        return [i for i in items if not i.deleted == True]

    def _get_item_expanded(self, id):
        """Fetch the item `id` and attach its outgoing edges to it."""
        item = self.get(id, expanded=False)
        if item is None:
            return None
        # get_edges returns None both on failure and when there are no edges.
        edges = self.get_edges(id)
        for e in edges or []:
            item.add_edge(e["name"], e["item"])
        return item

    def get_edges(self, id):
        """Return the outgoing edges of item `id` with their items expanded.

        Returns:
            The decoded response list, where each entry's "item" value is replaced
            by an item object; None on failure or when the list is empty.
        """
        body = {"payload": {"item": str(id),
                            "direction": "Outgoing",
                            "expandItems": True},
                "auth": self.auth_json}

        try:
            result = requests.post(f"{self.base_url}/get_edges", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return None

            edges = result.json()
            if edges == []:
                return None
            for d in edges:
                d["item"] = self.item_from_json(d["item"])
            return edges
        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def _get_item_with_properties(self, id):
        """Fetch the item `id` without edges; None on failure or when nothing is returned."""
        try:
            body = {"auth": self.auth_json,
                    "payload": str(id)}
            result = requests.post(f"{self.base_url}/get_item", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return None

            items = result.json()
            if items == []:
                return None
            return self.item_from_json(items[0])
        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def get_properties_json(self, node, dates=True):
        """Collect the properties of `node` into a dict that can be sent to the pod.

        Skipped are: attributes starting with an underscore, the `private`
        attribute and every name listed in it, list values, None values and,
        when `dates` is False, the date bookkeeping keys. The schema type name
        is added under "type".
        """
        DATE_KEYS = ['dateCreated', 'dateModified', 'dateServerModified']
        res = dict()
        private = getattr(node, "private", [])
        for k, v in node.__dict__.items():
            if k[:1] == '_' or k == "private" or k in private:
                continue
            if isinstance(v, list) or v is None:
                continue
            if dates == False and k in DATE_KEYS:
                continue
            res[k] = v
        res["type"] = self._get_schema_type(node)
        return res

    @staticmethod
    def _get_schema_type(node):
        """Return the name of the first class in the MRO of `node` defined in the schema module.

        Raises:
            ValueError: if no such class exists.
        """
        for cls in node.__class__.mro():
            if cls.__module__ == "pyintegrators.data.schema" and cls.__name__ != "ItemBase":
                return cls.__name__
        raise ValueError(f"No schema type found for {node.__class__.__name__}")

    def update_item(self, node):
        """Send the current properties of `node` (without dates, type and deleted) to the pod.

        Raises:
            KeyError: if the node has no id.
        """
        data = self.get_properties_json(node, dates=False)
        data.pop("type", None)
        data.pop("deleted", None)
        # Items without an id have no "id" key here (None values are dropped),
        # so this lookup rejects them with a KeyError, as before.
        data["id"]
        body = {"payload": data,
                "auth": self.auth_json}

        try:
            result = requests.post(f"{self.base_url}/update_item",
                                   json=body)
            if result.status_code != 200:
                print(result, result.content)
        except requests.exceptions.RequestException as e:
            print(e)

    def search(self, fields_data):
        """Search items by the given fields.

        Returns:
            A list of the non-deleted items found, or None on failure.
        """
        body = {"payload": fields_data,
                "auth": self.auth_json}
        try:
            result = requests.post(f"{self.base_url}/search", json=body)
            if result.status_code != 200:
                # An error response does not have to be JSON; decoding it
                # unconditionally raised a JSONDecodeError.
                print(result, result.content)
                return None
            res = [self.item_from_json(item) for item in result.json()]
            return self.filter_deleted(res)
        except requests.exceptions.RequestException as e:
            print(e)
            return None

    def item_from_json(self, json):
        """Build an item from its json dict, merging into an already known item.

        If an item with the same id is present in `ItemBase.global_db`, that
        object is updated with the new properties (and with the new edges when
        only the new item is expanded) and returned; otherwise the new item is
        returned.
        """
        indexer_class = json.get("indexerClass", None)
        constructor = get_constructor(json["itemType"], indexer_class)
        new_item = constructor.from_json(json)
        existing = ItemBase.global_db.get(new_item.id)
        # TODO: cleanup
        if existing is None:
            return new_item

        if not existing.is_expanded() and new_item.is_expanded():
            for edge_name in new_item.get_all_edge_names():
                edges = new_item.get_edges(edge_name)
                for e in edges:
                    # Re-point the edges at the object that is kept.
                    e.source = existing
                existing.__setattr__(edge_name, edges)

        for prop_name in new_item.get_property_names():
            existing.__setattr__(prop_name, new_item.__getattribute__(prop_name))
        return existing

    def get_properties(self, expanded):
        """Return a shallow copy of `expanded` without the ALL_EDGES key."""
        properties = copy(expanded)
        if ALL_EDGES in properties: del properties[ALL_EDGES]
        return properties

    def run_importer(self, id, servicePayload):
        """Ask the pod to run the importer `id`.

        Args:
            id: id sent as the importer id.
            servicePayload: dict forwarded to the importer; its "databaseKey"
                entry is also used to authenticate the request.
        """
        body = dict()
        body["databaseKey"] = servicePayload["databaseKey"]
        body["payload"] = {"id": id, "servicePayload": servicePayload}
        # The request body is intentionally not printed: it contains the database key.

        try:
            res = requests.post(f"{self.base_url}/run_importer", json=body)
            if res.status_code != 200:
                print(f"Failed to start importer on {self.url}:\n{res.status_code}: {res.text}")
            else:
                print("Starting importer")
        except requests.exceptions.RequestException as e:
            print(f"Error with calling importer {e}")

Pyintegrators communicate with the pod via the PodClient. The PodClient requires you to provide a [database key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials) and an [owner key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials). During development, you don't have to worry about these keys: you can simply omit them when initializing the PodClient, which then creates a new user by generating random keys. When you are using the app, setting the keys in the pod and passing them when calling an integrator is handled for you by the app itself.

In [None]:
# No keys are passed, so the client generates a random database key and owner key.
client = PodClient()
success = client.test_connection()
assert success

Succesfully connected to pod


## Creating Items and Edges

Now that we have access to the pod, we can create items here and upload them to the pod. All items are defined in the memri [schema](https://gitlab.memri.io/memri/schema). When the schema is changed, it automatically generates all the class definitions for the different languages used in memri. The Python schema file lives in [schema.py](https://gitlab.memri.io/memri/pyintegrators/-/blob/master/integrators/schema.py) in the integrators package. When initializing an Item, always use the `from_data` classmethod.

In [None]:
email_item = EmailMessage.from_data(content="example content field")
email_item

EmailMessage (#None)

In [None]:
# add_to_schema posts ItemPropertySchema items for the properties set on the item.
succes = client.add_to_schema(email_item)
assert succes

<!-- [08:09:30 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "ItemPropertySchema", "itemType": "Person", "propertyName": "age", "valueType": "integer"}}'
16[08:09:31 vasya@vn971 pod] 
[08:09:32 vasya@vn971 pod] 
[08:09:32 vasya@vn971 pod] 
[08:09:33 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "Person", "age": 20}}'
17[08:10:02 vasya@vn971 pod] 
[08:10:04 vasya@vn971 pod] 
[08:10:04 vasya@vn971 pod] 
[08:10:05 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "Person", "myAge": 20}}'
Property myAge not defined in Schema (attempted to use it for json value 20) -->

We can connect items using edges. Let's create another item, a person, and connect the email and the person.

In [None]:
person_item = Person.from_data(firstName="Alice", lastName="X")
succes = client.add_to_schema(person_item)
assert succes

In [None]:
person_item = Person.from_data(firstName="Alice", lastName="X")
item_succes = client.create(person_item)
# NOTE(review): email_item is never passed to client.create; the only id it has
# was assigned by the add_to_schema call above - confirm this is the intended target.
edge = Edge(person_item, email_item, "author")
edge_succes = client.create_edge(edge)
assert item_succes and edge_succes

In [None]:
client.get_edges(person_item.id)

> [0;32m<ipython-input-4-8bec0dac8dc6>[0m(379)[0;36mitem_from_json[0;34m()[0m
[0;32m    378 [0;31m        [0;31m# TODO: cleanup[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 379 [0;31m        [0;32mif[0m [0mexisting[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    380 [0;31m            [0;32mif[0m [0;32mnot[0m [0mexisting[0m[0;34m.[0m[0mis_expanded[0m[0;34m([0m[0;34m)[0m [0;32mand[0m [0mnew_item[0m[0;34m.[0m[0mis_expanded[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


In [None]:
client.get(person_item.id)

In [None]:
%debug

> [0;32m<ipython-input-4-8bec0dac8dc6>[0m(373)[0;36mitem_from_json[0;34m()[0m
[0;32m    371 [0;31m    [0;32mdef[0m [0mitem_from_json[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mjson[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    372 [0;31m        [0mindexer_class[0m [0;34m=[0m [0mjson[0m[0;34m.[0m[0mget[0m[0;34m([0m[0;34m"indexerClass"[0m[0;34m,[0m [0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 373 [0;31m        [0mconstructor[0m [0;34m=[0m [0mget_constructor[0m[0;34m([0m[0mjson[0m[0;34m[[0m[0;34m"itemType"[0m[0;34m][0m[0;34m,[0m [0mindexer_class[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    374 [0;31m        [0mnew_item[0m [0;34m=[0m [0mconstructor[0m[0;34m.[0m[0mfrom_json[0m[0;34m([0m[0mjson[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    375 [0;31m        [0mexisting[0m [0;34m=[0m [0mItemBase[0m[0;34m.[0m[0mglobal_db[0m[0;34m.[0m[0mget[0m[0;34m

# Fetching and updating Items

## Normal Items

We can use the client to fetch data from the database. This is particularly useful for indexers, which often use data in the database as input for their models. The simplest form of querying the database is by querying items in the pod by their id (unique identifier).

In [None]:
person_item = Person.from_data(firstName="Alice")
client.create(person_item)

True

In [None]:
person_from_db = client.get(person_item.id, expanded=False)
assert person_from_db is not None
assert person_from_db == person_item
assert person_from_db.id is not None

Apart from creating items, we might want to update existing items:

In [None]:
person_item.lastName = "Awesome"
client.update_item(person_item)
person_from_db = client.get(person_item.id, expanded=False)
assert person_from_db.lastName == "Awesome"

Sometimes, we might not know the ids of the items we want to fetch. We can also search by a certain property. We can use this, for instance, when we want to query all items of a particular type in order to perform some indexing on them.

In [None]:
person_item2 = Person.from_data(firstName="Bob")
client.create(person_item2);
# search drops items that are flagged as deleted before returning the list.
all_people = client.search({"type": "Person"})

assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0

all_people[:3]

[Person (#0620a379e62f92d8301dde8f58749e2b),
 Person (#22af3e04f98f24f03472b9bd3fadcadf),
 Person (#28dd71ab968c4b33eec4a84f9370ab7a)]

## Uploading & downloading files

### File API

In [None]:
from pyintegrators.indexers.facerecognition.photo import *

In [None]:
# Random 640x640 uint8 array used as image data.
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = IPhoto.from_np(x)
file = photo.file[0]
# The file item and the raw bytes are sent to the pod with two separate calls.
succes = client.create(file)
succes2 = client.upload_photo(x)
assert succes
assert succes2

In [None]:
# get_file returns raw bytes, so dtype and shape have to be restored here.
data = client.get_file(file.sha256)
arr = np.frombuffer(data, dtype=np.uint8)
assert (arr.reshape(640,640) == x).all()

### Photo API

The file API is currently only tested for images.

In [None]:
from pyintegrators.indexers.facerecognition.photo import *

In [None]:
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = IPhoto.from_np(x)

In [None]:
succes = client.add_to_schema(IPhoto.from_np(x))

In [None]:
assert client.create(photo)

In [None]:
res = client.get_photo(photo.id, size=640)

<Response [404]> b'Endpoint not found'


AttributeError: 'NoneType' object has no attribute 'file'

In [None]:
%debug

> [0;32m<ipython-input-12-0933dafd80dd>[0m(121)[0;36m_load_photo_data[0;34m()[0m
[0;32m    119 [0;31m[0;34m[0m[0m
[0m[0;32m    120 [0;31m    [0;32mdef[0m [0m_load_photo_data[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mphoto[0m[0;34m,[0m [0msize[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 121 [0;31m        [0;32mif[0m [0mlen[0m[0;34m([0m[0mphoto[0m[0;34m.[0m[0mfile[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m [0;32mand[0m [0mphoto[0m[0;34m.[0m[0mdata[0m [0;32mis[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    122 [0;31m            [0mfile[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mget_file[0m[0;34m([0m[0mphoto[0m[0;34m.[0m[0mfile[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m.[0m[0msha256[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    123 [0;31m            [0;32mif[0m [0mfile[0m [0;32mis[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m

In [None]:
# assert (res.data == x).all()

# Check if an item exists

In [None]:
# Both items carry the same externalId, so the second create call is expected to be skipped.
person_item = Person.from_data(firstName="Eve",   externalId="gmail_1")
person_item2 = Person.from_data(firstName="Eve2", externalId="gmail_1")

client.create_if_external_id_not_exists(person_item)
client.create_if_external_id_not_exists(person_item2)

existing = client.search({"externalId": "gmail_1"})
assert len(existing) == 1
client.delete_all()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# Resetting the db

In [None]:
client.delete_all()

# Export -

In [None]:
# hide
from nbdev.export import *
notebook2script()

Converted basic.ipynb.
Converted importers.EmailImporter.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted index.ipynb.
Converted indexers.FaceClusteringIndexer.Models.ipynb.
Converted indexers.FaceClusteringIndexer.Utils.ipynb.
Converted indexers.FaceClusteringIndexer.indexer.ipynb.
Converted indexers.FaceRecognitionModel.ipynb.
Converted indexers.FacerecognitionIndexer.Photo.ipynb.
Converted indexers.GeoIndexer.ipynb.
Converted indexers.NoteListIndexer.NoteList.ipynb.
Converted indexers.NoteListIndexer.Parser.ipynb.
Converted indexers.NoteListIndexer.ipynb.
Converted indexers.NoteListIndexer.util.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted pod.client.ipynb.
