# Understanding Custom Spaces

CustomSpace is the tool to incorporate vectors created outside the Superlinked system.

In [1]:
%pip install superlinked==10.1.1

In [2]:
from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.common.schema.schema import Schema
from superlinked.framework.common.schema.schema_object import FloatList
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.space.custom_space import (
    CustomSpace,
)
from superlinked.framework.dsl.query.param import Param

from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
)
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.dsl.query.query import Query

## Creating a custom space

To use pre-created custom vectors in Superlinked:
* create a schema (just like in any other case) with a `FloatList` type `SchemaField`,
* create a `CustomSpace` based on that field. You only need to set its `length`, i.e. the number of scalars in the vector that describes a single entity,
* keep in mind that when working with Superlinked, vectors often need to be aggregated (see [event effects](event_effects.ipynb) for example) or normalized,
* during aggregation, vectors are summed elementwise and, when needed, normalized by their L2 norm so that the result has unit length.

In [3]:
# Schema of the entities stored in the index: each product has an id and a
# vector that was created outside of Superlinked.
class Product(Schema):
    id: IdField  # identifier of the product (e.g. "product-1")
    sales_history: FloatList  # we need to supply vectors using this SchemaFieldType


# Schema instance; its fields are referenced when the space and the source are built.
product = Product()

In [4]:
# The space reads its vectors from the `sales_history` field; `length` is the
# number of scalars in each vector (3 in this example).
sales_history_space = CustomSpace(vector=product.sales_history, length=3)

# Index built on top of the single custom space.
product_index = Index(sales_history_space)

In [5]:
# In-memory source that accepts records following the Product schema.
source: InMemorySource = InMemorySource(product)

# The executor connects the source to the index; run() returns the app we query.
executor = InMemoryExecutor(
    sources=[source],
    indices=[product_index],
)
app = executor.run()

In [6]:
# FloatList values may be given as a list or np.array of floats or ints.
# Every record carries a 3-element vector, matching the length of the CustomSpace.
product_records = [
    {"id": "product-1", "sales_history": [1.0, 0.0, 0.0]},
    {"id": "product-2", "sales_history": [0.0, 0.8, 0.2]},
    {"id": "product-3", "sales_history": [0.1, 0.1, 0.8]},
]
source.put(product_records)

At query time, we can search with any vector that has the correct length. The queries below demonstrate this: each one is chosen so that a different one of the 3 products comes out as the top result.

In [7]:
# Similarity search over products in the custom space; the query vector is
# supplied at query time through the "sales_history" parameter.
sales_history_query = Query(product_index).find(product).similar(
    sales_history_space, Param("sales_history")
)

In [8]:
# Probe along the first axis: product-1 has exactly this vector, so it ranks first.
res = app.query(
    sales_history_query,
    sales_history=[1.0, 0.0, 0.0],
)

res.to_pandas()

Unnamed: 0,sales_history,id,similarity_score
0,"[1.0, 0.0, 0.0]",product-1,1.0
1,"[0.1, 0.1, 0.8]",product-3,0.123091
2,"[0.0, 0.8, 0.2]",product-2,0.0


In [9]:
# Probe along the second axis: product-2 has most of its weight there and ranks first.
res = app.query(
    sales_history_query,
    sales_history=[0.0, 1.0, 0.0],
)

res.to_pandas()

Unnamed: 0,sales_history,id,similarity_score
0,"[0.0, 0.8, 0.2]",product-2,0.970143
1,"[0.1, 0.1, 0.8]",product-3,0.123091
2,"[1.0, 0.0, 0.0]",product-1,0.0


In [10]:
# Probe along the third axis: product-3 has most of its weight there and ranks first.
res = app.query(
    sales_history_query,
    sales_history=[0.0, 0.0, 1.0],
)

res.to_pandas()

Unnamed: 0,sales_history,id,similarity_score
0,"[0.1, 0.1, 0.8]",product-3,0.984732
1,"[0.0, 0.8, 0.2]",product-2,0.242536
2,"[1.0, 0.0, 0.0]",product-1,0.0
