<!-- TABS -->
# Create datatype

Data types such as "text" or "integer", which are natively supported by your `db.databackend`, don't need a datatype.

Otherwise do one of the following:

In [None]:
# <testing: >
from superduperdb import superduper

# Connect through the `mongomock://` URI scheme -- presumably an in-memory
# MongoDB mock, so no running server is needed for this test cell (TODO confirm).
db = superduper("mongomock://test")

In [None]:
# <tab: Vector>
from superduperdb import vector

# Vector datatype with a fixed shape of (3,), i.e. three components per vector.
datatype = vector(shape=(3, ))

In [None]:
# <tab: Tensor>
from superduperdb.ext.torch import tensor
import torch

# Tensor datatype for `torch.float` tensors of shape (32, 32, 3).
datatype = tensor(torch.float, shape=(32, 32, 3))

In [None]:
# <tab: Array>
from superduperdb.ext.numpy import array
import numpy as np

# NumPy array datatype for "float64" arrays of shape (32, 32, 3).
datatype = array(dtype="float64", shape=(32, 32, 3))

In [None]:
# <tab: PDF>
!pip install PyPDF2
from superduperdb import DataType
from superduperdb.components.datatype import File

# Datatype identified as 'pdf'. No encoder/decoder is supplied; the
# encodable='file' option is handed to DataType as-is -- presumably it makes
# the PDF be stored as a file rather than as inline bytes (TODO confirm).
datatype = DataType(
    identifier='pdf',
    encodable='file',
)

In [None]:
# <tab: Text>
# Plain text needs no DataType component, so the datatype is just the string
# 'str'. The later testing cell only calls db.apply() on DataType instances,
# so this value is never registered with the database.
datatype = 'str'

In [None]:
# <tab: Image>
from superduperdb.ext.pillow import pil_image
import PIL.Image

# Reuse the ready-made `pil_image` object exported by superduperdb.ext.pillow
# instead of constructing a new datatype.
datatype = pil_image

In [1]:
# <tab: URI>

# No datatype is created for the URI tab; None signals "nothing to register".
datatype = None

In [None]:
# <tab: Audio>
from superduperdb.ext.numpy import array
from superduperdb import DataType
import scipy.io.wavfile
import io


def encoder(data, info=None):
    """Serialize a ``(sample_rate, samples)`` pair into WAV-formatted bytes.

    Args:
        data: Indexable pair whose first item is the sample rate and whose
            second item is the sample array, i.e. the same layout that
            ``scipy.io.wavfile.read`` returns.
        info: Optional metadata. Accepted (and ignored) so the signature
            matches the other encoders in this file, which all take
            ``info=None``; presumably the framework may pass it as a second
            argument -- confirm against the DataType implementation.

    Returns:
        bytes: The complete WAV file content.
    """
    sample_rate, samples = data[0], data[1]
    # Write into an in-memory buffer so nothing touches the filesystem.
    buffer = io.BytesIO()
    scipy.io.wavfile.write(buffer, sample_rate, samples)
    return buffer.getvalue()


def decoder(data, info=None):
    """Parse WAV-formatted bytes with ``scipy.io.wavfile.read``.

    Args:
        data: Raw bytes of a WAV file, e.g. the output of ``encoder``.
        info: Optional metadata. Accepted (and ignored) so the signature
            matches the other decoders in this file; presumably the framework
            may pass it as a second argument -- confirm against the DataType
            implementation.

    Returns:
        The value returned by ``scipy.io.wavfile.read`` for the buffer, i.e.
        a ``(sample_rate, samples)`` pair.
    """
    # Expose the bytes as a file-like object, which wavfile.read accepts.
    return scipy.io.wavfile.read(io.BytesIO(data))


# Bundle the WAV encoder/decoder pair defined above into a datatype identified
# as 'wav'. encodable='artifact' is passed through to DataType unchanged --
# presumably it selects artifact-style storage for the bytes (TODO confirm).
datatype = DataType(
    identifier='wav',
    encodable='artifact',
    encoder=encoder,
    decoder=decoder,
)

In [None]:
# <testing: >
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/audio.zip && unzip audio.zip
# Load one of the downloaded samples and push it through an encode -> decode
# round trip. Nothing is asserted; the cell only checks that this runs.
test = scipy.io.wavfile.read('./audio/1.wav')
datatype.decoder(datatype.encoder(test))

In [None]:
# <tab: Video>
from superduperdb import DataType

# Create a DataType identified as 'video_on_file'. No encoder/decoder is
# supplied; encodable='file' is passed through to DataType as-is -- presumably
# the video is kept as a file rather than as inline bytes (TODO confirm).
datatype = DataType(identifier='video_on_file', encodable='file')

In [None]:
# <tab: Encodable>
from superduperdb import DataType
import pandas as pd

def encoder(x, info=None):
    """Serialize ``x`` by calling its ``to_json()`` method.

    Args:
        x: Object exposing ``to_json()``; the identifier "pandas" used for
            this datatype suggests a pandas DataFrame.
        info: Optional metadata; not used.

    Returns:
        The value returned by ``x.to_json()``.
    """
    serialized = x.to_json()
    return serialized

def decoder(x, info=None):
    """Rebuild a pandas object from JSON produced by ``encoder``.

    Args:
        x: JSON payload. A ``str`` is treated as literal JSON text; any other
            value is handed to ``pd.read_json`` unchanged.
        info: Optional metadata; not used. It now defaults to ``None`` so the
            signature matches the sibling ``encoder``.

    Returns:
        The object returned by ``pd.read_json``.
    """
    import io  # local import: this cell does not import io itself

    # Wrap literal JSON text in a buffer: pandas >= 2.1 deprecates passing a
    # raw JSON string to read_json, and a buffer can never be mistaken for a
    # file path or URL.
    source = io.StringIO(x) if isinstance(x, str) else x
    return pd.read_json(source)
    
# Datatype identified as "pandas", backed by the JSON encoder/decoder above.
datatype = DataType(identifier="pandas", encoder=encoder, decoder=decoder)

In [None]:
# <tab: Artifact>
from superduperdb import DataType
import numpy as np
import pickle


def pickle_encode(object, info=None):
    """Pickle ``object`` into bytes.

    Args:
        object: Any picklable Python object.
        info: Optional metadata; not used.

    Returns:
        bytes: The result of ``pickle.dumps(object)``.
    """
    pickled = pickle.dumps(object)
    return pickled

def pickle_decode(b, info=None):
    """Restore a Python object from pickled bytes.

    NOTE: ``pickle.loads`` can execute arbitrary code while unpickling, so
    only pass bytes that come from a trusted source.

    Args:
        b: Bytes produced by ``pickle.dumps`` (e.g. via ``pickle_encode``).
        info: Optional metadata; not used.

    Returns:
        The unpickled object.
    """
    restored = pickle.loads(b)
    return restored


# Pickle-backed datatype identified as "VectorSearchMatrix".
# encodable='artifact' is passed through to DataType unchanged -- presumably
# it selects artifact-style storage for the pickled bytes (TODO confirm).
datatype = DataType(
    identifier="VectorSearchMatrix",
    encodable='artifact',
    encoder=pickle_encode,
    decoder=pickle_decode,
)

In [None]:
# <testing: >
from superduperdb import DataType
# Register the datatype only when a tab produced a real DataType component;
# the Text tab yields the string 'str' and the URI tab yields None, and
# neither passes this check.
if datatype and isinstance(datatype, DataType):
    db.apply(datatype)

In [None]:
# <testing: >

from superduperdb.backends.mongodb import Collection
from superduperdb import Document
# Round-trip check: insert one document into the "data" collection, read one
# back and print its "x" field.
collection = Collection("data")

# NOTE(review): `origin_data` is not defined anywhere in this snippet; it is
# presumably supplied by a snippet that runs before this one -- confirm.
print(origin_data)

# Wrap the value only when the selected tab produced a callable datatype.
# The Text tab sets datatype = 'str' and the URI tab sets datatype = None;
# calling either would raise TypeError, so the raw value is inserted instead.
db.execute(
    collection.insert_one(
        Document(
            {"x": datatype(origin_data) if callable(datatype) else origin_data}
        )
    )
)

data = db.execute(collection.find_one())
print(data.unpack()["x"])