In [None]:
!pip install deasy-client

In [None]:
import os

from dotenv import load_dotenv

from deasy_client import Deasy

# Load variables from a local .env file (if any) before the notebook reads
# DEASY_API_KEY from os.environ.
load_dotenv()

##### 1. Instantiate the Deasy Client

In [None]:
# Credentials: the API token is read from the environment (tokens are issued in
# the Deasy Tokens Dashboard). A missing DEASY_API_KEY raises KeyError here.
deasy_api_key = os.environ["DEASY_API_KEY"]
username = "<username>"  # your Deasy username

# Target data source and schema -- replace the placeholders before running.
data_connector_profile_name = "<data-connector-profile-name>"  # vdb profile / datasource to use
schema_name = "<schema-name>"
schema_description = "<schema-description>"

# Names of the files passed to the OCR step.
file_names = [
    "<filename 1>",
    "<filename 2>",
    "<filename 3>",
]

# Client object used by every later cell.
client = Deasy(x_user=username, x_token=deasy_api_key)

##### 2. Use the client to run OCR on the connector after adding files to S3

In [5]:
# Submit the listed files on the connector to the OCR ingest endpoint and keep
# the "job_id" entry of the response for the status lookup.
ocr_response = client.ocr.ingest(
    file_names=file_names,
    data_connector_name=data_connector_profile_name,
)
job_id = ocr_response["job_id"]

In [12]:
# Look up the task status for whatever `job_id` currently refers to.
status = client.task_status.task_status(
    job_id=job_id,
)

In [None]:
# Display the task status fetched in the previous cell.
status

##### 3. Prepare your data

In [None]:
# Call the prepare-data endpoint for the configured connector.
data_prepare_response = client.prepare_data.create(data_connector_name=data_connector_profile_name)

In [None]:
# Display the response returned by client.prepare_data.create.
data_prepare_response

##### 4. Suggest a schema (Optional)

In [None]:
# Request a schema suggestion for the connector, seeded with a root node that
# has no children.
schema_response = client.suggest_schema.create(
    schema_name=schema_name,
    data_connector_name=data_connector_profile_name,
    node={"label": "Root", "children": []},
)

In [None]:
# Display the `suggestion` attribute of the suggest-schema response.
schema_response.suggestion

##### 5. Extract Metadata

In [9]:
import uuid

# Generate a fresh client-side identifier for this classification run.
# NOTE: this rebinds `job_id`, which earlier held the OCR job id. Re-running
# the OCR status cell after this one will query this job instead.
job_id = str(uuid.uuid4())

# Call the bulk classify endpoint for the connector, using `schema_name` as the
# hierarchy and tagging the run with the id generated above.
classify_response = client.classify_bulk.classify(
    job_id=job_id,
    data_connector_name=data_connector_profile_name,
    hierarchy_name=schema_name,
)

##### 6. Check Job Status

In [10]:
# Look up the task status for the current `job_id`.
job_status = client.task_status.task_status(
    job_id=job_id,
)

In [None]:
# Display the task status fetched in the previous cell.
job_status

##### 7. Export

In [33]:
# Export the connector's metadata in CSV format; the result is parsed in the next cell.
file = client.dataslice.export.export_metadata(
    export_format="csv",
    data_connector_name=data_connector_profile_name,
)

In [None]:
import io

import pandas as pd

# Wrap the exported text in an in-memory file-like object so pandas can parse it.
# NOTE(review): assumes `file` is a str of CSV text -- TODO confirm the export's return type.
csv_buffer = io.StringIO(file)
metadata = pd.read_csv(csv_buffer)

# Preview the first rows of the parsed metadata.
metadata.head()