# Create Tables

This notebook provides an example for creating databases, schemas, tables, and columns using the Secoda API. To get started, you will need to [obtain an API key](https://app.secoda.co/settings/api).

## Configure `requests` with your API key

Input your API key and Secoda API endpoint URL below. The URL for the cloud instance is `https://api.secoda.co`. If you are self-hosting Secoda or are on the EU instance, you will have to update the `SECODA_API_URL` variable accordingly.

In [1]:
import requests

# Connection settings: fill in the API key and integration id before running.
API_KEY = ""
SECODA_API_URL = "https://api.secoda.co"
INTEGRATION_ID = ""

# One shared session so that every request carries the bearer token header.
session = requests.Session()
session.headers.update({"Authorization": f"Bearer {API_KEY}"})

def build_url(url: str) -> str:
    """Return the absolute endpoint for *url* by prefixing ``SECODA_API_URL``.

    *url* is appended verbatim; callers in this notebook pass paths that
    begin with ``/`` (for example ``"/table/clusters/"``).
    """
    return "{}{}".format(SECODA_API_URL, url)

In [None]:
import pandas

# Load the semicolon-separated import file.
dataframe = pandas.read_csv(
    "data/data_import.csv",
    sep=";",
    encoding="unicode_escape",
    low_memory=False,
)

# Expose the source columns under the names the import loop reads.
for alias, source in (
    ("table", "name"),
    ("table_description", "description"),
    ("column", "col_name"),
):
    dataframe[alias] = dataframe[source]

# Replace missing values with empty strings before building API payloads.
dataframe.fillna("", inplace=True)
dataframe

## Create a cluster and database

Secoda organizes resources in a Cluster -> Database -> Schema -> Table -> (Column, Query, Test) hierarchy. The first step is to create a cluster, and then a database inside it.

In [3]:
# First look for an existing cluster with the same name so that re-running
# this cell does not insert duplicate entries.
my_cluster_name = "oracle"
response = session.get(
    build_url("/table/clusters"),
    # Let requests encode the query string so that names containing spaces,
    # "&", "#", or "+" are still matched exactly.
    params={"title": my_cluster_name},
)
# Stop on an HTTP error: treating a failed lookup as "no results" would
# fall through to the create branch and insert a duplicate cluster.
response.raise_for_status()
clusters = response.json().get("results", [])

if clusters:
    cluster = clusters[0]
else:
    # Create the cluster if none exists with that name.
    response = session.post(
        build_url("/table/clusters/"),
        json={"title": my_cluster_name, "integration": INTEGRATION_ID},
    )
    response.raise_for_status()
    cluster = response.json()


# Look for an existing database with this name under the cluster, and create
# it only if none exists.
database_name = "bice"
response = session.get(
    build_url("/table/databases"),
    # Let requests encode the query string so that names containing spaces,
    # "&", "#", or "+" are still matched exactly.
    params={"title": database_name, "parent_id": cluster["id"]},
)
# Stop on an HTTP error: treating a failed lookup as "no results" would
# fall through to the create branch and insert a duplicate database.
response.raise_for_status()
databases = response.json().get("results", [])

if databases:
    database = databases[0]
else:
    response = session.post(
        build_url("/table/databases/"),
        json={
            "title": database_name,
            "parent": cluster["id"],
            "integration": INTEGRATION_ID,
        },
    )
    response.raise_for_status()
    database = response.json()

## Process tables

In [None]:
def get_or_create_schema(schema_name: str) -> dict:
    """Return the schema titled *schema_name* under ``database``, creating it if absent.

    Args:
        schema_name: Title of the schema to look up or create.

    Returns:
        The schema record parsed from the API's JSON response: the first
        lookup result when one exists, otherwise the newly created record.

    Raises:
        requests.HTTPError: If the lookup or the creation request returns an
            HTTP error status.
    """
    response = session.get(
        build_url("/table/schemas/"),
        # Let requests encode the query string so that names containing
        # spaces, "&", "#", or "+" are still matched exactly.
        params={"title": schema_name, "parent_id": database["id"]},
    )
    # A failed lookup must not be mistaken for "no results", otherwise a
    # duplicate schema would be created below.
    response.raise_for_status()
    schemas = response.json().get("results", [])
    if schemas:
        return schemas[0]

    response = session.post(
        build_url("/table/schemas/"),
        json={
            "title": schema_name,
            "parent": database["id"],
            "integration": INTEGRATION_ID,
        },
    )
    response.raise_for_status()
    return response.json()


def get_or_create_table(schema_id: str, table_dict: dict) -> dict:
    """Return the table described by *table_dict* under *schema_id*, creating it if absent.

    Args:
        schema_id: Id of the parent schema; used as the lookup filter and as
            the ``parent`` of a newly created table.
        table_dict: Mapping with the keys ``"table"`` (table title),
            ``"schema"`` (schema name) and ``"description"``.

    Returns:
        The table record parsed from the API's JSON response: the first
        lookup result when one exists, otherwise the newly created record.

    Raises:
        requests.HTTPError: If the lookup or the creation request returns an
            HTTP error status.
    """
    response = session.get(
        build_url("/table/tables/"),
        # Let requests encode the query string so that names containing
        # spaces, "&", "#", or "+" are still matched exactly.
        params={"title": table_dict["table"], "parent_id": schema_id},
    )
    # A failed lookup must not be mistaken for "no results", otherwise a
    # duplicate table would be created below.
    response.raise_for_status()
    tables = response.json().get("results", [])
    if tables:
        return tables[0]

    response = session.post(
        build_url("/table/tables/"),
        json={
            "title": table_dict["table"],
            "parent": schema_id,
            "schema": table_dict["schema"],
            "database": database["title"],
            "cluster": cluster["title"],
            "description": table_dict["description"],
            "definition": "",
            "integration": INTEGRATION_ID,
        },
    )
    response.raise_for_status()
    return response.json()


def get_or_create_column(schema_name: str, table_id: str, column_dict: dict) -> dict:
    """Return the column described by *column_dict* under *table_id*, creating it if absent.

    Args:
        schema_name: Not used by this function; kept so existing callers
            keep working.
        table_id: Id of the parent table; used as the lookup filter and as
            the ``parent`` of a newly created column.
        column_dict: Mapping with the keys ``"column"`` (column title),
            ``"table"``, ``"schema"``, ``"col_description"``, ``"col_type"``,
            ``"is_pk"`` and ``"col_sort_order"``.

    Returns:
        The column record parsed from the API's JSON response: the first
        lookup result when one exists, otherwise the newly created record.

    Raises:
        requests.HTTPError: If the lookup or the creation request returns an
            HTTP error status.
    """
    response = session.get(
        build_url("/table/columns/"),
        # Let requests encode the query string so that names containing
        # spaces, "&", "#", or "+" are still matched exactly.
        params={"title": column_dict["column"], "parent_id": table_id},
    )
    # A failed lookup must not be mistaken for "no results", otherwise a
    # duplicate column would be created below.
    response.raise_for_status()
    columns = response.json().get("results", [])
    if columns:
        return columns[0]

    response = session.post(
        build_url("/table/columns/"),
        json={
            "title": column_dict["column"],
            "parent": table_id,
            "table": column_dict["table"],
            "schema": column_dict["schema"],
            "database": database["title"],
            "cluster": cluster["title"],
            "description": column_dict["col_description"],
            "type": column_dict["col_type"],
            "is_pk": column_dict["is_pk"],
            "sort_order": column_dict["col_sort_order"],
            "integration": INTEGRATION_ID,
        },
    )
    response.raise_for_status()
    # Return the record like the sibling get_or_create_* helpers do.
    return response.json()


# Walk the import file row by row. Each row describes one column, so the
# schema and table it belongs to are looked up (or created) first.
records = dataframe.to_dict("records")
total = len(records)
# Count from 1 so the progress line runs from 1/total to total/total.
for index, entry in enumerate(records, start=1):
    print(f"Processing {index}/{total}")
    schema = get_or_create_schema(entry["schema"])
    table = get_or_create_table(
        schema["id"],
        {
            "schema": entry["schema"],
            "table": entry["table"],
            "description": entry["description"],
        },
    )
    column = get_or_create_column(
        # The first parameter is the schema *name*, not its id.
        entry["schema"],
        table["id"],
        {
            "schema": entry["schema"],
            "table": entry["table"],
            "column": entry["column"],
            "col_description": entry["col_description"],
            "col_type": entry["col_type"],
            "is_pk": entry["is_pk"],
            "col_sort_order": entry["col_sort_order"],
        },
    )
