# Import Column Metadata From CSV

This notebook provides an example for uploading column metadata to Secoda. To get started, you will need to [obtain an API key](https://app.secoda.co/settings/api)

Input your API key and Secoda API endpoint URL below. The URL for the cloud instance is `https://api.secoda.co`. If you are self-hosting Secoda or on the EU instance, you will have to update variable.

In [1]:

import requests

API_KEY = ""
SECODA_API_URL = "https://api.secoda.co"

session = requests.Session()
session.headers.update(dict(
    Authorization=f"Bearer {API_KEY}"
))


def build_url(url: str):
    return f"{SECODA_API_URL}{url}"

## Parse your data

If your data is in CSV format, you could parse it this way and standardize the column names in there to match up with Secoda

In [30]:
import pandas
import json

dataframe = pandas.read_csv("data/delta_defense_column_metadata.csv")
dataframe = dataframe.where(pandas.notnull(dataframe), None)
dataframe["column"] = dataframe["Column_Name (Technical Term)"]
dataframe["table"] = dataframe["Presentation_Table"]
dataframe["description"] = dataframe["Definition"]
dataframe["nullable"] = dataframe["Nullable"]
dataframe["required"] = dataframe["Required"]
dataframe["sensitive data type"] = dataframe["Sensitive_Data_Type"]
dataframe["protection level"] = dataframe["Protection_Level"]
dataframe["presentation tbl purpose"] = dataframe["Presentation_TBL_Purpose"]
dataframe["key"] = dataframe["Keys"]

## Upload column metadata to Secoda

In [35]:
import math

def find_column(table, column, *args, **kwargs):
    res = session.get(
        build_url(f"/table/columns/?title={column.lower()}&parent__title={table.lower()}")
    )
    results = res.json().get("results")
    if len(results) >= 1:
        return results[0]
    return None


def set_column_metadata(column, metadata):
    column_id = column.get("id")
    description = metadata.get("description")
    
    # Set custom properties
    new_properties = metadata.get("custom")
    properties = column.get("properties", {})
    if not "custom" in properties:
        properties["custom"] = {}

    for property, value in metadata.items():
        if property not in ["column", "table", "description"] and value is not None and not value == 'nan':
            properties["custom"][property] = value
        
    print(properties)
    print(description)

    session.put(
        build_url(f"/table/columns/{column_id}"),
        json=dict(
            description=description,
            properties=properties
        )
    )

for index, entry in enumerate(dataframe.to_dict("records")):
    print(f"Processing {index}/{dataframe.shape[0]}")
    column = find_column(
        **entry
    )
    if column is not None:
        set_column_metadata(column, entry)
    else:
        print(f"NOT FOUND: ", entry.get("column"))
    
    break

Processing 0/442
{'custom': {'keys': 'PK', 'Presentation_Table': 'DM_AGENT', 'Column_Name (Technical Term)': 'AGENT_DIM_ID', 'Definition': 'System generated key used to uniquely identify a row in this dimension.', 'Data_Type': 'number', 'Keys': 'PK', 'Nullable': nan, 'Required': nan, 'Presentation_TBL_Purpose': 'Table that contains information about call center agents.', 'nullable': nan, 'required': nan, 'presentation tbl purpose': 'Table that contains information about call center agents.', 'key': 'PK'}, '_profile': {'max': 0.0, 'min': 0.0, 'mean': 0.0, 'count': 71114, 'median': 0.0, 'ntiles': [{'left': 0, 'label': 'ad4fc2da352d828d2a4c817984c8423e', 'right': 1, 'frequency': '1', 'is_numeric': False}, {'left': 1, 'label': 'a46450b63133937a59399ec1e404899e', 'right': 2, 'frequency': '1', 'is_numeric': False}, {'left': 2, 'label': '61716441aeac3dfe90a5bd9254bdd532', 'right': 3, 'frequency': '1', 'is_numeric': False}, {'left': 3, 'label': '616f6a679351149d91e2b2127d14f271', 'right': 4, '

InvalidJSONError: Out of range float values are not JSON compliant