# Get PII Columns

This notebook provides an example of finding PII columns in Secoda by matching column names against a list of PII-related keywords. To get started, you will need to [obtain an API key](https://app.secoda.co/settings/api).

Enter your API key and Secoda API endpoint URL in the cell below. The URL for the cloud instance is `https://api.secoda.co`. If you are self-hosting Secoda or are on the EU instance, you will have to update the `SECODA_API_URL` variable. For self-hosted instances, you need to append `/api/v1` to the end of `SECODA_API_URL`, e.g. `https://secoda.paystack.com/api/v1`.

In [3]:

import requests

# Endpoint of the Secoda API and the key used to authenticate against it.
# Fill in API_KEY before running the rest of the notebook.
SECODA_API_URL = "https://api.secoda.co"
API_KEY = ""

# One shared session so every request carries the bearer token header.
session = requests.Session()
session.headers.update({"Authorization": f"Bearer {API_KEY}"})


def build_url(url: str) -> str:
    """Return the absolute Secoda API URL for an endpoint path.

    The base (``SECODA_API_URL``) and ``url`` are joined with exactly one
    slash, so a base configured with a trailing slash (e.g. ``.../api/v1/``)
    or a path given without a leading slash still yields a valid URL.

    Args:
        url: Endpoint path, optionally with a query string,
            e.g. ``"/table/columns?page=1"``.

    Returns:
        The base URL followed by the endpoint path.
    """
    base = SECODA_API_URL.rstrip("/")
    path = url.lstrip("/")
    # An empty path returns the bare base rather than a dangling "/".
    return f"{base}/{path}" if path else base

In [4]:
# Lowercase substrings that flag a column name as potentially holding PII.
# Order matters: the scan stops at the first keyword contained in the column
# name, and that keyword is what gets recorded in the output.
# NOTE(review): "enumerator " and "cc " end with a space; kept as-is -- confirm
# the trailing space is intentional.
pii_keywords = [
    "deviceid",
    "subscriberid",
    "simid",
    "formdef_version",
    "devicephonenum",
    "caseid",
    "district",
    "country",
    "subcountry",
    "parish",
    "village",
    "community",
    "location",
    "panchayat",
    "compound",
    "survey_location",
    "county",
    "subcounty",
    "ciudad",
    "distrito",
    "villa",
    "city",
    "town",
    "neighborhood",
    "neighbourhood",
    "barangay",
    "brgy",
    "municipio",
    "colonia",
    "alcaldia",
    "alcaldía",
    "upazila",
    "tribe",
    "gps",
    "lat",
    "lon",
    "coord",
    "house",
    "social",
    "census",
    "fax",
    "ip",
    "ip_address",
    # These two were fused into "ip_addrname" by a missing comma.
    "ip_addr",
    "name",
    "enum_name",
    "hh",
    "age",
    "red",
    "fono",
    "url",
    "web",
    "number",
    "encuestador",
    "escuela",
    "colegio",
    "edad",
    "insurance",
    "school",
    "birth",
    "fname",
    "lname",
    "full_name",
    "first_name",
    "last_name",
    "birthday",
    "bday",
    "address",
    "network",
    "email",
    "beneficiary",
    "mother",
    "wife",
    "father",
    "husband",
    "enumerator ",
    "enumerator_",
    "child_age",
    "latitude",
    "longitude",
    "coordinates",
    "website",
    "nickname",
    "nick_name",
    "firstname",
    "lastname",
    "sublocation",
    "alternativecontact",
    "division",
    "resp_name",
    "head_name",
    "headname",
    "respname",
    "subvillage",
    "sex",
    "gender",
    "state",
    "credit",
    "card",
    "visa",
    "mastercard",
    "cc ",
    "amex",
    "discover",
    "jcb",
    "diners",
    "maestro",
    "instapayment",
    "domain",
    "phone",
    "telephone",
    "cell",
    "mobile",
    "call",
]


In [None]:
from pathlib import Path

import pandas

# Column order of the exported CSV; every row appended to pii_columns follows it.
fields = ['column', 'source', 'database', 'schema', 'table', 'pii_keyword']
pii_columns = []

# Walk the paginated /table/columns endpoint. `pages` starts at 1 so the first
# request is always made; it is then refreshed from each response.
page = 1
pages = 1
while page <= pages:
    print(f'Getting page {page}/{pages}')
    response = session.get(build_url(f"/table/columns?page={page}"))
    # Fail loudly on HTTP errors (e.g. a missing or invalid API key) instead of
    # hitting a confusing KeyError on the error payload below.
    response.raise_for_status()
    data = response.json()
    pages = data["total_pages"]
    page += 1
    for column in data["results"]:
        # The title may be absent or null; both are treated as an empty name.
        title = (column.get("title") or "").lower()

        # First keyword (in list order) contained in the column name, if any.
        pii_keyword = next((kw for kw in pii_keywords if kw in title), None)
        if pii_keyword is None:
            continue

        # Found a column that matches some PII keyword.
        pii_columns.append([
            column["title"],
            column["table_cluster"],
            column["table_database"],
            column["table_schema"],
            column["table_title"],
            pii_keyword,
        ])

column_data = pandas.DataFrame(columns=fields, data=pii_columns)

# Create the output directory first so the export works on a fresh checkout.
output_path = Path("data/pii_columns.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
column_data.to_csv(output_path, index=False)