# Tetra Data Platform (TDP) Configuration Report

## Import Statements

In [None]:
import os
import json
import requests
from datetime import datetime
from zoneinfo import ZoneInfo
import pandas as pd

## Notebook Parameters

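* SAVE_DIR = directory on your local machine where the authentication file and the Excel report are saved
* API_URL = base API URL for your TDP instance, including the trailing slash (endpoint paths are appended directly to it)
* AUTH_TOKEN = personal access token for TDP, or the token of a Service User
* TDP_ORG = organization slug of the TDP organization to report on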

In [None]:
# Directory where auth.json and the timestamped .xlsx report are written.
SAVE_DIR = "./"
# Base URL of the TDP API; endpoint paths are appended to this value.
API_URL = ""
# Token sent in the "ts-auth-token" request header.
AUTH_TOKEN = ""
# Organization slug sent in the "x-org-slug" request header.
TDP_ORG = ""

## Create Authentication File

This uses the notebook parameters to create an authentication file, as outlined on the documentation site for setting up your development environment: [Development Setup](https://developers.tetrascience.com/docs/set-up-your-environment-and-initialize-ts-sdk#set-up-the-environment)

This authentication file can also be used with our ts-sdk to deploy custom pipelines.

In [None]:
# Assemble the credentials first, then persist them in a single step.
# NOTE: the token is stored in plain text; keep this file out of version control.
auth_json = {
    "api_url": API_URL,
    "auth_token": AUTH_TOKEN,
    "org": TDP_ORG,
    "ignore_ssl": "false",
}
auth_json_path = os.path.join(SAVE_DIR, "auth.json")
with open(auth_json_path, "w") as f:
    json.dump(auth_json, f, indent=4)

## Create filename

This uses the current date and time (Pacific time) to document when the configuration was saved. More information about Time Zones in Python can be found [here](https://docs.python.org/3/library/zoneinfo.html).

In [None]:
# A single timezone-aware "now" feeds both the human-readable timestamp
# and the report filename, so the two always agree.
now = datetime.now(tz=ZoneInfo("America/Los_Angeles"))
timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
report_basename = now.strftime("%Y-%m-%d-%H%M%S") + ".xlsx"
savefile_name = os.path.join(SAVE_DIR, report_basename)
print("savefile_name =", savefile_name)

## Use Authentication File for API Headers

In [None]:
# Read the credentials back from disk so the request headers reflect
# exactly what was saved in the authentication file.
with open(auth_json_path, "r") as f:
    auth_data = json.load(f)

headers = {
    "ts-auth-token": auth_data["auth_token"],
    "x-org-slug": auth_data["org"],
}

## API Endpoints

In [None]:
# The base URL is taken from the saved auth file rather than the notebook parameter.
API_URL = auth_data["api_url"]
# Normalize to exactly one trailing slash so the endpoint paths join correctly
# whether or not the configured URL already ends with "/".
_api_base = API_URL.rstrip("/") + "/"
PIPELINE_SEARCH = _api_base + "pipeline/search"
AGENT_LIST = _api_base + "agents"

## Export Info Sheet

In [None]:
# Single-row table recording when this configuration snapshot was taken.
info_df = pd.DataFrame([timestamp], columns = ["timestamp"])

In [None]:
# Bare expression: shows the DataFrame as this cell's output.
info_df

## Pipeline Configuration Sheet

In [None]:
def get_pipeline_page(headers, **kwargs):
    """
        Fetch one page of pipelines from the pipeline search endpoint.

        Args:
            headers: HTTP headers carrying the auth token and org slug.
            **kwargs: Optional paging controls:
                index -- sent as the "from" query parameter.
                size  -- sent as the "size" query parameter.

        Returns:
            Tuple (hits, has_next): the "hits" and "hasNext" fields of
            the JSON response.

        Raises:
            requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    # Let requests build and encode the query string; only forward the
    # paging options the caller actually supplied.
    params = {}
    if "index" in kwargs:
        params["from"] = kwargs["index"]
    if "size" in kwargs:
        params["size"] = kwargs["size"]

    pipeline_response = requests.get(PIPELINE_SEARCH, headers=headers, params=params)
    # Surface auth/URL problems here instead of as a KeyError on "hits" below.
    pipeline_response.raise_for_status()
    payload = pipeline_response.json()

    return payload["hits"], payload["hasNext"]

In [None]:
def get_all_pipelines(headers, size=10):
    """
        Collect every pipeline by requesting consecutive pages until the
        API reports that no further page exists.

        Args:
            headers: HTTP headers passed through to get_pipeline_page.
            size: Number of pipelines requested per page (default 10).

        Returns:
            List containing the hits of all pages, in the order received.
    """
    all_pipelines = []
    index = 0
    has_next = True
    # NOTE(review): index advances by 1 per request, i.e. it is treated as a
    # page number; confirm the API's "from" parameter is not an item offset.
    while has_next:
        pipes, has_next = get_pipeline_page(headers, size=size, index=index)
        all_pipelines.extend(pipes)
        index += 1
    return all_pipelines

In [None]:
# Fetch all pipelines using the default page size of get_all_pipelines.
pipeline_list = get_all_pipelines(headers)

In [None]:
# Top-level keys copied from each pipeline record into the report,
# listed in the column order of the pipeline sheet.
pipeline_top_fields = [
    "id",
    "name",
    "description",
    "status",
    "triggerCondition",
    "maxParallelWorkflows",
    "priority",
    "retryBehavior",
    "protocolSlug",
    "protocolVersion",
    "createdAt",
    "updatedAt",
]

In [None]:
def pipeline_summary(pipeline_info, org):
    """
        Build one report row for a pipeline: the org slug followed by the
        value of every key listed in pipeline_top_fields, in that order.
    """
    row = [org]
    for field in pipeline_top_fields:
        row.append(pipeline_info[field])
    return row

In [None]:
# One row per pipeline, each tagged with the configured org slug.
pipeline_summaries = [pipeline_summary(pipeline, TDP_ORG) for pipeline in pipeline_list]

In [None]:
# Column order mirrors pipeline_summary: org slug first, then the selected fields.
pipeline_df = pd.DataFrame(pipeline_summaries, columns = ["orgSlug"] + pipeline_top_fields)

In [None]:
# Bare expression: shows the DataFrame as this cell's output.
pipeline_df

## Agent Configuration Sheet

In [None]:
agent_response = requests.get(AGENT_LIST, headers=headers)
# Fail fast on an HTTP error instead of parsing an error body as the agent list.
agent_response.raise_for_status()
agent_list = agent_response.json()

In [None]:
# Keys read directly from the top level of each agent record.
agent_top_fields = [
    "orgSlug",
    "name",
    "description",
    "isEnabled",
    "status",
    "type",
    "liveType",
    "integrationType",
    "integrationId",
    "version",
    "tags",
    "metadata",
]

# Column derived from the agent's "queue" section.
agent_queue_fields = ["queue_enabled"]

# Columns derived from the file-watcher path entries of the agent config;
# each cell holds one value per configured path.
agent_paths_fields = [
    "paths",
    "paths_interval",
    "paths_labels",
    "paths_metadata",
    "paths_patterns",
    "paths_filewatchmode",
]

# Full column order of the agent sheet.
agent_fields = [*agent_top_fields, *agent_queue_fields, *agent_paths_fields]

In [None]:
def agent_summary(agent_info):
    """
        Build one report row for an agent.

        The row holds the values of agent_top_fields, then the queue
        "enabled" flag, then one list per file-watcher path attribute
        (path, interval, labels, metadata, patterns, file_watch_mode).

        Any section that is absent or empty is reported as "N/A" rather
        than raising, so a single agent without a file-watcher config
        does not abort the whole report.

        Args:
            agent_info: dict describing one agent, as returned by the
                agents endpoint.

        Returns:
            List of values aligned with agent_fields.
    """
    agent_top_vals = [agent_info[x] for x in agent_top_fields]

    queue_info = agent_info.get("queue")
    if queue_info:
        agent_queue_vals = [queue_info["enabled"]]
    else:
        agent_queue_vals = ["N/A"] * len(agent_queue_fields)

    # Walk config -> services_configuration -> fileWatcher -> paths without
    # assuming every level exists.
    # NOTE(review): presumably only some agent types carry a fileWatcher
    # section; confirm against the agents API schema.
    config = agent_info.get("config") or {}
    services = config.get("services_configuration") or {}
    file_watcher = services.get("fileWatcher") or {}
    paths_info = file_watcher.get("paths")

    if paths_info is None:
        agent_subvals = ["N/A"] * len(agent_paths_fields)
    else:
        # Same order as agent_paths_fields; one list per attribute with
        # one entry per configured path.
        path_keys = ("path", "interval", "labels", "metadata",
                     "patterns", "file_watch_mode")
        agent_subvals = [[entry.get(key, "N/A") for entry in paths_info]
                         for key in path_keys]

    return agent_top_vals + agent_queue_vals + agent_subvals

In [None]:
# One row per agent returned by the agents endpoint.
agent_summaries = [agent_summary(agent) for agent in agent_list]

In [None]:
# Columns follow agent_fields, the same order agent_summary uses for each row.
agent_df = pd.DataFrame(agent_summaries, columns = agent_fields)

In [None]:
# Bare expression: shows the DataFrame as this cell's output.
agent_df

## Save to Excel

In [None]:
# Write each DataFrame to its own sheet of the timestamped workbook.
report_sheets = (
    ("Info", info_df),
    ("Agent Cfg", agent_df),
    ("Pipeline Cfg", pipeline_df),
)
with pd.ExcelWriter(savefile_name) as writer:
    for sheet_title, frame in report_sheets:
        frame.to_excel(writer, sheet_name=sheet_title)