TSG093 - Agent log tail for all containers in BDC
=================================================

Steps
-----

### Parameters

In [None]:
# Number of lines to read from the end of each container's agent log
tail_lines = 100

# Skip the date/time at start of line
line_offset = 27

# When True, consecutive log lines that are identical after 'line_offset'
# characters are printed once, followed by a repeat count
coalesce_duplicates = True

# Shell command run inside each container to fetch the end of the agent log
cmd = 'tail -n {} /var/log/agent/agent.log'.format(tail_lines)

### Analyze log in all pod containers

### Instantiate Kubernetes client

In [None]:
# Build the Python Kubernetes client and expose it as the 'api' variable

import os

try:
    from kubernetes import client, config
    from kubernetes.stream import stream

    # Use the in-cluster configuration when both service variables are set in
    # the environment; otherwise fall back to the local kube config
    if all(var in os.environ for var in ("KUBERNETES_SERVICE_PORT", "KUBERNETES_SERVICE_HOST")):
        config.load_incluster_config()
    else:
        config.load_kube_config()

    api = client.CoreV1Api()

    print('Kubernetes client instantiated')
except ImportError:
    # Point the user at the notebook that installs the module, then re-raise
    # so that notebook execution stops here
    from IPython.display import Markdown
    display(Markdown('SUGGEST: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'))
    raise

### Get the namespace for the big data cluster

Get the namespace of the big data cluster from the Kubernetes API.

NOTE: If there is more than one big data cluster in the target
Kubernetes cluster, then change the \[0\] index in the cell below to
select the correct big data cluster.

In [None]:
# Look up the BDC namespace (the first namespace carrying the MSSQL_CLUSTER
# label) and store its name in the 'namespace' variable

try:
    namespace = api.list_namespace(label_selector='MSSQL_CLUSTER').items[0].metadata.name
except IndexError:
    # No labelled namespace was returned: suggest the notebooks that help
    # diagnose the Kubernetes context, then re-raise to stop execution
    from IPython.display import Markdown
    for title, link in (
        ('TSG081 - Get namespaces (Kubernetes)', '../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb'),
        ('TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'),
        ('SOP011 - Set kubernetes configuration context', '../common/sop011-set-kubernetes-context.ipynb'),
    ):
        display(Markdown(f'SUGGEST: Use [{title}]({link}) to resolve this issue.'))
    raise

print('The kubernetes namespace for your big data cluster is: ' + namespace)

In [None]:
# Tail the agent log in every container of every pod in the BDC namespace.
# Each log is printed (optionally coalescing consecutive duplicate lines) and,
# when a known error message is found, a link to the matching troubleshooting
# notebook is displayed.

from IPython.display import Markdown

# Known error signatures: [text to search for, relative notebook link, link title]
tsgs = [
    ["""Failed to get file names from controller with Error""", """../repair/tsg040-failed-get-file-names-controller.ipynb""", """TSG040 - Failed to get file names from controller with Error"""],
    ["""Please increase sysctl fs.aio-max-nr""", """../repair/tsg041-increase-fs-aio-max-nr.ipynb""", """TSG041 - Unable to create a new asynchronous I/O context (increase sysctl fs.aio-max-nr)"""]]


pod_list = api.list_namespaced_pod(namespace)

for pod in pod_list.items:
    container_names = [container.name for container in pod.spec.containers]
    for container in container_names:
        print (f"*** LOGS for CONTAINER: {container} in POD: {pod.metadata.name}")
        try:
            # Run 'cmd' inside the container and capture its stdout/stderr
            logs = stream(api.connect_get_namespaced_pod_exec, pod.metadata.name, namespace, command=['/bin/sh', '-c', cmd], container=container, stderr=True, stdout=True)

            if coalesce_duplicates:
                previous_line = ""
                duplicates = 1
                for line in logs.split('\n'):
                    # Lines are compared after 'line_offset' characters so that
                    # lines differing only in their leading date/time count as
                    # duplicates
                    if line[line_offset:] != previous_line[line_offset:]:
                        # A run of duplicates just ended: report how many times
                        # the previous line was seen
                        if duplicates != 1:
                            print(f"\t{previous_line} (x{duplicates})")
                        print(f"\t{line}")

                        for tsg in tsgs:
                            if tsg[0] in line[line_offset:]:
                                # Markdown link syntax is [title](target)
                                display(Markdown(f'SUGGEST: Use [{tsg[2]}]({tsg[1]}) to resolve this issue.'))

                        duplicates = 1
                    else:
                        duplicates = duplicates + 1
                        continue

                    previous_line = line

                # Report a run of duplicates that lasted until the final line
                # of the log; the loop above only reports a run when a
                # different line follows it
                if duplicates != 1:
                    print(f"\t{previous_line} (x{duplicates})")
            else:
                print(logs)

        except Exception as err:
            # Best effort: report the failure (with its cause) and carry on
            # with the next container
            print (f"Failed to get LOGS for CONTAINER: {container} in POD: {pod.metadata.name}")
            print (f"\t{err!r}")

In [None]:
# Final marker: reaching this cell means every cell above ran without raising
print("Notebook execution complete.")