TSG036 - Controller logs
========================

Get the last ‘n’ hours of controller logs.

Steps
-----

### Parameters

In [None]:
# Look-back window for the controller log query, in hours
since_hours = 2
# Same window in seconds (60 seconds * 60 minutes per hour), the unit handed to the log API
since_seconds = 60 * 60 * since_hours

# When True, consecutive repeated log lines are collapsed into one line plus a repeat count
coalesce_duplicates = True

### Instantiate Kubernetes client

In [None]:
# Build the Kubernetes CoreV1Api client and expose it as 'api'

import os

try:
    from kubernetes import client, config
    from kubernetes.stream import stream

    # When both service variables are present the in-cluster configuration is
    # loaded; otherwise the local kube config is used.
    in_cluster_vars = ("KUBERNETES_SERVICE_PORT", "KUBERNETES_SERVICE_HOST")
    if all(var in os.environ for var in in_cluster_vars):
        config.load_incluster_config()
    else:
        config.load_kube_config()

    api = client.CoreV1Api()

    print('Kubernetes client instantiated')
except ImportError:
    # Point the reader at the install notebook, then surface the original error
    from IPython.display import Markdown
    install_hint = f'SUGGEST: Use [SOP059 - Install Kubernetes Python module](../install/sop059-install-kubernetes-module.ipynb) to resolve this issue.'
    display(Markdown(install_hint))
    raise

### Get the namespace for the big data cluster

Get the namespace of the big data cluster from the Kubernetes API.

NOTE: If there is more than one big data cluster in the target
Kubernetes cluster, change the index \[0\] in the code below so that it
selects the namespace of the correct big data cluster.

In [None]:
# Resolve the BDC's Kubernetes namespace name and keep it in 'namespace'

try:
    # Namespaces are selected by the MSSQL_CLUSTER label; the first match is used
    labelled_namespaces = api.list_namespace(label_selector='MSSQL_CLUSTER')
    namespace = labelled_namespaces.items[0].metadata.name
except IndexError:
    # No labelled namespace came back: list the notebooks that may help, then re-raise
    from IPython.display import Markdown
    for suggestion in (
        f'SUGGEST: Use [TSG081 - Get namespaces (Kubernetes)](../monitor-k8s/tsg081-get-kubernetes-namespaces.ipynb) to resolve this issue.',
        f'SUGGEST: Use [TSG010 - Get configuration contexts](../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb) to resolve this issue.',
        f'SUGGEST: Use [SOP011 - Set kubernetes configuration context](../common/sop011-set-kubernetes-context.ipynb) to resolve this issue.',
    ):
        display(Markdown(suggestion))
    raise

print('The kubernetes namespace for your big data cluster is: ' + namespace)

### Get controller logs

In [None]:
container = "controller"

# The slices below rely on a fixed-width line layout: the severity column is
# read from index 25 and duplicate detection compares text from index 27 on.
# NOTE(review): the skipped prefix is presumably the timestamp - confirm
# against real controller log output.


def _is_warning_or_error(line):
    """Return True when the severity column of `line` reads ERROR or WARN."""
    return line[25:34] == "| ERROR |" or line[25:33] == "| WARN |"


def _print_coalesced(lines):
    """Print `lines`, collapsing runs of consecutive duplicates.

    Two lines count as duplicates when they match from index 27 onwards. The
    first line of a run is printed as it is met; when the run ends, that line
    is printed once more with an "(xN)" repeat count.

    Returns the ERROR/WARN lines, one per run, in the order they were seen.
    """
    flagged = []
    previous_line = None  # None means "no line seen yet", so the first line is never a duplicate
    duplicates = 1

    for line in lines:
        if previous_line is not None and line[27:] == previous_line[27:]:
            duplicates += 1
            continue

        if duplicates != 1:
            print(f"\t{previous_line} (x{duplicates})")
        print(f"\t{line}")
        duplicates = 1

        if _is_warning_or_error(line):
            flagged.append(line)

        previous_line = line

    # A run that reaches the end of the log still needs its repeat count reported
    if duplicates != 1:
        print(f"\t{previous_line} (x{duplicates})")

    return flagged


pod_list = api.list_namespaced_pod(namespace, label_selector="app=controller")

entries_for_analysis = []

for pod in pod_list.items:
    print(f"Logs for controller pod: {pod.metadata.name}")
    try:
        logs = api.read_namespaced_pod_log(pod.metadata.name, namespace, container=container, since_seconds=since_seconds)
    except Exception as err:
        # Best effort: report the failure and move on to the next pod
        print(f"ERROR: {err}")
        continue

    log_lines = logs.split('\n')
    if coalesce_duplicates:
        entries_for_analysis.extend(_print_coalesced(log_lines))
    else:
        print(logs)
        # Collect warnings and errors here too, so the analysis step still has
        # input when duplicates are not being coalesced
        entries_for_analysis.extend(line for line in log_lines if _is_warning_or_error(line))

print(f"There were {len(entries_for_analysis)} warnings and errors found.")

### Analyze log entries and suggest relevant Troubleshooting Guides

In [None]:
# Analyze log entries and suggest further relevant troubleshooting guides

from IPython.display import Markdown

# Each row is: [text to look for in a log entry, relative link to the notebook, link title]
tsgs = [
    ["""doc is missing key: /data""", """../repair/tsg038-doc-is-missing-key-error.ipynb""", """TSG038 - BDC create failures due to - doc is missing key"""],
    ["""Failed when starting controller service. System.TimeoutException: Operation timed out after 10 minutes""", """../repair/tsg057-failed-when-starting-controller.ipynb""", """TSG057 - Failed when starting controller service. System.TimeoutException"""]]


suggestions = 0
for entry in entries_for_analysis:
    print(entry)

    for pattern, link, title in tsgs:
        if pattern in entry:
            # The link target has to be interpolated; a bare 'tsg[1]' inside the
            # f-string would render a link pointing at that literal text
            display(Markdown(f'SUGGEST: Use [{title}]({link}) to resolve this issue.'))
            suggestions = suggestions + 1

print("")
print(f"{len(entries_for_analysis)} log entries analyzed. {suggestions} further troubleshooting suggestions made inline.")

In [None]:
# Final cell: prints a completion message once execution reaches the end of the notebook
print('Notebook execution complete.')

Related
-------

-   [TSG027 - Observe cluster
    deployment](../diagnose/tsg027-observe-bdc-create.ipynb)