In [4]:
%%capture
!pip install drain3

In [5]:
import logging
import os
import re
import sqlite3 as sql
import string
from typing import Optional

import drain3 as d3
import pandas as pd

In [6]:
# Root logging setup: every record is rendered as "<time> <level> | <message>"
# and anything at INFO or above is emitted.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s | %(message)s',
)
# Logger used by the helper functions and classes defined in this notebook.
logger = logging.getLogger(__name__)

def database_builder(path: str) -> pd.DataFrame:
    """
    Builds one DataFrame from the `logs` table of every SQLite file in a directory
    :param path: str
        directory that directly contains the `.db` files (sub-directories are not searched)
    :return pd.DataFrame
        the rows of all `logs` tables, concatenated in file-name order
    :raises ValueError
        if `path` holds no `.db` file that could be opened
    """
    logger.info('Building DataFrame ...')
    # Only the top level of the directory is inspected; a missing directory is
    # treated like an empty one so the caller gets the ValueError below instead
    # of a bare StopIteration.
    (_, _, files) = next(os.walk(path), (path, [], []))
    sql_query = 'SELECT * FROM logs'
    data = []
    # Sorted so the row order of the result does not depend on the file system.
    for f in sorted(files):
        # Match the extension only: SQLite side files such as `x.db-journal`
        # or backups such as `x.db.bak` are not databases to load.
        if not f.endswith('.db'):
            continue
        db_path = os.path.join(path, f)
        conn = create_connection(db_path)
        if conn is None:
            # create_connection reports failures by returning None.
            logger.warning('Skipping database that could not be opened: %s', db_path)
            continue
        try:
            data.append(pd.read_sql_query(sql_query, conn))
        finally:
            # Release the file handle even when the query fails.
            conn.close()
    if not data:
        raise ValueError('No readable .db files found in ' + str(path))
    logger.info('...complete!')
    return pd.concat(data)

def create_connection(path: str) -> Optional[sql.Connection]:
    """
    Creates a database connection
    :param path: str
        path to database object (a path-like object is accepted as well)
    :return Optional[sql.Connection]
        a connection to the database, or None when sqlite3 could not open it
        (the error is logged as a warning)
    """
    try:
        conn = sql.connect(path)
    except sql.Error as e:
        logger.warning(e)
        return None
    # %s formatting (instead of string concatenation) keeps this working for
    # path-like arguments, so an opened connection is never lost to a TypeError.
    logger.info('Connected to database %s', path)
    return conn

In [7]:
class LogPreprocessor:
    """
    Cleans raw log records and mines message templates from them with a
    drain3 TemplateMiner

    Attributes
        logs: the raw records passed to the constructor (never modified)
        Drain: the drain3 TemplateMiner fed by `generate_clusters`
        cleaned_logs: output of the last `standardize` run (empty until then)
        clusters: the miner's clusters after the last `generate_clusters` run
        n_clusters: number of those clusters
    """
    def __init__(self, logs: pd.DataFrame):
        """
        :param logs: pd.DataFrame
            raw records; `standardize` reads its `timestamp` and `log` columns
        """
        self.logs = logs
        self.Drain = d3.TemplateMiner()
        # An empty frame instance, not the DataFrame class itself.
        self.cleaned_logs = pd.DataFrame()
        self.clusters = {}
        self.n_clusters = 0

    @staticmethod
    def clean_solr_logs(s: str) -> str:
        """
        Re-spaces 32 or 33 character messages that contain 'zoo' or 'solr'
        :param s: str
            a log message
        :return str
            for matching messages, the text with the character at index 8
            replaced by a space and a space inserted before index 22;
            any other message is returned unchanged
        """
        if len(s) in (32, 33) and ('zoo' in s or 'solr' in s):
            s = s[:8] + ' ' + s[9:22] + ' ' + s[22:]
        return s

    def standardize(self, logs: pd.DataFrame) -> pd.DataFrame:
        """
        Parses the timestamps and strips embedded timestamps from the messages
        :param logs: pd.DataFrame
            records with a `timestamp` column formatted as
            '%Y-%m-%dT%H:%M:%S.%f' and a `log` column of message strings
        :return pd.DataFrame
            a cleaned copy of `logs`; the input frame is left untouched
        """
        fmt = '%Y-%m-%dT%H:%M:%S.%f'
        # Work on a copy: mutating the caller's frame would make a second run
        # re-apply clean_solr_logs to messages it already re-spaced (a 32
        # character message becomes 33 characters long and matches again).
        logs = logs.copy()
        logs['timestamp'] = pd.to_datetime(logs['timestamp'], format=fmt)
        logger.info('Standardizing log documents ...')
        # remove timestamps
        logs['log'] = logs['log'].replace(to_replace=r'(?:\d{4}-\d{2}-\d{2}[\sT]\d{2}:\d{2}:\d{2}([.,]\d{3}|\s))',
                                          value='',
                                          regex=True)
        logs['log'] = logs['log'].apply(self.clean_solr_logs)
        logger.info('...complete!')
        return logs

    def generate_clusters(self) -> list:
        """
        Standardizes `self.logs`, feeds every message to the Drain miner and
        records the resulting clusters on the instance
        :return list
            one template string per cluster, with runs of spaces collapsed
            to a single space
        """
        self.cleaned_logs = self.standardize(self.logs)
        logger.info('Generating Drain model ...')
        for message in self.cleaned_logs['log']:
            self.Drain.add_log_message(message)
        logger.info('...complete!')
        self.clusters = self.Drain.drain.clusters
        self.n_clusters = len(self.clusters)
        cleaned_clusters = [re.sub(pattern=r' +',
                                   repl=' ',
                                   string=cluster.get_template())
                            for cluster in self.clusters]
        return cleaned_clusters

In [8]:
# Load every log database found under ../database into one DataFrame.
df = database_builder(r'../database')
# Standardize the records and mine their Drain templates.
log_preprocessor = LogPreprocessor(df)
templates_list = log_preprocessor.generate_clusters()
# Show each template followed by an empty line, then the number of clusters.
for template in templates_list:
    print(template, end='\n\n')
print(log_preprocessor.n_clusters)

2021-04-02 19:14:13,928 INFO | Building DataFrame ...
2021-04-02 19:14:13,930 INFO | Connected to database ../database/elastic_logs.db
2021-04-02 19:14:15,195 INFO | ...complete!
2021-04-02 19:14:15,261 INFO | Starting Drain3 template miner
2021-04-02 19:14:15,262 INFO | Loading configuration from drain3.ini
2021-04-02 19:14:15,625 INFO | Standardizing log documents ...
2021-04-02 19:14:21,262 INFO | ...complete!
2021-04-02 19:14:21,263 INFO | Generating Drain model ...


total          : took    23.60 s (100.00%),     69,015 samples,  341.93 ms / 1000 samples,        2,924.59 hz
mask           : took    20.75 s ( 87.91%),     69,015 samples,  300.59 ms / 1000 samples,        3,326.81 hz
drain          : took     2.49 s ( 10.54%),     69,015 samples,   36.05 ms / 1000 samples,       27,737.24 hz
tree_search    : took     1.63 s (  6.91%),     69,015 samples,   23.62 ms / 1000 samples,       42,332.84 hz
cluster_exist  : took     0.44 s (  1.87%),     68,661 samples,    6.44 ms / 1000 samples,      155,280.77 hz
create_cluster : took     0.00 s (  0.02%),        354 samples,   10.41 ms / 1000 samples,       96,102.50 hz
total          : took    53.03 s (100.00%),    168,247 samples,  315.17 ms / 1000 samples,        3,172.87 hz
mask           : took    45.87 s ( 86.50%),    168,247 samples,  272.63 ms / 1000 samples,        3,667.94 hz
drain          : took     6.27 s ( 11.82%),    168,247 samples,   37.25 ms / 1000 samples,       26,842.75 hz
tree_searc

2021-04-02 19:16:30,017 INFO | ...complete!


<LOG_LEVEL> HealthCheckService:106 - status: HEALTHY, name: <*> <*> duration_ms: <*> failure_reason: none

<LOG_LEVEL> HealthCheckService:106 - [{"container":"soaesb-diagnostic-ai-log-collection","status":"Up <DURATION>"},{"container":"cda-notification-app","status":"Up <DURATION>"},{"container":"config-server","status":"Up <DURATION>"},{"container":"core.soaesb","status":"Up <DURATION> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> (1) <DURATION> <*> (1) <DURATION> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <*> <DURATION>"},{"container":"filebeat","status":"Up <DURATION>"},{"container":"grafana.metrics.soaesb","status":"Up <DURATION>"},{"container":"cadvisor.metrics.soaesb","status":"Up <DURATION>"},{"container":"prometheus.metrics.soaesb","status":"Up <DURATION>"},{"container":"node-exporter.metrics.soaesb","status":"Up <DURATION>"},{"container":"push-gateway.metrics.soaesb","status":"Up <DURATION>"},{"container":"container-export