In [1]:
from elasticsearch import Elasticsearch
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
import matplotlib.pyplot as plt
import pyLDAvis
import warnings
# Silence DeprecationWarning messages for the rest of the session so they do
# not clutter cell output.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [2]:
# Client for the Elasticsearch node reachable as host `elasticsearch`, port 9200.
# NOTE(review): presumably a docker-compose service name; confirm for other setups.
es = Elasticsearch(hosts=['elasticsearch:9200'])

In [8]:
# Resolve the name of an index matching the Fluentd pattern. The alias lookup
# returns a mapping keyed by index name; the first key is used.
# NOTE(review): which index comes first when several match is not determined here.
try:
    fluentdIndex = list(es.indices.get_alias(index="fluentd-*").keys())[0]
except Exception as exc:
    # Fail loudly instead of printing and carrying on: otherwise `fluentdIndex`
    # stays undefined (or stale from an earlier run) and the next cell fails
    # with an unrelated error. The original cause stays attached via `from exc`.
    raise RuntimeError(
        "Please, run any container with Fluentd as the log driver first"
    ) from exc

In [9]:
# Maximum number of hits to request from Elasticsearch.
n = 10

# Search request: match every document, order by `@timestamp` descending
# (newest first), and return at most `n` hits.
body = {
    "query": {"match_all": {}},
    "size": n,
    "sort": [{"@timestamp": {"order": "desc"}}],
}

# Run the search against the Fluentd index resolved earlier in the notebook.
response = es.search(index=fluentdIndex, body=body)

In [10]:
# Pivot the search hits into column form: one list per `_source` key, with
# exactly one entry per hit so every column has the same length.
#
# Plain lists are used instead of repeated `np.append` calls because
# `np.append` copies the whole array on every call, flattens list-valued
# fields into several entries, and coerces mixed types to one dtype.
hits = response["hits"]["hits"]

# Union of all keys, in first-seen order (dicts preserve insertion order).
field_names = list(dict.fromkeys(key for doc in hits for key in doc["_source"]))

# A hit lacking a key contributes None, keeping rows aligned across columns.
fields = {
    name: [doc["_source"].get(name) for doc in hits]
    for name in field_names
}

In [11]:
# Build a DataFrame with one column per collected field, then preview the
# first five rows.
elastic_df = pd.DataFrame(data=fields)
elastic_df.head(n=5)

Unnamed: 0,source,log,container_id,container_name,@timestamp,@log_name
0,stderr,ts=2020-10-08T20:47:33Z caller=logging.go:29 m...,5e263eb0aec4cecb6b198de0024522ec38ee6072ff5559...,/docker-compose_payment_1,2020-10-08T20:47:33.000000000+00:00,docker.5e263eb0aec4
1,stderr,ts=2020-10-08T20:47:33Z caller=logging.go:29 m...,5e263eb0aec4cecb6b198de0024522ec38ee6072ff5559...,/docker-compose_payment_1,2020-10-08T20:47:33.000000000+00:00,docker.5e263eb0aec4
2,stdout,Attempting to delete cart for user: 57a98d98e4...,b5478ce2f915113837c99c06bd494056f4f41f6cb195fa...,/docker-compose_front-end_1,2020-10-08T20:47:33.000000000+00:00,docker.b5478ce2f915
3,stdout,User cart deleted with status: 202,b5478ce2f915113837c99c06bd494056f4f41f6cb195fa...,/docker-compose_front-end_1,2020-10-08T20:47:33.000000000+00:00,docker.b5478ce2f915
4,stdout,[0mDELETE /cart [32m202 [0m6.382 ms - -[0m,b5478ce2f915113837c99c06bd494056f4f41f6cb195fa...,/docker-compose_front-end_1,2020-10-08T20:47:33.000000000+00:00,docker.b5478ce2f915


In [12]:
# Write the frame to `elastic.csv` in the working directory, omitting the
# row index.
elastic_df.to_csv(path_or_buf='elastic.csv', index=False)

In [13]:
# Load `elastic.csv` from the working directory into `elastic_df`, replacing
# any frame currently bound to that name.
elastic_df = pd.read_csv(filepath_or_buffer='elastic.csv')

In [15]:
# Remove the `container_id` and `@log_name` columns.
# errors='ignore' makes the cell safe to re-run: without it, a second run on
# the already-trimmed frame raises KeyError.
elastic_df = elastic_df.drop(columns=['container_id', '@log_name'], errors='ignore')