In [1]:
from statsmodels.tsa.stattools import grangercausalitytests as Granger
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import datetime

In [2]:
KPI_DIR = "/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/"
KPI_FILES = [KPI_DIR+k for k in os.listdir(KPI_DIR)]
KPI_FILES

['/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/db_oracle_11g.csv',
 '/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/dcos_container.csv',
 '/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/dcos_docker.csv',
 '/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/mw_redis.csv',
 '/mnt/c/Users/tiago/Documents/Uni/anm/anm-project/data/training_data/host/os_linux.csv']

In [3]:
kpis_df = pd.concat([pd.read_csv(f) for f in KPI_FILES]).sort_values('timestamp')
kpis_df['timestamp'] = kpis_df['timestamp'].apply(lambda x: datetime.datetime.fromtimestamp(x/1000.0))
kpis_df

Unnamed: 0,itemid,name,bomc_id,timestamp,value,cmdb_id
0,999999998651280,CPU_free_pct,ZJ-002-056,2020-05-03 17:00:00,98.119746,db_008
0,63309300,Sent_errors_packets,ZJ-001-070,2020-05-03 17:00:00,0.000000,os_012
1,999999996431400,Processor_load_5_min,ZJ-001-005,2020-05-03 17:00:00,0.011667,os_012
2,999999996432240,Disk_rd_ios,ZJ-001-098,2020-05-03 17:00:00,0.000000,os_007
3,999999996437700,CPU_util_pct,ZJ-001-001,2020-05-03 17:00:00,1.750118,os_002
...,...,...,...,...,...,...
380143,999999996508399,used_cpu_sys,ZJ-005-022,2020-05-04 16:59:59,8.440000,redis_001
380144,999999996508394,Redis_key_count,ZJ-005-033,2020-05-04 16:59:59,19799.000000,redis_001
380145,999999996508393,redis_load,ZJ-005-035,2020-05-04 16:59:59,0.000000,redis_001
1214597,999999996432179,CPU_iowait_time,ZJ-001-010,2020-05-04 16:59:59,0.002779,os_007


In [4]:
kpis = dict(tuple(kpis_df.groupby('cmdb_id')))
kpis.keys()

dict_keys(['container_001', 'container_002', 'db_001', 'db_002', 'db_003', 'db_004', 'db_005', 'db_006', 'db_007', 'db_008', 'db_009', 'db_010', 'db_011', 'db_012', 'db_013', 'docker_001', 'docker_002', 'docker_003', 'docker_004', 'docker_005', 'docker_006', 'docker_007', 'docker_008', 'os_001', 'os_002', 'os_003', 'os_004', 'os_005', 'os_006', 'os_007', 'os_008', 'os_009', 'os_010', 'os_011', 'os_012', 'os_013', 'os_014', 'os_015', 'os_016', 'os_017', 'os_018', 'os_019', 'os_020', 'os_021', 'os_022', 'redis_001', 'redis_002', 'redis_003', 'redis_004', 'redis_005', 'redis_006', 'redis_007', 'redis_008', 'redis_009', 'redis_010', 'redis_011', 'redis_012'])

In [5]:
nodes = []
for key in kpis:
    names = kpis[key]['name'].unique()
    nodes.extend([(key, name) for name in names])

In [204]:
nodes[0]

('container_001', 'container_fail_percent')

In [6]:
import networkx as nx
causality_graph = nx.DiGraph()
causality_graph.add_nodes_from(map(lambda x: f'{x[0]}:{x[1]}', nodes))

In [23]:
a = list(map(lambda x: f'{x[0]}:{x[1]}', nodes))
edge_pairs = [(i,j) for index, i in enumerate(a) for j in a[index+1:]]

In [12]:
len(edge_pairs)

2143485

In [226]:
def get_values(node):
    host, name = node.split(':')
    return kpis[host][kpis[host]['name'] == name]

In [41]:
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()

formatted_df = {}
for host in list(kpis.keys()):
    groups = list(kpis[host].groupby('name'))
    for i in groups:
        formatted_df[f'{host}:{i[0]}'] = scaler.fit_transform(i[1]['value'].reset_index())

In [42]:
formatted_df['container_001:container_fail_percent']

array([[0.00000000e+00, 0.00000000e+00],
       [6.97107006e-04, 0.00000000e+00],
       [1.04566051e-03, 0.00000000e+00],
       ...,
       [9.98954339e-01, 0.00000000e+00],
       [9.99651446e-01, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00]])

In [10]:
from dtw import dtw, accelerated_dtw
from dtaidistance import dtw
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

In [17]:
hosts = list(formatted_df.keys())

In [19]:
hosts[0]

'container_001:container_fail_percent'

In [43]:
base_dst = {}

start = hosts[0]
start_df = formatted_df[start]
for i, next_host in enumerate(hosts[1:]):
    if i % 100 == 0:
        print('Currently at ', i)
    next_df = formatted_df[next_host]
    base_dst[(start,next_host)] = fastdtw(start_df, next_df, dist=euclidean)

Currently at  0
Currently at  100
Currently at  200
Currently at  300
Currently at  400
Currently at  500
Currently at  600
Currently at  700
Currently at  800
Currently at  900
Currently at  1000
Currently at  1100
Currently at  1200
Currently at  1300
Currently at  1400
Currently at  1500
Currently at  1600
Currently at  1700
Currently at  1800
Currently at  1900
Currently at  2000


In [44]:
distances = {}
for edge in edge_pairs:
    src,dst = edge
    if src == start:
        distances[edge] = base_dst[edge][0]
    else:
        distances[edge] = abs(base_dst[(start, src)][0] - base_dst[(start, dst)][0])


In [27]:
edge_pairs[:5]

[('container_001:container_fail_percent',
  'container_002:container_fail_percent'),
 ('container_001:container_fail_percent', 'db_001:CPU_free_pct'),
 ('container_001:container_fail_percent', 'db_001:Hang'),
 ('container_001:container_fail_percent', 'db_001:DbTime'),
 ('container_001:container_fail_percent', 'db_001:Row_Lock')]

In [68]:
arr = np.array(list(distances.values()))

In [76]:
arr = np.array(list(map(lambda x: round(x, 3), arr)))

In [83]:
len(np.where(arr == 0)[0])

11886

In [84]:
import pickle
with open('weights.pickle', 'wb') as f:
    pickle.dump(distances, f)

In [None]:
nodes

In [57]:
v1 = formatted_df['container_001:container_fail_percent']['value']
v2 = formatted_df['db_001:Hang']['value']
v3 = formatted_df['db_001:DbTime']['value']
v4 = formatted_df['db_001:Sess_Active']['value']
print(fastdtw(v1, v2)[0])
print(fastdtw(v1, v4)[0])
print(fastdtw(v2, v4)[0])

print(fastdtw(v1, v2)[0])
print(fastdtw(v1, v3)[0])
print(fastdtw(v2, v3)[0])


1440.0
255.19999999999453
1188.999999999983
1440.0
25550.600000000057
24110.600000000053


In [40]:
formatted_df.keys()

os_013:Processor_load_15_min', 'os_013:Processor_load_1_min', 'os_013:Processor_load_5_min', 'os_013:Received_errors_packets', 'os_013:Received_packets', 'os_013:Received_queue', 'os_013:Recv_total', 'os_013:Send_total', 'os_013:Sent_errors_packets', 'os_013:Sent_packets', 'os_013:Sent_queue', 'os_013:Shared_memory', 'os_013:Swap_used_pct', 'os_013:System_block_queue_length', 'os_013:System_wait_queue_length', 'os_013:Zombie_Process', 'os_013:ss_total', 'os_014:Agent_ping', 'os_014:Buffers_used', 'os_014:CPU_idle_pct', 'os_014:CPU_iowait_time', 'os_014:CPU_system_time', 'os_014:CPU_user_time', 'os_014:CPU_util_pct', 'os_014:Cache_used', 'os_014:Disk_avgqu_sz', 'os_014:Disk_await', 'os_014:Disk_io_util', 'os_014:Disk_rd_ios', 'os_014:Disk_rd_kbs', 'os_014:Disk_svctm', 'os_014:Disk_wr_ios', 'os_014:Disk_wr_kbs', 'os_014:FS_max_avail', 'os_014:FS_max_util', 'os_014:FS_total_space', 'os_014:FS_used_pct', 'os_014:FS_used_space', 'os_014:ICMP_ping', 'os_014:Incoming_network_traffic', 'os_014

In [14]:
k = list(formatted_df.keys())
len(k)

2071

In [307]:
formatted_df[k[0]]

Unnamed: 0,itemid,name,bomc_id,timestamp,value,cmdb_id
1,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:00:21,0.0,container_001
3,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:01:20,0.0,container_001
4,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:02:19,0.0,container_001
6,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:03:07,0.0,container_001
8,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:04:08,0.0,container_001
...,...,...,...,...,...,...
2863,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:55:16,0.0,container_001
2864,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:56:15,0.0,container_001
2867,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:57:15,0.0,container_001
2869,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:58:15,0.0,container_001


In [16]:
k[0]

'container_001:container_fail_percent'

In [31]:
formatte

Unnamed: 0,itemid,name,bomc_id,timestamp,value,cmdb_id
1,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:00:21,0.0,container_001
3,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:01:20,0.0,container_001
4,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:02:19,0.0,container_001
6,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:03:07,0.0,container_001
8,999999996470837,container_fail_percent,ZJ-004-065,2020-05-03 17:04:08,0.0,container_001
...,...,...,...,...,...,...
2863,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:55:16,0.0,container_001
2864,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:56:15,0.0,container_001
2867,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:57:15,0.0,container_001
2869,999999996470837,container_fail_percent,ZJ-004-065,2020-05-04 16:58:15,0.0,container_001
