# Prepare synthetic data for simulating health information in normal and abnormal behavior

In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
normal_cpu_utilization = np.random.normal(0.6,0.1, 100)
abnormal_cpu_utilization = np.random.normal(0.9,0.1, 10)
normal_free_heap_percent = np.random.normal(0.4,0.1, 100)
normal_jvm_old_generation_used = np.random.normal(0.5,0.2, 100)
# normal_cpu_utilization

## Features Identified

feature 1: avgbackground queue, (1500) [tasknode] <br>
feature 2: avg workflow queue, (55) [tasknode]<br>
feature 3: avg node manager thread pool size, (5.57) [tasknode] [feature 1,2,3 are related]<br>
 <br>
feature 4 : node id (need to revisit) [can we put node id as feature so that we can backtrack it later ???]<br>
feature 5: index=mail, failed post output=ssl ctx set options [source : splunk search] ( 0 normal, 5000+ abnormal)<br>
feature 6: product release (value 0,1)<br>
feature 7: ui node thread count (normal 250, abnormal 500+) (ims990)<br>
feature 8: ariba.ui.aribaweb.util.AWGenericException: java.lang.IllegalStateException: splunk search linked with feature 7<br>
feature 9: scheduletaskstatustab query arches batch publishinselectedrealm job exception <br>
<br>
feature 10: catalogue search time.(38) linked with 1,2,3 feature<br>
feature 11: no network connectivity between SNV and US1 - ims-983<br>
feature 12: 4310024, "Failed to get JDBC connection permit ims-980 (normal 0-100, abnormal 100-1000)<br>
feature 13: node status fair to critical its-980 (linked with feature 12)<br>
feature 14: cloud health index down more than N time in X minutes<br>
<br>
Feature 15: JVM Heap size getting filled & garbage collector not able to clean the data & frequently garbage collector is running<br>
Feature 16: log size getting filled drastically 10%->100% in an hour<br>
Feature 17: GT node restarting [Need more data]<br>
</p>

RecordDate  - Record date and time <br>

In [6]:
columns = ['RecordDate','AvgBackgroundQ', 'AvgThreadPoolSize', 'AvgWorkflowQ', 'CatalogSearchTime', 
           'Exception', 'LogSizeVolumePercent', 'NetworkConnectivitySNV-US1',  'NodeId', 'IsProductReleased',
          'UiNodeThreadsCount','CloudHealthIndex', 'Label'  
          ]

print('Dataset columns \n' , columns)

Dataset columns 
 ['RecordDate', 'AvgBackgroundQ', 'AvgThreadPoolSize', 'AvgWorkflowQ', 'CatalogSearchTime', 'Exception', 'LogSizeVolumePercent', 'NetworkConnectivitySNV-US1', 'NodeId', 'IsProductReleased', 'UiNodeThreadsCount', 'CloudHealthIndex', 'Label']


In [7]:
recordsCount = 10000

In [55]:
# Average BackgroudQ value is around 100-300 for normal & 1000+ abnormally
avgBackgroundQ = np.round(np.random.normal(200, 100, recordsCount-100)).astype(int)

# add abnormal values
avgBackgroundQ = np.append(avgBackgroundQ, np.round(np.random.normal(1000, 100, 100)).astype(int))


#Average WorkflowQ value is around 10-40
avgWorkflowQ = np.round(np.random.normal(20, 10, recordsCount)).astype(int)

#Average ThreadPoolSize would be around 8-20
avgThreadPoolSize = np.round(np.random.normal(8, 4, recordsCount)).astype(int)

# Most of the records don't have ID -1 represents no nodeid
nodeId = np.random.choice(np.append(np.arange(60,80), [-999]),recordsCount)

# Indicates if product is released
isProductReleased = np.random.choice([0,1], recordsCount)

# UI health index data 
uiNodeThreadCount = np.round(np.random.normal(250, 200, recordsCount)).astype(int)

# CloudHealthIndex
cloudHealthIndex = np.random.choice(['FAIR', 'GOOD', 'CRITICAL'], recordsCount)

# Exception occurred
exception = np.random.choice(['AWGenericException: java.lang.IllegalStateException','java.lang.OutOfMemoryException',
                              'NA', 'javax.net.ssl.SSLHandshakeException', 'spanning tree event',
                              'JDBC-connection-permit-failure', '[OutOfMemoryException, GT Nodes restarting]'], recordsCount)

# Catelog search time
catalogSearchTime = np.round(np.random.normal(20, 10, recordsCount)).astype(int)

# Network Connectivity broken ?
nwConnectivitySNV_US1 = np.random.choice([0,1], recordsCount)

# LogSize Volume Percent 0-1 . 
logSizeVolumePercent = np.round(np.random.random(recordsCount),decimals=3)
#np.random.randint(50,100,recordsCount)

# Record dates for every 30min from 1 Jan 2017, taking only given number of records
record_dates = pd.date_range(pd.to_datetime('01-01-2017', format='%d-%m-%Y'), 
                                  pd.to_datetime('31-12-2017', format='%d-%m-%Y'), freq='30min')[:recordsCount]


print('avgBackgroundQ = ', avgBackgroundQ)
print('avgWorkflowQ = ', avgWorkflowQ)
print('avgThreadPoolSize = ', avgThreadPoolSize)
print('nodeId = ', nodeId)
print('isProductReleased = ', isProductReleased)
print('uiNodeThreadCount = ', uiNodeThreadCount)
print('cloudHealthIndex = ', cloudHealthIndex)
print('exception = ', exception)
print('catalogSearchTime = ', catalogSearchTime)
print('nwConnectivitySNV_US1 = ', nwConnectivitySNV_US1)
print('logSizeVolumePercent = ', logSizeVolumePercent)
print('record_dates =\n', record_dates)


avgBackgroundQ =  [183  72 102 ... 993 889 939]
avgWorkflowQ =  [10  9 27 ... 26 28 34]
avgThreadPoolSize =  [ 6  2 14 ...  4  3  3]
nodeId =  [  65   60   79 ...   74   62 -999]
isProductReleased =  [0 0 0 ... 1 0 0]
uiNodeThreadCount =  [473 137 298 ...  97 418 389]
cloudHealthIndex =  ['FAIR' 'CRITICAL' 'FAIR' ... 'CRITICAL' 'GOOD' 'CRITICAL']
exception =  ['spanning tree event' 'JDBC-connection-permit-failure' 'NA' ... 'NA'
 'AWGenericException: java.lang.IllegalStateException'
 '[OutOfMemoryException, GT Nodes restarting]']
catalogSearchTime =  [16 30 20 ... 28 20 18]
nwConnectivitySNV_US1 =  [0 1 0 ... 1 1 1]
logSizeVolumePercent =  [0.306 0.908 0.58  ... 0.88  0.608 0.874]
record_dates =
 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:30:00',
               '2017-01-01 01:00:00', '2017-01-01 01:30:00',
               '2017-01-01 02:00:00', '2017-01-01 02:30:00',
               '2017-01-01 03:00:00', '2017-01-01 03:30:00',
               '2017-01-01 04:00:00', '2017-01-01 0

In [56]:
# There are fixed Issue category

incidentTypes = ['NoIssue', 'DBConnectionIssue', 'InvoiceIssue', 'OrderIssue', 'CommunityIssue',
                 'WorkspaceIssue' , 'NetworkIssue', 'CommunityHealthIssue' ]

labels = np.random.choice(incidentTypes, recordsCount)
print('labels = ', +labels)


labels =  ['NoIssue' 'CommunityIssue' 'OrderIssue' ... 'WorkspaceIssue'
 'NetworkIssue' 'OrderIssue']


In [80]:
recordsDict= {
    'AvgBackgroundQ' : avgBackgroundQ, 'AvgWorkflowQ' : avgWorkflowQ, 'AvgThreadPoolSize' : avgThreadPoolSize,
    'NodeId' : nodeId, 'IsProductReleased' : productRelease, 'UiNodeThreadsCount':uiNodeThreadCount , 
    'Exception' : exception, 'CatalogSearchTime' : catalogSearchTime, 
    'NetworkConnectivitySNV-US1': nwConnectivitySNV_US1, 'LogSizeVolumePercent' : logSizeVolumePercent,
    'LabelName' : labels, 'Date' : record_dates, 'CloudHealthIndex' : cloudHealthIndex
}

sample1 = pd.DataFrame(data=recordsDict)
sample1.head()

Unnamed: 0,AvgBackgroundQ,AvgThreadPoolSize,AvgWorkflowQ,CatalogSearchTime,CloudHealthIndex,Date,Exception,IsProductReleased,LabelName,LogSizeVolumePercent,NetworkConnectivitySNV-US1,NodeId,UiNodeThreadsCount
0,183,6,10,16,FAIR,2017-01-01 00:00:00,spanning tree event,0,NoIssue,0.306,0,65,473
1,72,2,9,30,CRITICAL,2017-01-01 00:30:00,JDBC-connection-permit-failure,0,CommunityIssue,0.908,1,60,137
2,102,14,27,20,FAIR,2017-01-01 01:00:00,,1,OrderIssue,0.58,0,79,298
3,243,8,25,32,FAIR,2017-01-01 01:30:00,,0,NoIssue,0.097,0,77,198
4,218,6,49,7,CRITICAL,2017-01-01 02:00:00,"[OutOfMemoryException, GT Nodes restarting]",1,CommunityIssue,0.016,0,66,346


In [81]:
sample1.to_excel('SystemLogsDataset2306.xlsx',index=False)

PermissionError: [Errno 13] Permission denied: 'SystemLogsDataset2306.xlsx'

## SystemLogsDataset is created
#### Now we will train the model

### Feature Generation & Normalization

In [84]:
# Consider all negative values for AvgBackgroundQ, AvgThreadPoolSize, AvgWorkflowQ, CatalogSearchTime, UiNodeThreadsCount
# as Missing values and replace with -999

df = sample1.copy()

df['AvgBackgroundQ'][df['AvgBackgroundQ'] < 1] = np.NAN
df['AvgThreadPoolSize'][df['AvgThreadPoolSize'] < 1] = np.NAN
df['AvgWorkflowQ'][df['AvgWorkflowQ'] < 1] = np.NAN
df['CatalogSearchTime'][df['CatalogSearchTime'] < 1] = np.NAN
df['UiNodeThreadsCount'][df['UiNodeThreadsCount'] < 1] = np.NAN

# df['AvgBackgroundQ'].hist()
# df['AvgThreadPoolSize'].hist()
# df['AvgWorkflowQ'].hist()
# df['CatalogSearchTime'].hist()
# df['UiNodeThreadsCount'].hist()

df.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Rem

Unnamed: 0,AvgBackgroundQ,AvgThreadPoolSize,AvgWorkflowQ,CatalogSearchTime,CloudHealthIndex,Date,Exception,IsProductReleased,LabelName,LogSizeVolumePercent,NetworkConnectivitySNV-US1,NodeId,UiNodeThreadsCount
0,183.0,6.0,10.0,16.0,FAIR,2017-01-01 00:00:00,spanning tree event,0,NoIssue,0.306,0,65,473.0
1,72.0,2.0,9.0,30.0,CRITICAL,2017-01-01 00:30:00,JDBC-connection-permit-failure,0,CommunityIssue,0.908,1,60,137.0
2,102.0,14.0,27.0,20.0,FAIR,2017-01-01 01:00:00,,1,OrderIssue,0.58,0,79,298.0
3,243.0,8.0,25.0,32.0,FAIR,2017-01-01 01:30:00,,0,NoIssue,0.097,0,77,198.0
4,218.0,6.0,49.0,7.0,CRITICAL,2017-01-01 02:00:00,"[OutOfMemoryException, GT Nodes restarting]",1,CommunityIssue,0.016,0,66,346.0


In [82]:
# Fill NaN values with -999
df = df.fillna(-999)

In [86]:
#Assign number to label
df['Label'] = df['LabelName'].apply(incidentTypes.index)
df.head()

Unnamed: 0,AvgBackgroundQ,AvgThreadPoolSize,AvgWorkflowQ,CatalogSearchTime,CloudHealthIndex,Date,Exception,IsProductReleased,LabelName,LogSizeVolumePercent,NetworkConnectivitySNV-US1,NodeId,UiNodeThreadsCount,Label
0,183.0,6.0,10.0,16.0,FAIR,2017-01-01 00:00:00,spanning tree event,0,NoIssue,0.306,0,65,473.0,0
1,72.0,2.0,9.0,30.0,CRITICAL,2017-01-01 00:30:00,JDBC-connection-permit-failure,0,CommunityIssue,0.908,1,60,137.0,4
2,102.0,14.0,27.0,20.0,FAIR,2017-01-01 01:00:00,,1,OrderIssue,0.58,0,79,298.0,3
3,243.0,8.0,25.0,32.0,FAIR,2017-01-01 01:30:00,,0,NoIssue,0.097,0,77,198.0,0
4,218.0,6.0,49.0,7.0,CRITICAL,2017-01-01 02:00:00,"[OutOfMemoryException, GT Nodes restarting]",1,CommunityIssue,0.016,0,66,346.0,4
