Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update SHISO.py #52

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 16 additions & 4 deletions logparser/Drain/Drain.py
Expand Up @@ -4,13 +4,14 @@
License : MIT
"""

import re
import regex as re
import os
import numpy as np
import pandas as pd
import hashlib
from datetime import datetime

import math
from collections import Counter

class Logcluster:
def __init__(self, logTemplate='', logIDL=None):
Expand Down Expand Up @@ -212,7 +213,8 @@ def outputResult(self, logClustL):
self.df_log['EventId'] = log_templateids
self.df_log['EventTemplate'] = log_templates

if self.keep_para:
# if self.keep_para:
if log_templates:
self.df_log["ParameterList"] = self.df_log.apply(self.get_parameter_list, axis=1)
self.df_log.to_csv(os.path.join(self.savePath, self.logName + '_structured.csv'), index=False)

Expand Down Expand Up @@ -332,6 +334,15 @@ def generate_logformat_regex(self, logformat):
regex = re.compile('^' + regex + '$')
return headers, regex

def entropy(self, string):
    """Return the base-10 Shannon entropy of the characters in a string.

    This is a lossy numeric summary of the character distribution, not an
    encryption: the original string cannot be recovered from the result.

    :param string: the string parameter
    :return: the entropy rounded to 5 decimal places; 0.0 for an empty
             string or a string made of one repeated character
    """
    length_string = float(len(string))
    if not length_string:
        # No characters means no distribution; report zero entropy as a
        # float so every return value has the same numeric type.
        return np.float64(0.0)

    weighted_logs = 0.0
    for count in Counter(string).values():
        probability = count / length_string
        weighted_logs += probability * math.log(probability, 10)

    # Adding 0.0 turns a negative zero (single distinct character) into
    # a plain 0.0 so it is not written out as "-0.0".
    return np.round(-weighted_logs, 5) + 0.0

def get_parameter_list(self, row):
template_regex = re.sub(r"<.{1,5}>", "<*>", row["EventTemplate"])
if "<*>" not in template_regex: return []
Expand All @@ -341,4 +352,5 @@ def get_parameter_list(self, row):
parameter_list = re.findall(template_regex, row["Content"])
parameter_list = parameter_list[0] if parameter_list else ()
parameter_list = list(parameter_list) if isinstance(parameter_list, tuple) else [parameter_list]
return parameter_list
parameter_list = [self.entropy(string) for string in parameter_list]
return parameter_list
5 changes: 3 additions & 2 deletions logparser/SHISO/SHISO.py
Expand Up @@ -7,8 +7,9 @@
import re
import os
import time
import multiprocessing as mp
from nltk import ngrams
from Queue import *
# from Queue import *
import numpy as np
import pandas as pd
import hashlib
Expand Down Expand Up @@ -293,7 +294,7 @@ def Adjust(self, pn, nidx, n):

def outputResult(self, node):
templateNo = 1
nodeQ = Queue()
nodeQ = mp.Queue()
nodeQ.put(node)

templates = [0] * self.df_log.shape[0]
Expand Down