Merge pull request #111 from Siyuexi/main
Fixed a potential bug that may lead to positional misalignment between pred and gt on different devices.
PinjiaHe committed Jan 16, 2024
2 parents 097ac7f + e39f71c commit 7e6f6cd
Showing 2 changed files with 92 additions and 82 deletions.
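
For context on the fix: the original metrics compared row *i* of the parsed result against row *i* of the ground truth, which silently breaks whenever the two files are ordered differently (as can happen when results are produced on different devices). The patched metrics below match rows by log content instead. A minimal, self-contained sketch of the difference (the toy data is illustrative, not from the repository):

```python
import pandas as pd

groundtruth = pd.DataFrame({
    "Content": ["open file A", "close file A"],
    "EventTemplate": ["open file <*>", "close file <*>"],
})
# The same logs, parsed in a different order than the groundtruth file:
result = pd.DataFrame({
    "log": ["close file A", "open file A"],
    "template": ["close file <*>", "open file <*>"],
})

# Positional matching (old behaviour) mis-scores the reordered rows:
pa_old = sum(result["template"][i] == groundtruth["EventTemplate"][i]
             for i in range(len(result))) / len(result)   # 0.0

# Content-based matching (new behaviour) is order-independent:
pa_new = sum(tpl == groundtruth.loc[groundtruth["Content"] == log,
                                    "EventTemplate"].values[0]
             for log, tpl in zip(result["log"], result["template"])) / len(result)   # 1.0
```
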
158 changes: 76 additions & 82 deletions logparser/DivLog/DivLog.py
@@ -11,6 +11,7 @@
from random import sample
from sklearn.model_selection import train_test_split
from openai.embeddings_utils import get_embedding, cosine_similarity
from collections import Counter


def dpp(kernel_matrix, max_length, epsilon=1E-10):
@@ -63,15 +64,73 @@ def DPPsplit(log_list, groundtruth_template, candidate_idx):
test_templates = [groundtruth_template[idx] for idx in test_idx]
return test_logs, cand_logs, test_templates, cand_templates

def evaluateGA(dataset, groundtruth, result):
# calculate parsing accuracy
def evaluatePA(groundtruth, result):
# len(result['template']) may be smaller than len(groundtruth)
length = len(result['template'])
if length == 0: return 0
correct = 0
for i in range(length):
if result['template'][i] == groundtruth.loc[groundtruth['Content'] == result['log'][i]]['EventTemplate'].values[0]:
correct += 1
return correct/length

# correctly identified templates over total num of identified template
def evaluatePTA(groundtruth, result):
# generate a "template: log indexes list" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result['template'])):
if groundtruth['EventTemplate'][idx] not in oracle_tem_dict:
oracle_tem_dict[groundtruth['EventTemplate'][idx]] = [groundtruth['Content'][idx]]
else: oracle_tem_dict[groundtruth['EventTemplate'][idx]].append(groundtruth['Content'][idx])

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result['template'])):
if result['template'][idx] not in result_tem_dict:
result_tem_dict[result['template'][idx]] = [result['log'][idx]]
else: result_tem_dict[result['template'][idx]].append(result['log'][idx])

correct_num = 0
for key in result_tem_dict.keys():
if key not in oracle_tem_dict: continue
else:
if Counter(oracle_tem_dict[key]) == Counter(result_tem_dict[key]): correct_num += 1

return correct_num/len(result_tem_dict)

# correctly identified templates over total num of oracle template
def evaluateRTA(groundtruth, result):
# generate a "template: log indexes list" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result['template'])):
if groundtruth['EventTemplate'][idx] not in oracle_tem_dict:
oracle_tem_dict[groundtruth['EventTemplate'][idx]] = [groundtruth['Content'][idx]]
else: oracle_tem_dict[groundtruth['EventTemplate'][idx]].append(groundtruth['Content'][idx])

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result['template'])):
if result['template'][idx] not in result_tem_dict:
result_tem_dict[result['template'][idx]] = [result['log'][idx]]
else: result_tem_dict[result['template'][idx]].append(result['log'][idx])

correct_num = 0
for key in oracle_tem_dict.keys():
if key not in result_tem_dict: continue
else:
if Counter(oracle_tem_dict[key]) == Counter(result_tem_dict[key]): correct_num += 1

return correct_num/len(oracle_tem_dict)

# calculate grouping accuracy
def evaluateGA(groundtruth, result):
# load logs and templates
df_groundtruth = pd.read_csv(groundtruth)
df_parsedlog = pd.read_csv(result)
compared_list = df_parsedlog['log'].tolist()
compared_list = result['log'].tolist()

# select groundtruth logs that have been parsed
parsed_idx = []
for idx, row in df_groundtruth.iterrows():
for idx, row in groundtruth.iterrows():
if row['Content'] in compared_list:
parsed_idx.append(idx)
compared_list.remove(row['Content'])
@@ -81,11 +140,11 @@ def evaluateGA(dataset, groundtruth, result):
print("Wrong number of groundtruth logs!")
return 0

df_groundtruth = df_groundtruth.loc[parsed_idx]
groundtruth = groundtruth.loc[parsed_idx]

# grouping
groundtruth_dict = {}
for idx, row in df_groundtruth.iterrows():
for idx, row in groundtruth.iterrows():
if row['EventTemplate'] not in groundtruth_dict:
# create a new key
groundtruth_dict[row['EventTemplate']] = [row['Content']]
@@ -94,7 +153,7 @@ def evaluateGA(dataset, groundtruth, result):
groundtruth_dict[row['EventTemplate']].append(row['Content'])

result_dict = {}
for idx, row in df_parsedlog.iterrows():
for idx, row in result.iterrows():
if row['template'] not in result_dict:
# create a new key
result_dict[row['template']] = [row['log']]
@@ -153,7 +212,7 @@ def __init__(self,
self.log_test, self.log_cand, self.gt_test, self.gt_cand = self.splitCandidates(self.log_path, self.cand_ratio, self.split_method)

# build lookup map
# self.lookUpMap = self.buildLookupMap(self.map_path)
self.lookUpMap = self.buildLookupMap(self.map_path)

# generate lookup map
def buildLookupMap(self, map_path):
@@ -267,73 +326,6 @@ def generatePrompt(self, log, nearest_num=5):
similarist_gt = self.gt_cand[idxes[0]]
return prompt, similarist_gt

# compare if template is correctly extracted: if yes, return 1; else return 0
def compareTemplate(self, tpl_1, tpl_2):
token_list_1 = tpl_1.split()
token_list_2 = tpl_2.split()
if (len(token_list_1) != len(token_list_2)): return 0
length = len(token_list_1)
for i in range(length):
if (token_list_1[i] != token_list_2[i]): return 0
return 1;

# calculate parsing accuracy
def evaluatePA(self, result):
# len(result) may smaller than len(groundtruth)
length = len(result)
if length == 0: return 0
correct = 0
for i in range(length):
correct += self.compareTemplate(result[i], self.gt_test[i])
return correct/length

# correctly identified templates over total num of identified template
def evaluatePTA(self, result):
# generate a "template: log indexes list" mapping for groundtruth
oracle_tem_dict = {}
for idx in range(len(result)):
if self.gt_test[idx] not in oracle_tem_dict:
oracle_tem_dict[self.gt_test[idx]] = [idx]
else: oracle_tem_dict[self.gt_test[idx]].append(idx)

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result)):
if result[idx] not in result_tem_dict:
result_tem_dict[result[idx]] = [idx]
else: result_tem_dict[result[idx]].append(idx)

correct_num = 0
for key in result_tem_dict.keys():
if key not in oracle_tem_dict: continue
else:
if oracle_tem_dict[key] == result_tem_dict[key]: correct_num += 1

return correct_num/len(result_tem_dict)

# correctly identified templates over total num of oracle template
def evaluateRTA(self, result):
oracle_tem_dict = {}
for idx in range(len(result)):
if self.gt_test[idx] not in oracle_tem_dict:
oracle_tem_dict[self.gt_test[idx]] = [idx]
else: oracle_tem_dict[self.gt_test[idx]].append(idx)

# generate mapping for identified template
result_tem_dict = {}
for idx in range(len(result)):
if result[idx] not in result_tem_dict:
result_tem_dict[result[idx]] = [idx]
else: result_tem_dict[result[idx]].append(idx)

correct_num = 0
for key in oracle_tem_dict.keys():
if key not in result_tem_dict: continue
else:
if oracle_tem_dict[key] == result_tem_dict[key]: correct_num += 1

return correct_num/len(oracle_tem_dict)

def writeResult(self, result, path, limit):
output = pd.DataFrame(data={"log": self.log_test[:limit], "template": result})
output.to_csv(path, index=False)
@@ -380,8 +372,8 @@ def BatchParse(self, model, model_name, limit, N=5):
prompt=instruction + "\n\n\n" + prompt + "<prompt>:" + line.strip() + "\n<extraction>: ",
temperature=temperature,
max_tokens=token_len)
except: # if interrupt by request busy
print("Request busy, log {} is now waiting ...".format(line_idx))
except Exception as e: # if exception occurs
print(e)
re_id += 1
if re_id < 5:
time.sleep(0.1)
@@ -417,10 +409,12 @@ def BatchParse(self, model, model_name, limit, N=5):
df = pd.DataFrame(columns=['Dataset', 'Parsing Accuracy', 'Precision Template Accuracy', 'Recall Template Accuracy', 'Grouping Accuracy'])
else:
df = pd.read_csv("DivLog_bechmark_result.csv")
PA = self.evaluatePA(answer_list)
PTA = self.evaluatePTA(answer_list)
RTA = self.evaluateRTA(answer_list)
GA = evaluateGA(self.dataset, self.log_path, self.result_path)
df_groundtruth = pd.read_csv(self.log_path)
df_parsedlog = pd.read_csv(self.result_path)
PA = evaluatePA(df_groundtruth, df_parsedlog)
PTA = evaluatePTA(df_groundtruth, df_parsedlog)
RTA = evaluateRTA(df_groundtruth, df_parsedlog)
GA = evaluateGA(df_groundtruth, df_parsedlog)
print("{}:\t PA:\t{:.6f}\tPTA:\t{:.6f}\tRTA:\t{:.6f}\tGA:\t{:.6f}".format(self.dataset, PA, PTA, RTA, GA))
if self.dataset not in df['Dataset'].values:
df.loc[len(df)] = [self.dataset, PA, PTA, RTA, GA]
16 changes: 16 additions & 0 deletions logparser/DivLog/README.md
@@ -40,6 +40,22 @@ If you wish to re-run all the results (which may cost much time and api budget),
rm -r results
```

#### Attention:

OpenAI has [shut down](https://platform.openai.com/docs/deprecations/2023-07-06-gpt-and-embeddings) the *Text Completion API* for the GPT-3 model series (`ada`, `babbage`, `curie`, `davinci`) as of January 4th, 2024. If you wish to run the DivLog framework against OpenAI's *Chat Completion API* and re-run all the results, you need to modify the API request in `BatchParse` of `DivLog.py`. Specifically, replace the original request designed for the GPT-3 models:

```python
### Replace it
response = openai.Completion.create(
model=model,
prompt=instruction + "\n\n\n" + prompt + "<prompt>:" + line.strip() + "\n<extraction>: ",
temperature=temperature,
max_tokens=token_len)
```
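
A minimal sketch of the corresponding Chat Completion request is given below, assuming the pre-v1.0 `openai` Python SDK and a chat model such as `gpt-3.5-turbo`; the split of the prompt into `system` and `user` messages is an illustrative choice, not part of the original code:

```python
### With something like this
response = openai.ChatCompletion.create(
    model=model,  # must be a chat model, e.g. "gpt-3.5-turbo"
    messages=[
        {"role": "system", "content": instruction},
        {"role": "user", "content": prompt + "<prompt>:" + line.strip() + "\n<extraction>: "},
    ],
    temperature=temperature,
    max_tokens=token_len)

# The extraction now lives in response["choices"][0]["message"]["content"]
# rather than response["choices"][0]["text"].
```
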

More details about the Chat Completion API can be found [here](https://platform.openai.com/docs/api-reference/chat).


### Benchmark

Running the benchmark script on the Loghub_2k datasets, you should obtain the following results.
