bin:analyse: add support for differential analyses

Previously, analyse compared all patches under consideration, disregarding previous evaluation results. This patch adds a new differential flag that utilises the existing evaluation results and only compares the newly added patches against each other and against the existing ones, reducing the number of comparisons.

The differential evaluation process can be explained as follows:

  result = new_patches X existing_patches + new_patches X new_patches + existing_eval_results
         = new_patches X (new_patches + existing_patches) + existing_eval_results
         = new_patches X victims + existing_eval_results

Signed-off-by: Rohit Sarkar <rohitsarkar5398@gmail.com>
rsarky · Aug 19, 2020 · 55da867 · 55da867
1 parent 4834ce8
commit 55da867
Showing 1 changed file with 34 additions and 7 deletions.
diff --git a/bin/pasta_analyse.py b/bin/pasta_analyse.py
@@ -35,7 +35,6 @@ def _evaluate_patch_list_wrapper(thresholds, args):
                                 orig, cand,
                                 parallelise=False)
 
-
 def find_cherries(repo, commit_hashes, dest_list):
     """
     find_cherries() takes a list of commit hashes, a list of potential
@@ -132,6 +131,10 @@ def analyse(config, prog, argv):
                              'e.g.: v0.1..v0.2 (default: %s)' %
                              config.upstream_range)
 
+    parser.add_argument('-differential', dest='differential', action='store_true',
+                        default=False,
+                        help='Perform a differential analysis')
+
     args = parser.parse_args(argv)
 
     config.thresholds.heading = args.thres_heading
@@ -147,6 +150,9 @@ def analyse(config, prog, argv):
         log.error('Analysis mode succ is not available in mailbox mode!')
         return -1
 
+    if not mbox and args.differential:
+        log.error('Differential analysis can only be performed in mailbox mode')
+
     f_cluster, cluster = config.load_cluster(must_exist=False)
 
     def fill_result(hashes, tag):
@@ -166,6 +172,7 @@ def fill_result(hashes, tag):
         # exists.
         config.load_ccache_mbox()
 
+        new_patches = set()
         if mode == 'rep':
             victims = repo.mbox.get_ids(config.mbox_time_window)
 
@@ -214,6 +221,9 @@ def fill_result(hashes, tag):
                 victims = linux_patches
                 repo.cache_evict_except(victims)
 
+
+            # get new downstream patches since previous analysis
+            new_patches = victims - cluster.get_downstream()
             log.info('Cached %d relevant mails' % len(available))
             fill_result(victims, False)
 
@@ -287,6 +297,8 @@ def fill_result(hashes, tag):
             else:
                 candidates = set(config.upstream_hashes)
 
+            # get new upstream patches since last analysis
+            new_patches |= candidates - cluster.get_upstream()
             fill_result(candidates, True)
 
             config.load_ccache_upstream()
@@ -307,12 +319,27 @@ def fill_result(hashes, tag):
 
             type = EvaluationType.PatchStack
 
-        log.info('Starting evaluation')
-        evaluation_result = evaluate_commit_list(repo, config.thresholds,
-                                                 mbox, type,
-                                                 representatives, candidates,
-                                                 parallelise=True, verbose=True,
-                                                 cpu_factor=args.cpu_factor)
+        if args.differential:
+            representatives = representatives | new_patches
+            log.info('Starting differential evaluation of %u new patches' % len(new_patches))
+            differential_evaluation = evaluate_commit_list(repo, config.thresholds,
+                                                           mbox, type,
+                                                           representatives, new_patches,
+                                                           parallelise=True, verbose=True,
+                                                           cpu_factor=args.cpu_factor)
+            evaluation_result = EvaluationResult.from_file(config.f_evaluation_result,
+                                                           config.d_false_positives)
+            if evaluation_result:
+                evaluation_result.merge(differential_evaluation)
+            else:
+                evaluation_result = differential_evaluation
+        else:
+            log.info('Starting evaluation')
+            evaluation_result = evaluate_commit_list(repo, config.thresholds,
+                                                     mbox, type,
+                                                     representatives, candidates,
+                                                     parallelise=True, verbose=True,
+                                                     cpu_factor=args.cpu_factor)
         log.info('  ↪ done.')
 
     evaluation_result.merge(cherries)