# Find bug seeding patterns
---
We want to find seeding patterns that may be used by SemSeed. This is supposed
to create a table comparing *Mutandis*.

In [1]:
import pandas as pd
import json
import os
from pathlib import Path
import codecs
import json
from typing import List, Dict, Any
import pandas as pd
from multiprocessing import Pool, cpu_count
from tqdm.notebook import trange, tqdm
benchmarks_dir = '../benchmarks'

# SemSeed
seeded_bugs_file_path  = os.path.join(benchmarks_dir, 'js_benchmark_seeded_bugs_efficiency_test.pkl')
seeded_bugs_files_dir  = os.path.join(benchmarks_dir, 'js_benchmark_seeded_bugs_efficiency_test')

In [59]:
def read_json_file(json_file_path)->Dict:
    try:
        obj_text = codecs.open(json_file_path, 'r', encoding='utf-8').read()
        r = json.loads(obj_text)
        return r
    except FileNotFoundError:
        print(
            "Please provide a correct file p. Eg. ./results/validated-conflicts.json")
        return {}
    except Exception as e:
        # Empty JSON file most likely due to abrupt killing of the process while writing
        # print (e)
        return {}

def read_dataset_given_files(extracted_data_files: List) -> pd.DataFrame:
    d = []
    with Pool(cpu_count()) as p:
        with tqdm(total=len(extracted_data_files)) as pbar:
            pbar.set_description_str(
                desc="Reading dataset from files", refresh=False)
            for i, each_vars in enumerate(
                    p.imap_unordered(read_json_file, extracted_data_files, 20)):
                pbar.update()
                d.append(each_vars)
            p.close()
            p.join()
    # print(d)
    extracted_dataset = pd.DataFrame(d)
    return extracted_dataset

def file_path_to_dataset(dataset_file_path, dir_path):
    if not Path(dataset_file_path).is_file():
        file_paths = list(Path(dir_path).rglob('*.json'))
        print(f"Number of files={len(file_paths)}")
        dataset = read_dataset_given_files(extracted_data_files=file_paths)
        print(f"Saving {dataset_file_path}")
        dataset.to_pickle(dataset_file_path,'gzip')
    else:
        print(f'Reading from {dataset_file_path}')
        dataset = pd.read_pickle(dataset_file_path,'gzip')
    print(f"Dataset contains {len(dataset)} examples")
    return dataset

In [60]:
seeded_bugs = file_path_to_dataset(dataset_file_path=seeded_bugs_file_path, dir_path=seeded_bugs_files_dir)

Reading from ../../../benchmarks/js_benchmark_seeded_bugs_efficiency_test.pkl
Dataset contains 677217 examples


### Syntax errors in seeded bugs
The above bugs have been seeded on a random sample of 1000 files.

In [7]:
print(f'Seeded {len(seeded_bugs)} bugs on random sample of 1000 files')

Seeded 677217 bugs on random sample of 1000 files


In [5]:
syntax_check = read_json_file(os.path.join(benchmarks_dir,'file_and_syntax_correct.json'))
inc = 0
for f,v in syntax_check.items():
    if not v:
        inc+=1
print(f'{inc} or {inc/len(seeded_bugs)*100}% Syntactically incorrect')

20320 or 3.0005153444169297% Syntactically incorrect


In [36]:
def get_only_idf_lit_containing_patterns(all_changes):
    """
    It is possible that every bug-fix pattern can not be used to seed bugs.
    We filter some of them here. For example:
        * we may filter very long change patterns (although we do it once while aggregating data from MongoDB)
        * we may select only those chage patterns that has atleast 'N' frequency
    """
    filtered_change_patterns = []

    # # ----------------------- Filtering number of tokens -------------------------
    # max_number_of_tokens = 10
    # for change_pattern in self.all_training_change_patterns:
    #     print('\n\n \t *** ***  Selecting only change patterns having total {} tokens *** ***'.format(max_number_of_tokens*2))
    #     if len(change_pattern['fix']) <= max_number_of_tokens and len(change_pattern['buggy']) <= max_number_of_tokens:
    #         filtered_change_patterns.append(change_pattern)

    # ----------------------- Filtering based on the frequency of the change patterns -----------------
    # min_frequency = 4
    # print('\n \t *** ***  Filtering only change patterns having minimum frequency  {} *** ***\n'.format(min_frequency))
    # mapping_of_change_patterns = SeedBugs._str_mapping_change_pattern_to_change(
    #     all_changes)

    # for mapped_seq in mapping_of_change_patterns:
    #     if len(mapping_of_change_patterns[mapped_seq]) >= min_frequency:
    #         filtered_change_patterns.extend(
    #             mapping_of_change_patterns[mapped_seq])

    # print("\tTotal {} change patterns and {} filtered change patterns ".format(
    #     len(mapping_of_change_patterns), len(filtered_change_patterns)))

    # ------------------- Remove those change patterns that does not contain any Identifiers/Literals ------------
    for t in all_changes:
        # If the change pattern contains at-least one Identifier/Literal, we use that.
        # Else the change pattern is discarded
        if 'Idf_' in ' '.join(t['fix']) or 'Idf_' in ' '.join(t['buggy']) or 'Lit_' in ' '.join(
                t['fix']) or 'Lit_' in ' '.join(t['buggy']):
            filtered_change_patterns.append(t)

    return filtered_change_patterns

In [37]:
with open(os.path.join(benchmarks_dir, 'training_changes.json'),'r') as f:
    patterns = json.load(f)
patterns = get_only_idf_lit_containing_patterns(patterns)
patterns  = pd.DataFrame(patterns)
print(f'Found {len(patterns)} patterns')

Found 2880 patterns


In [38]:
def select_only_single_token_changes(all_change_patterns):
    only_single_operator_change = []
    only_single_idf_change = []
    only_single_lit_change = []

    for _,r in patterns.iterrows():
        pattern = r.to_dict()
        # Operator ------------------
        f = []
        b = []
        for t in pattern['fix']:
            if 'Lit_' not in t and 'Idf_' not in t:
                f.append(t)
        for t in pattern['buggy']:
            if 'Lit_' not in t and 'Idf_' not in t:
                b.append(t)
        
        di_f_b_op = len(set(f)-set(b)) == 1
        di_b_f_op = len(set(b)-set(f)) == 1
        

        # Identifier and Literal ------------
        f_idf = []
        b_idf = []
        for t in pattern['fix']:
            if 'Idf_' in t:
                f_idf.append(t)
        for t in pattern['buggy']:
            if 'Idf_' in t:
                b_idf.append(t)
        
        di_f_b_idf = len(set(f_idf)-set(b_idf)) == 1
        di_b_f_idf = len(set(b_idf)-set(f_idf)) == 1
        
        
        
        f_lit = []
        b_lit = []
        for t in pattern['fix']:
            if 'Lit_' in t:
                f_lit.append(t)
        for t in pattern['buggy']:
            if 'Lit_' in t:
                b_lit.append(t)
        
        di_f_b_lit = len(set(f_lit)-set(b_lit)) == 1
        di_b_f_lit = len(set(b_lit)-set(f_lit)) == 1
        
        if di_f_b_op and di_b_f_op and di_f_b_idf==0==di_b_f_idf and di_f_b_lit==0== di_b_f_lit:
            only_single_operator_change.append(pattern)

        if di_f_b_idf and di_b_f_idf and di_f_b_op == 0 == di_b_f_op and di_f_b_lit==0== di_b_f_lit:
            only_single_idf_change.append(pattern)

        if di_f_b_lit and di_b_f_lit and di_f_b_op == 0 == di_b_f_op and di_f_b_idf==0==di_b_f_idf:
            only_single_lit_change.append(pattern)
    return pd.DataFrame(only_single_operator_change), pd.DataFrame(only_single_idf_change), pd.DataFrame(only_single_lit_change)

single_operator_patterns, single_idf_patterns, single_lit_patterns = select_only_single_token_changes(patterns)


In [42]:
single_operator_patterns

Unnamed: 0,_id,commit_hash,commit_time,local_repo_path,url,fix,fix_tokenType,fix_file_path,fix_actual,fix_range,fix_line,buggy,buggy_tokenType,buggy_file_path,buggy_actual,buggy_range,buggy_line
0,mozilla/pdf.js_406c2b2f2b5dfeda4c5ab1132965e9a...,406c2b2f2b5dfeda4c5ab1132965e9ad7844e05c,"23/08/2011, 16:49:25",../results/top_JS_repos/pdf.js,https://github.com/mozilla/pdf.js/commit/406c2...,"[this, ., Idf_1, ., Idf_2, <=, Idf_3]",BinaryExpression,pdf.js,"[this, ., current, ., fontSize, <=, kRasterize...","[152702, 152741]",4730-4730,"[this, ., Idf_1, ., Idf_2, <, Idf_3]",BinaryExpression,pdf.js,"[this, ., current, ., fontSize, <, kRasterizer...","[152702, 152740]",4730-4730
1,mozilla/pdf.js_2ad3a8bd1c3e237a310239de168709d...,2ad3a8bd1c3e237a310239de168709d8143d5d9c,"24/09/2011, 04:29:01",../results/top_JS_repos/pdf.js,https://github.com/mozilla/pdf.js/commit/2ad3a...,"[!, Idf_1, (, Idf_2, ), ||, Idf_2, ., Idf_3, !...",LogicalExpression,pdf.js,"[!, IsArray, (, obj, ), ||, obj, ., length, !=...","[104589, 104622]",3325-3325,"[!, Idf_1, (, Idf_2, ), ||, Idf_2, ., Idf_3, =...",LogicalExpression,pdf.js,"[!, IsArray, (, obj, ), ||, obj, ., length, ==...","[104589, 104622]",3325-3325
2,sequelize/sequelize_1b593a74942ce2295a2a767823...,1b593a74942ce2295a2a767823dbea22d5efc30b,"11/12/2011, 12:07:13",../results/top_JS_repos/sequelize,https://github.com/sequelize/sequelize/commit/...,"[typeof, Idf_1, !=, Lit_1]",BinaryExpression,lib/connectors/mysql/query-generator.js,"[typeof, indexName, !=, 'string']","[7877, 7905]",213-213,"[typeof, Idf_1, ==, Lit_1]",BinaryExpression,lib/connectors/mysql/query-generator.js,"[typeof, indexName, ==, 'string']","[7877, 7905]",213-213
3,mozilla/pdf.js_499a9b0146bf2b0dbf06963e16a970c...,499a9b0146bf2b0dbf06963e16a970c4fa8f1586,"12/01/2012, 16:15:45",../results/top_JS_repos/pdf.js,https://github.com/mozilla/pdf.js/commit/499a9...,"[Idf_1, ., Idf_2, ., Idf_3, >, Lit_1]",BinaryExpression,web/viewer.js,"[textDiv, ., dataset, ., textLength, >, 1]","[30845, 30875]",976-976,"[Idf_1, ., Idf_2, ., Idf_3, >=, Lit_1]",BinaryExpression,web/viewer.js,"[textDiv, ., dataset, ., textLength, >=, 1]","[30845, 30876]",976-976
4,adobe/brackets_af1eba2ea6344298763e77b725e54a2...,af1eba2ea6344298763e77b725e54a2eb545d3fd,"07/04/2012, 02:29:25",../results/top_JS_repos/brackets,https://github.com/adobe/brackets/commit/af1eb...,"[Idf_1, ., Idf_2, >=, Idf_3, ., Idf_4]",BinaryExpression,src/editor/CSSInlineEditor.js,"[cursorCoords, ., x, >=, ruleListOffset, ., left]","[15860, 15897]",353-353,"[Idf_1, ., Idf_2, >, Idf_3, ., Idf_4]",BinaryExpression,src/editor/CSSInlineEditor.js,"[cursorCoords, ., x, >, ruleListOffset, ., left]","[15860, 15896]",353-353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,serverless/serverless_178fab9473a4704ee1640138...,178fab9473a4704ee16401381ad58f097806a72a,"01/06/2017, 08:35:37",../results/top_JS_repos/serverless,https://github.com/serverless/serverless/commi...,"[`Missing ""handler"" property in function ""${, ...",TemplateLiteral,lib/plugins/aws/package/compile/functions/inde...,"[`Missing ""handler"" property in function ""${, ...","[3743, 3802]",101-101,"[`Missing ""handler"" property in function """"${,...",TemplateLiteral,lib/plugins/aws/package/compile/functions/inde...,"[`Missing ""handler"" property in function """"${,...","[3743, 3803]",101-101
70,nodejs/node_fc6f487f2e2eaa54e2de38b1cf9529dec5...,fc6f487f2e2eaa54e2de38b1cf9529dec5f8fcd5,"05/06/2017, 22:44:12",../results/top_JS_repos/node,https://github.com/nodejs/node/commit/fc6f487f...,"[var, Idf_1, =, `${, Idf_2, }`, ;]",VariableDeclaration,lib/internal/process/warning.js,"[var, msg, =, `${, prefix, }`, ;]","[2158, 2180]",93-93,"[let, Idf_1, =, `${, Idf_2, }`, ;]",VariableDeclaration,lib/internal/process/warning.js,"[let, msg, =, `${, prefix, }`, ;]","[2158, 2180]",93-93
71,RocketChat/Rocket.Chat_69b78031e98827de07929d1...,69b78031e98827de07929d1b33d4bc2c83362e2a,"24/06/2017, 13:04:54",../results/top_JS_repos/Rocket.Chat,https://github.com/RocketChat/Rocket.Chat/comm...,"[const, Idf_1, =, [, ], ;]",VariableDeclaration,packages/rocketchat-analytics/client/loadScrip...,"[const, domains, =, [, ], ;]","[1364, 1383]",30-30,"[let, Idf_1, =, [, ], ;]",VariableDeclaration,packages/rocketchat-analytics/client/loadScrip...,"[let, domains, =, [, ], ;]","[1364, 1381]",30-30
72,expressjs/express_daf66beda49ebac6086b81dd1896...,daf66beda49ebac6086b81dd1896a34395306a71,"04/08/2017, 03:20:55",../results/top_JS_repos/express,https://github.com/expressjs/express/commit/da...,"[Idf_1, ., Idf_2, (, Idf_3, ., Idf_4, (, Idf_5...",ExpressionStatement,examples/ejs/index.js,"[app, ., use, (, express, ., static, (, path, ...","[671, 727]",29-29,"[Idf_1, ., Idf_2, (, Idf_3, ., Idf_4, (, Idf_5...",ExpressionStatement,examples/ejs/index.js,"[app, ., use, (, express, ., static, (, path, ...","[671, 728]",29-29


In [41]:
single_idf_patterns

Unnamed: 0,_id,commit_hash,commit_time,local_repo_path,url,fix,fix_tokenType,fix_file_path,fix_actual,fix_range,fix_line,buggy,buggy_tokenType,buggy_file_path,buggy_actual,buggy_range,buggy_line
0,jquery/jquery_065cd7ecf62c7b2ad5b58d447aca7a9c...,065cd7ecf62c7b2ad5b58d447aca7a9cdb6f7bcb,"18/08/2006, 06:13:13",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/065cd7...,"[Idf_1, ., Idf_2, [, Idf_3, ]]",MemberExpression,src/jquery/jquery.js,"[jQuery, ., fn, [, n, ]]","[24094, 24108]",856-856,"[Idf_1, ., Idf_2, [, Idf_4, ]]",MemberExpression,src/jquery/jquery.js,"[jQuery, ., fn, [, i, ]]","[24094, 24108]",856-856
1,jquery/jquery_ea53a61a38ecdddaea6f344fcaf74bfe...,ea53a61a38ecdddaea6f344fcaf74bfe1d37397c,"15/09/2006, 00:28:25",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/ea53a6...,"[Idf_1, ., Idf_2, (, Idf_3, ), ;]",ExpressionStatement,src/jquery/jquery.js,"[args, ., unshift, (, event, ), ;]","[68660, 68682]",2135-2135,"[Idf_1, ., Idf_4, (, Idf_3, ), ;]",ExpressionStatement,src/jquery/jquery.js,"[args, ., unshiftT, (, event, ), ;]","[68660, 68683]",2135-2135
2,jquery/jquery_cafd40805a20d02fe1a904a4c0a0cc6f...,cafd40805a20d02fe1a904a4c0a0cc6f0c525f80,"10/10/2006, 17:00:54",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/cafd40...,"[!, Idf_1, ., Idf_2, ., Idf_3]",UnaryExpression,src/fx/fx.js,"[!, z, ., el, ., oldOverflow]","[16240, 16257]",512-512,"[!, Idf_1, ., Idf_2, ., Idf_4]",UnaryExpression,src/fx/fx.js,"[!, z, ., el, ., oldOverlay]","[16240, 16256]",512-512
3,jquery/jquery_f38251b2ce84a0ab6302475786c3e721...,f38251b2ce84a0ab6302475786c3e721dba2177c,"10/09/2007, 01:17:30",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/f38251...,"[Idf_1, :, Lit_1]",Property,src/core.js,"[prevAll, :, ""jQuery.dir(a,'previousSibling')""]","[26728, 26770]",1023-1023,"[Idf_2, :, Lit_1]",Property,src/core.js,"[prevtAll, :, ""jQuery.dir(a,'previousSibling')""]","[26728, 26771]",1023-1023
4,jquery/jquery_5736e8d90d5c7961e19cb707301d9f58...,5736e8d90d5c7961e19cb707301d9f58daa0aa3b,"18/10/2007, 19:25:08",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/5736e8...,"[Idf_1, ., Idf_2, ., Idf_3, &&, Idf_4, [, Lit_...",LogicalExpression,src/core.js,"[jQuery, ., browser, ., safari, &&, window, [,...","[36258, 36307]",1299-1299,"[Idf_1, ., Idf_2, ., Idf_3, &&, Idf_6, [, Lit_...",LogicalExpression,src/core.js,"[jQuery, ., browser, ., safari, &&, self, [, ""...","[36258, 36305]",1299-1299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,transloadit/uppy_198cdee73369aa585b4c021e56639...,198cdee73369aa585b4c021e56639062020309e6,"06/08/2017, 09:59:35",../results/top_JS_repos/uppy,https://github.com/transloadit/uppy/commit/198...,"[return, `${, this, ., Idf_1, ., Idf_2, }/${, ...",ReturnStatement,src/uppy-base/src/plugins/Provider.js,"[return, `${, this, ., opts, ., host, }/${, th...","[795, 840]",36-36,"[return, `${, this, ., Idf_1, ., Idf_2, }/${, ...",ReturnStatement,src/uppy-base/src/plugins/Provider.js,"[return, `${, this, ., opts, ., host, }/${, th...","[795, 850]",36-36
392,transloadit/uppy_336e7d1663e244831df2bedddc8a9...,336e7d1663e244831df2bedddc8a9f2cb983053d,"09/08/2017, 18:19:23",../results/top_JS_repos/uppy,https://github.com/transloadit/uppy/commit/336...,"[this, ., Idf_1, ., Idf_2, (, )]",CallExpression,src/generic-provider-views/index.js,"[this, ., Provider, ., checkAuth, (, )]","[3083, 3108]",99-99,"[this, ., Idf_1, ., Idf_3, (, )]",CallExpression,src/generic-provider-views/index.js,"[this, ., Provider, ., auth, (, )]","[3083, 3103]",99-99
393,transloadit/uppy_ae530aed6a257ff091b38546d9eb0...,ae530aed6a257ff091b38546d9eb00dbbba3dc39,"30/08/2017, 14:21:51",../results/top_JS_repos/uppy,https://github.com/transloadit/uppy/commit/ae5...,"[this, ., Idf_1, ., Idf_2, (, Idf_3, )]",ExpressionStatement,src/plugins/Webcam/index.js,"[this, ., core, ., addFile, (, file, )]","[5359, 5382]",171-171,"[this, ., Idf_1, ., Idf_4, (, Idf_3, )]",ExpressionStatement,src/plugins/Webcam/index.js,"[this, ., core, ., addfile, (, file, )]","[5359, 5382]",171-171
394,RocketChat/Rocket.Chat_94699eefc29743da8ab0f03...,94699eefc29743da8ab0f03c6937cde35f359a21,"31/08/2017, 16:32:14",../results/top_JS_repos/Rocket.Chat,https://github.com/RocketChat/Rocket.Chat/comm...,"[Idf_1, =, Idf_2, ., Idf_3, ., Idf_4, (, Idf_1...",ExpressionStatement,packages/rocketchat-autotranslate/server/autot...,"[message, =, RocketChat, ., Markdown, ., parse...","[2661, 2716]",94-94,"[Idf_1, =, Idf_2, ., Idf_3, ., Idf_5, (, Idf_1...",ExpressionStatement,packages/rocketchat-autotranslate/server/autot...,"[message, =, RocketChat, ., Markdown, ., parse...","[2661, 2706]",94-94


In [40]:
single_lit_patterns

Unnamed: 0,_id,commit_hash,commit_time,local_repo_path,url,fix,fix_tokenType,fix_file_path,fix_actual,fix_range,fix_line,buggy,buggy_tokenType,buggy_file_path,buggy_actual,buggy_range,buggy_line
0,jquery/jquery_69ae9aeab0bae23f0fb6673d5ad58ffc...,69ae9aeab0bae23f0fb6673d5ad58ffce44c7841,"06/06/2006, 14:51:44",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/69ae9a...,"[Lit_1, :, Lit_2]",Property,jquery/jquery.js,"['#', :, ""a.attributes['id'].nodeValue == m[2]""]","[9350, 9393]",401-401,"[Lit_1, :, Lit_3]",Property,jquery/jquery.js,"['#', :, ""a.getAttribute('id') == m[2]""]","[9350, 9385]",401-401
1,jquery/jquery_32548da2476603b40df3a15afb536b27...,32548da2476603b40df3a15afb536b2750321aec,"05/07/2006, 04:03:38",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/32548d...,"[var, Idf_1, =, Idf_2, ., Idf_3, (, this, ., I...",VariableDeclaration,jquery/jquery.js,"[var, ret, =, jQuery, ., map, (, this, ., cur,...","[6344, 6390]",274-274,"[var, Idf_1, =, Idf_2, ., Idf_3, (, this, ., I...",VariableDeclaration,jquery/jquery.js,"[var, ret, =, jQuery, ., map, (, this, ., cur,...","[6344, 6390]",274-274
2,jquery/jquery_f7efcc858dec55fa3b9babb94ebc73b0...,f7efcc858dec55fa3b9babb94ebc73b0888e6d51,"31/08/2006, 05:59:51",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/f7efcc...,"[Idf_1, :, Lit_1]",Property,src/jquery/jquery.js,"[children, :, ""jQuery.sibling(a.firstChild)""]","[67382, 67422]",2386-2386,"[Idf_1, :, Lit_2]",Property,src/jquery/jquery.js,"[children, :, ""a.childNodes""]","[67382, 67406]",2386-2386
3,jquery/jquery_18e563abe53d7a7fb4169c869f01812c...,18e563abe53d7a7fb4169c869f01812c119ae191,"01/09/2006, 01:30:29",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/18e563...,"[Idf_1, :, Lit_1, ., Idf_2, (, Idf_3, ), &&, !...",Property,src/jquery/jquery.js,"[mozilla, :, /mozilla/, ., test, (, b, ), &&, ...","[47894, 47954]",1656-1656,"[Idf_1, :, Lit_1, ., Idf_2, (, Idf_3, ), &&, !...",Property,src/jquery/jquery.js,"[mozilla, :, /mozilla/, ., test, (, b, ), &&, ...","[47894, 47945]",1656-1656
4,jquery/jquery_c136717119ec986be67848ec45b56e23...,c136717119ec986be67848ec45b56e23d77d51fd,"30/11/2006, 00:25:33",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/c13671...,"[var, Idf_1, =, Lit_1, ;]",VariableDeclaration,build/build/lite.js,"[var, blockMatch, =, /\s*\/\*\*\s*((.|\n|\r\n)...","[32, 90]",3-3,"[var, Idf_1, =, Lit_2, ;]",VariableDeclaration,build/build/lite.js,"[var, blockMatch, =, /\s*\/\*\*\s*((.|\n)*?)\s...","[32, 85]",3-3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,adobe/brackets_fa18156fccd896a64aa107594228073...,fa18156fccd896a64aa107594228073f42addad4,"22/08/2017, 21:10:19",../results/top_JS_repos/brackets,https://github.com/adobe/brackets/commit/fa181...,"[Lit_1, :, Lit_2]",Property,src/nls/hr/strings.js,"[""ERROR_PREFS_CORRUPT_TITLE"", :, ""Greška prili...","[4243, 4322]",65-65,"[Lit_1, :, Lit_3]",Property,src/nls/hr/strings.js,"[""ERROR_PREFS_CORRUPT_TITLE"", :, ""Greška prili...","[4243, 4320]",65-65
802,serverless/serverless_490f517d1219c58f12438884...,490f517d1219c58f12438884409e11f6613e6a2e,"26/08/2017, 12:41:25",../results/top_JS_repos/serverless,https://github.com/serverless/serverless/commi...,"[Idf_1, (, Idf_2, ., Idf_3, ), ., Idf_4, ., Id...",ExpressionStatement,lib/plugins/aws/provider/awsProvider.test.js,"[expect, (, err, ., message, ), ., to, ., cont...","[6782, 6834]",241-241,"[Idf_1, (, Idf_2, ., Idf_3, ), ., Idf_4, ., Id...",ExpressionStatement,lib/plugins/aws/provider/awsProvider.test.js,"[expect, (, err, ., message, ), ., to, ., cont...","[6782, 6837]",241-241
803,mrdoob/three.js_45c15c792f448502d1da1eabb73c21...,45c15c792f448502d1da1eabb73c217df4275c3c,"06/09/2017, 03:23:41",../results/top_JS_repos/three.js,https://github.com/mrdoob/three.js/commit/45c1...,"[(, Idf_1, &, Lit_1, ), <<, Lit_2]",BinaryExpression,examples/js/libs/msgpack-js.js,"[(, byte, &, 0x1f, ), <<, 6]","[2644, 2662]",90-90,"[(, Idf_1, &, Lit_3, ), <<, Lit_2]",BinaryExpression,examples/js/libs/msgpack-js.js,"[(, byte, &, 0x0f, ), <<, 6]","[2644, 2662]",90-90
804,RocketChat/Rocket.Chat_47cf56f20a6e7aabecc540e...,47cf56f20a6e7aabecc540e47df583464fc511c4,"06/09/2017, 15:40:43",../results/top_JS_repos/Rocket.Chat,https://github.com/RocketChat/Rocket.Chat/comm...,"[return, Lit_1, ;]",ReturnStatement,packages/rocketchat-ui/client/lib/textarea-aut...,"[return, true, ;]","[2027, 2039]",67-67,"[return, Lit_2, ;]",ReturnStatement,packages/rocketchat-ui/client/lib/textarea-aut...,"[return, false, ;]","[2027, 2040]",67-67


In [16]:
patterns

Unnamed: 0,_id,commit_hash,commit_time,local_repo_path,url,fix,fix_tokenType,fix_file_path,fix_actual,fix_range,fix_line,buggy,buggy_tokenType,buggy_file_path,buggy_actual,buggy_range,buggy_line
0,jquery/jquery_c75701a0152a589c21c4862c33b69190...,c75701a0152a589c21c4862c33b691902a8b8f7b,"17/05/2006, 20:46:51",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/c75701...,"[typeof, Idf_1, !=, Lit_1]",BinaryExpression,jquery/jquery.js,"[typeof, v, !=, 'undefined']","[14637, 14660]",576-576,"[Idf_1, !==, Lit_2]",BinaryExpression,jquery/jquery.js,"[v, !==, null]","[14637, 14647]",576-576
1,jquery/jquery_65b1081e9e9f9b8c4a0f1621c98e823e...,65b1081e9e9f9b8c4a0f1621c98e823eac3e6bcf,"03/06/2006, 14:48:19",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/65b108...,"[Idf_1, =, new, Idf_2, ., Idf_3, ., Idf_4, (, ...",ExpressionStatement,fx/fx.js,"[ef, =, new, $, ., fx, ., Opacity, (, this, ,,...","[1196, 1226]",54-54,"[Idf_1, =, new, Idf_3, ., Idf_4, (, this, ,, I...",ExpressionStatement,fx/fx.js,"[ef, =, new, fx, ., Opacity, (, this, ,, o, ), ;]","[1196, 1224]",54-54
2,jquery/jquery_69ae9aeab0bae23f0fb6673d5ad58ffc...,69ae9aeab0bae23f0fb6673d5ad58ffce44c7841,"06/06/2006, 14:51:44",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/69ae9a...,"[Lit_1, :, Lit_2]",Property,jquery/jquery.js,"['#', :, ""a.attributes['id'].nodeValue == m[2]""]","[9350, 9393]",401-401,"[Lit_1, :, Lit_3]",Property,jquery/jquery.js,"['#', :, ""a.getAttribute('id') == m[2]""]","[9350, 9385]",401-401
3,jquery/jquery_a457262ef5bcb163417a1f6db0b770f1...,a457262ef5bcb163417a1f6db0b770f1fc21b3a4,"11/06/2006, 03:16:36",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/a45726...,"[{, return, (, !, Idf_1, ||, Idf_1, ==, Idf_2,...",BlockStatement,fx/fx.js,"[{, return, (, !, a, ||, a, ==, d, ||, b, ==, ...","[4595, 4621]",176-176,"[{, return, (, !, Idf_1, ||, Idf_1, ==, Idf_4,...",BlockStatement,fx/fx.js,"[{, return, (, !, a, ||, a, ==, c, ||, b, ==, ...","[4595, 4621]",176-176
4,jquery/jquery_34dd8afb259be3ae154887dc7db6b8a8...,34dd8afb259be3ae154887dc7db6b8a8fdab0b6a,"16/06/2006, 02:02:54",../results/top_JS_repos/jquery,https://github.com/jquery/jquery/commit/34dd8a...,"[Idf_1, =, Idf_1, ||, [, Idf_2, ., Idf_3, ., I...",ExpressionStatement,jquery/jquery.js,"[data, =, data, ||, [, $, ., event, ., fix, (,...","[19793, 19840]",806-806,"[Idf_1, =, Idf_1, ||, [, {, Idf_5, :, Idf_5, }...",ExpressionStatement,jquery/jquery.js,"[data, =, data, ||, [, {, type, :, type, }, ], ;]","[19793, 19825]",806-806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3595,nwjs/nw.js_67ca76a0c8aff7be2420ba4ad75f9f8198a...,67ca76a0c8aff7be2420ba4ad75f9f8198a9f430,"06/11/2019, 02:55:20",../results/top_JS_repos/nw.js,https://github.com/nwjs/nw.js/commit/67ca76a0c...,"[!, Idf_1, ., Idf_2, (, Lit_1, ), &&, Idf_1, !...",LogicalExpression,src/resources/api_nw_newwin.js,"[!, key, ., endsWith, (, 'Internal', ), &&, ke...","[20806, 20850]",614-614,"[!, Idf_1, ., Idf_2, (, Lit_1, )]",UnaryExpression,src/resources/api_nw_newwin.js,"[!, key, ., endsWith, (, 'Internal', )]","[20806, 20831]",614-614
3596,serverless/serverless_5c44c885818107cd15ed3ebc...,5c44c885818107cd15ed3ebcddb048f7b60d8699,"07/11/2019, 10:26:04",../results/top_JS_repos/serverless,https://github.com/serverless/serverless/commi...,"[return, this, ., Idf_1, [, Lit_1, ], ===, Lit...",ReturnStatement,lib/plugins/aws/provider/awsProvider.js,"[return, this, ., options, [, 'aws-s3-accelera...","[14762, 14813]",410-410,"[return, !, !, this, ., Idf_1, [, Lit_3, ], ;]",ReturnStatement,lib/plugins/aws/provider/awsProvider.js,"[return, !, !, this, ., options, [, 'no-aws-s3...","[14762, 14808]",410-410
3597,TryGhost/Ghost_6c921bf2ef554c93c6bba1fd2c1fb29...,6c921bf2ef554c93c6bba1fd2c1fb2961dce2700,"08/11/2019, 11:28:07",../results/top_JS_repos/Ghost,https://github.com/TryGhost/Ghost/commit/6c921...,"[Idf_1, (, Lit_1, ), ., Idf_2, (, ), ., Idf_3,...",ExpressionStatement,core/test/regression/site/frontend_spec.js,"[$, (, 'title', ), ., text, (, ), ., should, ....","[13982, 14038]",354-354,"[Idf_1, (, Lit_1, ), ., Idf_2, (, ), ., Idf_3,...",ExpressionStatement,core/test/regression/site/frontend_spec.js,"[$, (, 'title', ), ., text, (, ), ., should, ....","[13982, 14036]",354-354
3598,microsoft/TypeScript_5c951635e511aa0963ddf42f6...,5c951635e511aa0963ddf42f66d3df64e08e0533,"11/11/2019, 20:24:20",../results/top_JS_repos/TypeScript,https://github.com/microsoft/TypeScript/commit...,"[const, Idf_1, =, Idf_2, (, Lit_1, ), ;]",VariableDeclaration,scripts/build/utils.js,"[const, chalk, =, require, (, ""chalk"", ), ;]","[298, 329]",11-11,"[const, {, Idf_3, :, Idf_1, }, =, Idf_2, (, Li...",VariableDeclaration,scripts/build/utils.js,"[const, {, default, :, chalk, }, =, require, (...","[298, 342]",11-11


## Generic mutation operators for branch conditions

There are three sets of mutation operators. Set one is for *loop statement*, set two for *conditional statement* and
the final set is for *return statement*. Many mutation operators for set one and two overlap. For SemSeed it
does not matter if the mutation is made to a binary operator in a loop or a conditional. Hence we do not separate
this. The following are the SemSeed counterparts of all mutation operators.

#### 1. Change literal value in the condition

In [45]:
# Collect the patterns that is also seeded by mutandis
list_of_all_patterns_also_seded_by_mutandis = set()

selected_patterns = []
for _,r in single_lit_patterns.iterrows():
    pattern = r.to_dict()
    if pattern['buggy_tokenType'] == 'BinaryExpression' == pattern['fix_tokenType']:
        selected_patterns.append(pattern)
        list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 17


Unnamed: 0,fix_actual,buggy_actual
0,"[code, ===, '(\n)']","[code, ===, '(undefined\n)']"
1,"['linux freebsd darwin', ., indexOf, (, proces...","['linux freebsd', ., indexOf, (, process, ., p..."
2,"[charsets, [, 0, ], ===, '.notdef']","[charsets, [, 0, ], ===, '.notedef']"
3,"[ev, ., type, ., toLowerCase, (, ), !=, 'touch...","[ev, ., type, ., toLowerCase, (, ), !=, ""touch..."
4,"[typeof, direction, !==, 'string']","[typeof, direction, !==, 'String']"
5,"[""This project is already at "", +, releaseToPr...","[""This project is already at Meteor "", +, rele..."
6,"[""This project is already at "", +, releaseToPr...","[""This project is already at Meteor "", +, rele..."
7,['Just head back to the discussion here: http:...,['Just head back to the discussion here: http:...
8,['Just head back to the discussion here: http:...,['Just head back to the discussion here: http:...
9,"[""\t\tdefine([], "", +, amdFactory, +, "");\n""]","[""\t\tdefine("", +, amdFactory, +, "");\n""]"


Selected the following

In [9]:
print(selected_patterns.iloc[6,4])
print(selected_patterns.iloc[9,4])

https://github.com/meteor/meteor/commit/339706882b22e9f9caa667638ac93d9aa343285d
https://github.com/webpack/webpack/commit/59656c9717c4d1b78232563d8fbe077067dd4c20


#### Matching seeded bug

In [10]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[6,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
4018,benchmarks/data/geometria-lab/Beseda/benchmark...,"[""Successfully stopped daemon with pid: "", +, ...","[""/admin/users/"", +, pid]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '20-20', 'range': [536, 582]}",1010,1018,False,0.4540,https://github.com/meteor/meteor/commit/339706...
12343,benchmarks/data/joshkurz/Black-Belt-AngularJS-...,"[""directives/"", +, name]","['directives/', +, name]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '172-172', 'range': [5170, 5188]}",1055,1057,False,0.3520,https://github.com/meteor/meteor/commit/339706...
12594,benchmarks/data/sebastienros/jint/Jint.Tests.E...,"["""", +, index]","[1, +, index]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '5-5', 'range': [186, 196]}",1005,1005,False,0.3115,https://github.com/meteor/meteor/commit/339706...
12955,benchmarks/data/xtuple/xtuple-server-core/task...,"[""# host mappings for: "", +, id]","['id', +, id]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '10-10', 'range': [449, 477]}",1008,1007,False,0.3735,https://github.com/meteor/meteor/commit/339706...
16826,benchmarks/data/jsdelivr/jsdelivr/files/spry/1...,"[""on"", +, eventType]","[""propertychange"", +, eventType]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '378-378', 'range': [18924, 18940]}",1106,1044,False,0.3030,https://github.com/meteor/meteor/commit/339706...
...,...,...,...,...,...,...,...,...,...,...,...
667404,benchmarks/data/Fluidbyte/ColtJS/src/colt.js,"[""Model Sync Error: [req] : "", +, req]","[404, +, req]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '325-325', 'range': [13526, 13560]}",1092,1039,False,0.4060,https://github.com/meteor/meteor/commit/339706...
667863,benchmarks/data/blackberry/BB10-WebWorks-Frame...,"[""| "", +, file]","[""file"", +, file]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '36-36', 'range': [1073, 1084]}",1021,1029,False,0.3275,https://github.com/meteor/meteor/commit/339706...
667881,benchmarks/data/sebastienros/jint/Jint.Tests.E...,"[""#3: obj[0] = -1; obj.length = {valueOf: func...","[""#8.2: obj[0] = -1; obj.length = {valueOf: fu...","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '37-37', 'range': [812, 948]}",1001,1012,False,0.3685,https://github.com/meteor/meteor/commit/339706...
670341,benchmarks/data/eu81273/AngularJS-RequireJS-Sa...,"[""I'm the 2nd controller! "", +, tester]","['test', +, tester]","[Lit_1, +, Idf_1]","[Lit_2, +, Idf_1]","{'line': '5-5', 'range': [159, 194]}",1005,1003,False,0.3295,https://github.com/meteor/meteor/commit/339706...


#### 2. Replace relational operators

In [46]:
relational_operators = {'<', '>', '<=', '>=', '==', '!=', '===', '!=='}
selected_patterns = []
for _,r in single_operator_patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    if pattern['buggy_tokenType'] == 'BinaryExpression' == pattern['fix_tokenType']:
        for t in pattern['fix']:
            if 'Lit_' not in t and 'Idf_' not in t:
                f.append(t)
        for t in pattern['buggy']:
            if 'Lit_' not in t and 'Idf_' not in t:
                b.append(t)
        
        if len(f) == 1 == len(b) and f != b and relational_operators.intersection(set(f)) and relational_operators.intersection(set(b)):
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 11


Unnamed: 0,fix_actual,buggy_actual
0,"[start, <=, cursor]","[start, <, cursor]"
1,"[newIndex, !==, oldIndex]","[newIndex, !=, oldIndex]"
2,"[newIndex, !=, oldIndex]","[newIndex, !==, oldIndex]"
3,"[strCounter, >=, 0]","[strCounter, >, 0]"
4,"[n, !==, title]","[n, !=, title]"
5,"[i, >, 0]","[i, >=, 0]"
6,"[influence, !==, 0]","[influence, >, 0]"
7,"[det, >, 0]","[det, >=, 0]"
8,"[offset, >=, 0]","[offset, >, 0]"
9,"[shaderID, !==, undefined]","[shaderID, ===, undefined]"


Selected the following

In [12]:
print(selected_patterns.iloc[4,4])
print(selected_patterns.iloc[5,4])

https://github.com/discourse/discourse/commit/1dd6699bf22d9a3f387fee36a1b46671c284d9ce
https://github.com/adobe/brackets/commit/30d50a59efd095b3788bad4d19c5c827c115fe67


#### Matching seeded bug

In [13]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sample(5)
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
517979,benchmarks/data/sebastienros/jint/Jint.Tests.E...,"[index_n, <=, index]","[index_n, <, index]","[Idf_1, <=, Idf_2]","[Idf_1, <, Idf_2]","{'line': '13-13', 'range': [416, 432]}",1005,1005,False,0.3535,https://github.com/adobe/brackets/commit/cdf9d...
560247,benchmarks/data/acumenbrands/rest_suite/lib/ne...,"[index, <=, count]","[index, <, count]","[Idf_1, <=, Idf_2]","[Idf_1, <, Idf_2]","{'line': '152-152', 'range': [6346, 6360]}",1076,1010,False,0.45,https://github.com/adobe/brackets/commit/cdf9d...
234826,benchmarks/data/qooxdoo/qooxdoo/framework/sour...,"[i, <=, max]","[i, <, max]","[Idf_1, <=, Idf_2]","[Idf_1, <, Idf_2]","{'line': '27-27', 'range': [1037, 1045]}",1045,1003,False,0.3725,https://github.com/adobe/brackets/commit/cdf9d...
422921,benchmarks/data/jstacoder/flask-ide/flask_ide/...,"[cur, <=, end]","[cur, <, end]","[Idf_1, <=, Idf_2]","[Idf_1, <, Idf_2]","{'line': '35-35', 'range': [1485, 1495]}",1021,1004,False,0.3455,https://github.com/adobe/brackets/commit/cdf9d...
99076,benchmarks/data/sebastienros/jint/Jint.Tests.E...,"[index_n, <=, index]","[index_n, <, index]","[Idf_1, <=, Idf_2]","[Idf_1, <, Idf_2]","{'line': '23-23', 'range': [733, 749]}",1005,1005,False,0.3535,https://github.com/adobe/brackets/commit/cdf9d...


#### 3. Replace logical operators

In [47]:
logical_operators = {'&&', '||'}
selected_patterns = []
for _,r in single_operator_patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    if pattern['buggy_tokenType'] == 'LogicalExpression' == pattern['fix_tokenType']:
        for t in pattern['fix']:
            if 'Lit_' not in t and 'Idf_' not in t:
                f.append(t)
        for t in pattern['buggy']:
            if 'Lit_' not in t and 'Idf_' not in t:
                b.append(t)
        f_lops = logical_operators.intersection(set(f))
        b_lops = logical_operators.intersection(set(b))

        if  f != b and f_lops and b_lops and f_lops != b_lops :
            selected_patterns.append(pattern)
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 4


Unnamed: 0,fix_actual,buggy_actual
0,"[err, ., code, !==, 134, &&, err, ., signal, !...","[err, ., code, !==, 134, ||, err, ., signal, !..."
1,"[!, token, ||, !, token, [, 1, ]]","[!, token, &&, !, token, [, 1, ]]"
2,"[!, offset, ., x, &&, !, offset, ., y]","[!, offset, ., x, ||, !, offset, ., y]"
3,"[pose, ., angularVelocity, !==, null, &&, !, a...","[pose, ., angularVelocity, !==, null, ||, !, a..."


Selected the following

#### Matching seeded bug

In [24]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[2,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
654442,benchmarks/data/cdnjs/cdnjs/ajax/libs/custom-e...,"[!, property, ., set, &&, !, property, ., get]","[!, property, ., set, ||, !, property, ., get]","[!, Idf_1, ., Idf_2, &&, !, Idf_1, ., Idf_3]","[!, Idf_1, ., Idf_2, ||, !, Idf_1, ., Idf_3]","{'line': '158-158', 'range': [5838, 5868]}",1083,1011,False,0.314,https://github.com/Leaflet/Leaflet/commit/c4b6...


#### 4. Swap consecutive nested for/while

We can't have this pattern since we look for single line changes

#### 5. Replace arithmetic operators

In [48]:
arithmetic_operators = {'+' '-' '/' '*' '%' '**', '+=', '-='}
selected_patterns = []
for _,r in single_operator_patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    # if pattern['buggy_tokenType'] == 'LogicalExpression' == pattern['fix_tokenType']:
    for t in pattern['fix']:
        if 'Lit_' not in t and 'Idf_' not in t:
            f.append(t)
    for t in pattern['buggy']:
        if 'Lit_' not in t and 'Idf_' not in t:
            b.append(t)
    f_lops = arithmetic_operators.intersection(set(f))
    b_lops = arithmetic_operators.intersection(set(b))

    if  f != b and f_lops and b_lops and f_lops != b_lops :
        list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 1


Unnamed: 0,fix_actual,buggy_actual
0,"[minutes, -=, offsetMinutes, ;]","[minutes, +=, offsetMinutes, ;]"


Selected the following

In [69]:
print(selected_patterns.iloc[0, 4])

https://github.com/mozilla/pdf.js/commit/4acb744f77a3828efdd26d08f6ffd9517302b3a8
https://github.com/photonstorm/phaser/commit/b48c2a8173b357b391aa4c16ccbfa3346dd9038f


#### Matching seeded bug

In [26]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
163860,benchmarks/data/chadn/heroku-sails/node_module...,"[end, -=, lenIncomplete, ;]","[end, +=, lenIncomplete, ;]","[Idf_1, -=, Idf_2, ;]","[Idf_1, +=, Idf_2, ;]","{'line': '52-52', 'range': [1918, 1939]}",1015,1006,False,0.3625,https://github.com/mozilla/pdf.js/commit/4acb7...
443032,benchmarks/data/tcorral/Refactoring_Patterns/1...,"[nSubTotal, -=, nDiscountValue, ;]","[nSubTotal, +=, nDiscountValue, ;]","[Idf_1, -=, Idf_2, ;]","[Idf_1, +=, Idf_2, ;]","{'line': '17-17', 'range': [615, 643]}",1012,1001,False,0.3375,https://github.com/mozilla/pdf.js/commit/4acb7...
612263,benchmarks/data/mdipierro/web2py-appliances/Sy...,"[cpt_last, -=, step, ;]","[cpt_last, +=, step, ;]","[Idf_1, -=, Idf_2, ;]","[Idf_1, +=, Idf_2, ;]","{'line': '100-100', 'range': [5084, 5101]}",1071,1045,False,0.3495,https://github.com/mozilla/pdf.js/commit/4acb7...


#### 6. Replace x++/x-- with ++x/--x
**Note:** Could not find an instance where this happens

In [29]:
UpdateOperator = {'++', '--'}
selected_patterns = []
for _,r in single_operator_patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    # if pattern['buggy_tokenType'] == 'LogicalExpression' == pattern['fix_tokenType']:
    for t in pattern['fix']:
        if 'Lit_' not in t and 'Idf_' not in t:
            f.append(t)
    for t in pattern['buggy']:
        if 'Lit_' not in t and 'Idf_' not in t:
            b.append(t)
    f_lops = UpdateOperator.intersection(set(f))
    b_lops = UpdateOperator.intersection(set(b))

    if  f != b and f_lops and b_lops and f_lops != b_lops :
        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
if len(selected_patterns):
    selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 0


#### 7. Remove 'break'/'continue'

**Note:** Could not find removing the statements. We could only find instances where either of these
statements has been replaced with another.

In [59]:
selected_patterns = []
toks = ['break', 'continue']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    for t in pattern['fix']:
        if 'Lit_' not in t and 'Idf_' not in t:
            f.append(t)
    for t in pattern['buggy']:
        if 'Lit_' not in t and 'Idf_' not in t:
            b.append(t)
    for op in toks:

        if op in f and op not in b:
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                selected_patterns.append(pattern)
        if op not in f and op in b:
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 1


Unnamed: 0,fix_actual,buggy_actual
0,"[if, (, !, module, ., chunkCondition, ), conti...","[if, (, !, module, ., chunkCondition, ), retur..."


Selected the following

In [61]:
print(selected_patterns.iloc[0,4])
# print(selected_patterns.iloc[1,4])

https://github.com/webpack/webpack/commit/114abeeb4d1023b3da8f5ac22686c83ff78356a7


#### Matching seeded bug

In [62]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url


#### 8. Replace 0/1 with false/true and vice-versa in the condition

In [49]:
selected_patterns = []
toks = ['0', '1']
rep_toks = ['false', 'true']
seen_url = set()
for _,r in single_lit_patterns.iterrows():
    pattern = r.to_dict()
    for idx, tk in enumerate(toks):
        if tk in pattern['fix_actual'] and rep_toks[idx] in pattern['buggy_actual']:
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                selected_patterns.append(pattern)
        if rep_toks[idx] in pattern['fix_actual'] and tk in pattern['buggy_actual']:
                    if pattern['url'] not in seen_url:
                        seen_url.add(pattern['url'])
                        list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 4


Unnamed: 0,fix_actual,buggy_actual
0,"[timerId, =, true, ;]","[timerId, =, 1, ;]"
1,"[equals, (, obj, ., contains, (, data, [, 0, ]...","[equals, (, obj, ., contains, (, data, [, 0, ]..."
2,"[EI, :, {, fnName, :, 'endInlineImage', ,, num...","[EI, :, {, fnName, :, 'endInlineImage', ,, num..."
3,"[{, model, :, Label, ,, required, :, false, ,,...","[{, model, :, Label, ,, required, :, false, ,,..."


#### Matching seeded bug

In [33]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
65,benchmarks/data/xinyu198736/htmljs/assets/js/p...,"[PCAAV, =, """", ;]","[PCAAV, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '68-68', 'range': [18839, 18850]}",1034,1006,False,0.3625,https://github.com/jquery/jquery/commit/ab1504...
349,benchmarks/data/yetone/collipa/collipa/static/...,"[a, =, true, ;]","[a, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '938-938', 'range': [41875, 41884]}",1323,1307,False,0.5145,https://github.com/jquery/jquery/commit/ab1504...
3318,benchmarks/data/kandanapp/kandan/app/assets/ja...,"[element, =, null, ;]","[element, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '77-77', 'range': [3207, 3222]}",1083,1054,False,0.4090,https://github.com/jquery/jquery/commit/ab1504...
5220,benchmarks/data/micheleg/dash-to-dock/myDash.js,"[pos, =, 0, ;]","[pos, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '714-714', 'range': [32244, 32252]}",1466,1066,False,0.4185,https://github.com/jquery/jquery/commit/ab1504...
5913,benchmarks/data/yathit/ydn-db/test/crud/list_t...,"[get_done, =, true, ;]","[get_done, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '154-154', 'range': [3930, 3946]}",1057,1034,False,0.6450,https://github.com/jquery/jquery/commit/ab1504...
...,...,...,...,...,...,...,...,...,...,...,...
670182,benchmarks/data/ASTPP/ASTPP/web_interface/astp...,"[val, =, 0, ;]","[val, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '44-44', 'range': [1750, 1758]}",1018,1012,False,0.3530,https://github.com/jquery/jquery/commit/ab1504...
672416,benchmarks/data/atesh/XKit/xkit.js,"[conflicting_extension, =, true, ;]","[conflicting_extension, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '695-695', 'range': [33128, 33157]}",1170,1247,False,0.6160,https://github.com/jquery/jquery/commit/ab1504...
673733,benchmarks/data/jonschlinkert/engines/index.js,"[done, =, true, ;]","[done, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '71-71', 'range': [1812, 1824]}",1053,1032,False,0.6270,https://github.com/jquery/jquery/commit/ab1504...
673863,benchmarks/data/OpenAcademy/HTML5-open-academy...,"[inEditor, =, true, ;]","[inEditor, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '51-51', 'range': [1878, 1894]}",1040,1035,False,0.6185,https://github.com/jquery/jquery/commit/ab1504...


#### 9. Remove return
Not possible in our pattern

In [50]:
selected_patterns = []
tk = 'return'
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    copy_patter = list(pattern['fix_actual'])
    if tk in pattern['fix_actual'] and tk not in pattern['buggy_actual']:
        copy_patter.remove(tk)
        if pattern['buggy_actual'] == copy_patter:
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 13


Unnamed: 0,fix_actual,buggy_actual
0,"[return, f, ., apply, (, this, ,, arguments, )...","[f, ., apply, (, this, ,, arguments, ), ;]"
1,"[if, (, err, ), return, cb, (, err, ), ;]","[if, (, err, ), cb, (, err, ), ;]"
2,"[return, defaultFocus, ., apply, (, this, ,, a...","[defaultFocus, ., apply, (, this, ,, arguments..."
3,"[return, local_moment, ., apply, (, null, ,, a...","[local_moment, ., apply, (, null, ,, arguments..."
4,"[return, callback, (, err, ), ;]","[callback, (, err, ), ;]"
5,"[return, ParanoidUser, ., bulkCreate, (, data, )]","[ParanoidUser, ., bulkCreate, (, data, )]"
6,"[return, del, (, paths, ., out, ,, done, ), ;]","[del, (, paths, ., out, ,, done, ), ;]"
7,"[return, this, ., _imap, ., serverSupports, (,...","[this, ., _imap, ., serverSupports, (, capabil..."
8,"[if, (, db, ==, null, ), return, resolve, (, )]","[if, (, db, ==, null, ), resolve, (, )]"
9,"[return, this, ., play, (, null, ,, 0, ,, volu...","[this, ., play, (, null, ,, 0, ,, volume, ,, t..."


In [66]:
print(selected_patterns.iloc[0,4])
print(selected_patterns.iloc[5,4])

https://github.com/jquery/jquery/commit/a8bafe78745247dd13e3472582df29471e50aa28
https://github.com/sequelize/sequelize/commit/95d5ebac6eb0ebda411218c47e6db33cd210f005


#### Matching seeded bug

In [67]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[4,4]]
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
4150,benchmarks/data/jxcore/jxcore/deps/v8/test/mjs...,"[return, Number, (, val, ), ;]","[Number, (, val, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '2-2', 'range': [29, 48]}",1005,1143,False,0.3075,https://github.com/webpack/webpack/commit/6085...
4788,benchmarks/data/joola/joola/test/unit/99_REST/...,"[return, done, (, err, ), ;]","[done, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '95-95', 'range': [3447, 3464]}",1019,1028,False,0.8090,https://github.com/webpack/webpack/commit/6085...
10298,benchmarks/data/joola/joola/test/unit/99_REST/...,"[return, done, (, err, ), ;]","[done, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '20-20', 'range': [645, 662]}",1019,1028,False,0.8090,https://github.com/webpack/webpack/commit/6085...
17665,benchmarks/data/jsbin/jsbin/lib/handlers/sessi...,"[return, next, (, err, ), ;]","[next, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '53-53', 'range': [1808, 1825]}",1098,1060,False,0.7515,https://github.com/webpack/webpack/commit/6085...
19188,benchmarks/data/jsbin/jsbin/lib/handlers/sessi...,"[return, next, (, err, ), ;]","[next, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '23-23', 'range': [875, 892]}",1098,1060,False,0.7515,https://github.com/webpack/webpack/commit/6085...
...,...,...,...,...,...,...,...,...,...,...,...
635297,benchmarks/data/mozilla/node-convict/test/stat...,"[return, done, (, e, ), ;]","[done, (, e, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '60-60', 'range': [1884, 1899]}",1013,1010,False,0.5325,https://github.com/webpack/webpack/commit/6085...
641197,benchmarks/data/hectorcorrea/hectorcorrea.com/...,"[return, callback, (, err, ), ;]","[callback, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '127-127', 'range': [3973, 3994]}",1024,1019,False,1.0000,https://github.com/webpack/webpack/commit/6085...
645877,benchmarks/data/petkaantonov/bluebird/test/moc...,"[return, clearTimeout, (, id, ), ;]","[clearTimeout, (, id, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '50-50', 'range': [2154, 2178]}",1023,1022,False,0.4040,https://github.com/webpack/webpack/commit/6085...
659675,benchmarks/data/hectorcorrea/hectorcorrea.com/...,"[return, callback, (, err, ), ;]","[callback, (, err, ), ;]","[return, Idf_1, (, Idf_2, ), ;]","[Idf_1, (, Idf_2, ), ;]","{'line': '131-131', 'range': [4253, 4274]}",1024,1019,False,1.0000,https://github.com/webpack/webpack/commit/6085...


#### 10. Replace return true with return false and vice versa

In [51]:
selected_patterns = []
toks = ['returntrue', 'returnfalse']
rep_toks = ['returnfalse', 'returntrue']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    for idx, tk in enumerate(toks):
        if tk in ''.join(pattern['fix_actual']) and rep_toks[idx] in ''.join(pattern['buggy_actual']):
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                selected_patterns.append(pattern)
        if rep_toks[idx] in ''.join(pattern['fix_actual']) and tk in ''.join(pattern['buggy_actual']):
                    if pattern['url'] not in seen_url:
                        seen_url.add(pattern['url'])
                        list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 4


Unnamed: 0,fix_actual,buggy_actual
0,"[return, false, ;]","[return, true, ;]"
1,"[return, true, ;]","[return, false, ;]"
2,"[sandbox, ., stub, (, ghost, ., mail, ,, 'isWi...","[sandbox, ., stub, (, ghost, ., mail, ,, 'isWi..."
3,"[return, true, ;]","[return, false, ;]"


selected the following

In [39]:
print(selected_patterns.iloc[0, 4])
print(selected_patterns.iloc[1, 4])

https://github.com/emberjs/ember.js/commit/7aaf128c0098d020ccf67e16642dccddb937bdef
https://github.com/lodash/lodash/commit/a2a71a107e2aa5cfb7ab9cfa8c10b41885334160


#### Matching seeded bug

In [41]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sample(5)
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
22128,benchmarks/data/tinyspeck/glitch-GameServerJS/...,"[return, false, ;]","[return, true, ;]","[return, Lit_1, ;]","[return, Lit_2, ;]","{'line': '129-129', 'range': [3791, 3804]}",1224,1324,False,1.0,https://github.com/emberjs/ember.js/commit/7aa...
183488,benchmarks/data/Galloman/ss2d/ss2d.js/src/scen...,"[return, null, ;]","[return, true, ;]","[return, Lit_1, ;]","[return, Lit_2, ;]","{'line': '99-99', 'range': [3616, 3628]}",1113,1032,False,0.732,https://github.com/emberjs/ember.js/commit/7aa...
305365,benchmarks/data/cloudkick/cast/lib/plugins/man...,"[return, true, ;]","[return, false, ;]","[return, Lit_1, ;]","[return, Lit_2, ;]","{'line': '138-138', 'range': [5512, 5524]}",1093,1034,False,0.928,https://github.com/emberjs/ember.js/commit/7aa...
219678,benchmarks/data/documentcloud/visualsearch/bui...,"[return, false, ;]","[return, true, ;]","[return, Lit_1, ;]","[return, Lit_2, ;]","{'line': '1083-1083', 'range': [43455, 43468]}",1215,1191,False,1.0,https://github.com/emberjs/ember.js/commit/7aa...
516299,benchmarks/data/cdnjs/cdnjs/ajax/libs/dojo/1.1...,"[return, false, ;]","[return, true, ;]","[return, Lit_1, ;]","[return, Lit_2, ;]","{'line': '63-63', 'range': [2657, 2670]}",1085,1039,False,1.0,https://github.com/emberjs/ember.js/commit/7aa...


## JavaScript-Specific Mutation Operators

#### 1. Adding/Removing the var keyword

In [52]:
selected_patterns = []
toks = ['var']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    for t in pattern['fix']:
        if 'Lit_' not in t and 'Idf_' not in t:
            f.append(t)
    for t in pattern['buggy']:
        if 'Lit_' not in t and 'Idf_' not in t:
            b.append(t)
    for op in toks:
        di_f_b = len(set(f)-set(b)) == 1
        di_b_f = len(set(b)-set(f)) == 1

        if op in f and op not in b:
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                if 'const' not in pattern['buggy_actual']:
                    list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                    selected_patterns.append(pattern)
        if op not in f and op in b:
            if pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                if 'let' not in pattern['fix_actual']:
                    list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                    selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 24


Unnamed: 0,fix_actual,buggy_actual
0,"[var, i, =, 0, ,, l, =, not, ., length]","[i, =, 0, ,, l, =, not, ., length]"
1,"[var, filter, =, style, ., filter, ||, jQuery,...","[filter, =, style, ., filter, ||, jQuery, ., c..."
2,"[var, match, =, jQuery, ., noData, [, elem, .,...","[match, =, jQuery, ., noData, [, elem, ., node..."
3,"[var, encoding, =, arguments, [, 3, ], ;]","[encoding, =, arguments, [, 3, ], ;]"
4,"[var, stats, =, fs, ., statSync, (, requestPat...","[stats, =, fs, ., statSync, (, requestPath, ), ;]"
5,"[candidateInfluence, =, influences, [, i, ]]","[var, candidateInfluence, =, influences, [, i, ]]"
6,"[var, envDialect, =, process, ., env, ., DIALE...","[envDialect, =, process, ., env, ., DIALECT, |..."
7,"[var, appView, =, Discourse, ., __container__,...","[appView, =, Discourse, ., __container__, ., l..."
8,"[var, promise, =, new, Em, ., Deferred, (, ), ;]","[promise, =, new, Em, ., Deferred, (, ), ;]"
9,"[var, opts, =, options, ||, {, }, ;]","[opts, =, options, ||, {, }, ;]"


Selected the following

In [43]:
print(selected_patterns.iloc[13,4])
print(selected_patterns.iloc[28,4])

https://github.com/Reactive-Extensions/RxJS/commit/d3e10f70cca7c7a32d765cc91877f503a10b47ca
https://github.com/alvarotrigo/fullPage.js/commit/bd1bc752c4520508f6be646b6889b0c34a30d17f


#### Matching seeded bug

In [92]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[19,4]].sample(5).sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
354207,benchmarks/data/iamtrk/twitterArchitectureClon...,"[map, =, {, }]","[var, map, =, {, }]","[Idf_1, =, {, }]","[var, Idf_1, =, {, }]","{'line': '137-137', 'range': [4839, 4847]}",1036,1024,False,0.308,https://github.com/electron/electron/commit/bd...
641345,benchmarks/data/cdnjs/cdnjs/ajax/libs/topojson...,"[arcsByEnd, =, {, }]","[var, arcsByEnd, =, {, }]","[Idf_1, =, {, }]","[var, Idf_1, =, {, }]","{'line': '3-3', 'range': [73, 87]}",1047,1004,False,0.319,https://github.com/electron/electron/commit/bd...
671532,benchmarks/data/Akihabara/akihabara/src/akihab...,"[data, =, {, }]","[var, data, =, {, }]","[Idf_1, =, {, }]","[var, Idf_1, =, {, }]","{'line': '18-18', 'range': [434, 443]}",1075,1026,False,0.327,https://github.com/electron/electron/commit/bd...
262743,benchmarks/data/WebAppSolutionInc/sencha-cafe-...,"[dispatch, =, {, }]","[var, dispatch, =, {, }]","[Idf_1, =, {, }]","[var, Idf_1, =, {, }]","{'line': '74-74', 'range': [2851, 2864]}",1027,1007,False,0.329,https://github.com/electron/electron/commit/bd...
350673,benchmarks/data/Fluidbyte/ColtJS/src/colt.js,"[request, =, {, }]","[var, request, =, {, }]","[Idf_1, =, {, }]","[var, Idf_1, =, {, }]","{'line': '350-350', 'range': [14403, 14415]}",1092,1039,False,0.345,https://github.com/electron/electron/commit/bd...


#### 2. Remove global search flag from replace
**Not found**

In [10]:
selected_patterns = []
toks = ['replace']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    for idx,t in enumerate(pattern['fix']):
        if 'Idf_' in t:
            f.append(pattern['fix_actual'][idx])
    for idx,t in enumerate(pattern['buggy']):
        if 'Idf_' in t:
            b.append(pattern['buggy_actual'][idx])
    for op in toks:
        if op in f and op in b:
            f_idx = pattern['fix_actual'].index(op)
            b_idx = pattern['buggy_actual'].index(op)
            f_regex = pattern['fix_actual'][f_idx+2:f_idx+3]
            b_regex = pattern['buggy_actual'][f_idx+2:f_idx+3]
            # print(b_regex)
            # print(f_regex)
            cr = (b_regex[0][-1]=='g' and f_regex[0][-1]!='g') or (b_regex[0][-1]!='g' and f_regex[0][-1]=='g')
            if cr and pattern['url'] not in seen_url:
                seen_url.add(pattern['url'])
                selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 7


Unnamed: 0,fix_actual,buggy_actual
0,"[return, (, t, ||, """", ), ., replace, (, /^\s+...","[return, t, ., replace, (, /^\s+|\s+$/g, ,, """"..."
1,"[return, markup, ., replace, (, /&/g, ,, ""&amp...","[return, markup, ., replace, (, ""&"", ,, ""&amp;..."
2,"[cwd, =, process, ., cwd, (, ), ., replace, (,...","[cwd, =, process, ., cwd, (, ), ., replace, (,..."
3,"[textMessage, =, textMessage, ., replace, (, /...","[textMessage, =, textMessage, ., replace, (, '..."
4,"[textMessage, =, textMessage, ., replace, (, /...","[textMessage, =, textMessage, ., replace, (, '..."
5,"[suffix, =, suffix, &&, suffix, ., replace, (,...","[suffix, =, suffix, &&, suffix, ., replace, (,..."
6,"[const, jsCode, =, transformResult, ., code, ....","[const, jsCode, =, unescape, (, transformResul..."


Selected the following

In [11]:
print(selected_patterns.iloc[1,4])
print(selected_patterns.iloc[5,4])

https://github.com/select2/select2/commit/b422d4efef486e8ac724220229e0c6d42b7f28ad
https://github.com/emberjs/ember.js/commit/2e99d1ff3b8a28f966b52c12cb5109b377216c10


#### Matching seeded bug

In [13]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[5,4]].sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
611641,benchmarks/data/thomaschampagne/StravaPlus/hoo...,"[return, brng, ., replace, (, ""360"", ,, ""0"", )...","[return, brng, ., replace, (, ""="", ,, ""0"", ), ;]","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '86-86', 'range': [2795, 2827]}",1017,1020,False,0.4245,https://github.com/select2/select2/commit/b422...
257971,benchmarks/data/malikov/Authenticate.me-Node-S...,"[return, description, ., replace, (, /\d+/g, ,...","[return, description, ., replace, (, ""="", ,, ""...","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '227-227', 'range': [11638, 11677]}",1122,1022,False,0.5235,https://github.com/select2/select2/commit/b422...
322005,benchmarks/data/derdesign/protos/lib/controlle...,"[return, val, ., replace, (, /^_/, ,, """", ), ;]","[return, val, ., replace, (, ""="", ,, """", ), ;]","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '30-30', 'range': [881, 910]}",1087,1034,False,0.54775,https://github.com/select2/select2/commit/b422...
122967,benchmarks/data/sgruhier/typeahead-addresspick...,"[return, str, ., replace, (, /[\-\[\]\/\{\}\(\...","[return, str, ., replace, (, ""="", ,, ""\\$&"", )...","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '12-12', 'range': [432, 498]}",1298,1137,False,0.664,https://github.com/select2/select2/commit/b422...
47946,benchmarks/data/route360/r360-js/src/control/P...,"[return, queryToEscape, ., replace, (, /([.?*+...","[return, queryToEscape, ., replace, (, ""="", ,,...","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '158-158', 'range': [12463, 12526]}",1074,1096,False,0.6795,https://github.com/select2/select2/commit/b422...
339299,benchmarks/data/sstrigler/JSJaC/utils/jsdoc3/t...,"[return, str, ., replace, (, /</g, ,, ""&lt;"", ...","[return, str, ., replace, (, ""="", ,, ""&lt;"", )...","[return, Idf_1, ., Idf_2, (, Lit_1, ,, Lit_2, ...","[return, Idf_1, ., Idf_2, (, Lit_3, ,, Lit_2, ...","{'line': '22-22', 'range': [700, 733]}",1086,1073,False,0.78075,https://github.com/select2/select2/commit/b422...


In [120]:
p.iloc[1,0]

'benchmarks/data/malikov/Authenticate.me-Node-Server/node_modules/node-inspector/front-end/CanvasReplayStateView.js'

#### 3. Removing the integer base argument from parseInt
**Note:** Although many patterns exist that contain *parseInt* there does not exist
anything that captures exactly what mutandis does

In [14]:
selected_patterns = []
toks = ['parseInt']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    for idx,t in enumerate(pattern['fix']):
        if 'Idf_' in t:
            f.append(pattern['fix_actual'][idx])
    for idx,t in enumerate(pattern['buggy']):
        if 'Idf_' in t:
            b.append(pattern['buggy_actual'][idx])
    for op in toks:
        if op in f and op in b:
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 4


Unnamed: 0,fix_actual,buggy_actual
0,"[safari2, =, safari, &&, parseInt, (, version,...","[safari2, =, safari, &&, parseInt, (, version,..."
1,"[parseInt, (, process, ., versions, ., openssl...","[parseInt, (, process, ., versions, ., openssl..."
2,"[padding, =, parseInt, (, getComputedStyle, (,...","[padding, =, parseInt, (, getComputedStyle, (,..."
3,"[timeout, =, parseInt, (, args, [, 1, ], ||, 2...","[timeout, =, parseInt, (, args, [, 1, ], ||, 1..."


In [None]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[1,4]].sort_values(by='mean_similarity')
p

#### 4. Changing setTimeout function
**Note:** The exact pattern as expected by mutandis does not exist

In [16]:
selected_patterns = []
toks = ['setTimeout','setInterval']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    for idx,t in enumerate(pattern['fix']):
        if 'Idf_' in t:
            f.append(pattern['fix_actual'][idx])
    for idx,t in enumerate(pattern['buggy']):
        if 'Idf_' in t:
            b.append(pattern['buggy_actual'][idx])
    for op in toks:
        if op in f and op in b:
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 5


Unnamed: 0,fix_actual,buggy_actual
0,"[i, ==, null, ||, typeof, array, ==, 'string',...","[i, ==, null, ||, typeof, array, ==, 'function..."
1,"[elem, ., setInterval, &&, (, elem, !=, window...","[elem, ., setInterval, &&, elem, !=, window]"
2,"[setTimeout, (, start, ,, 1000, ), ;]","[setTimeout, (, start, ,, 100, ), ;]"
3,"[socket, ., setTimeout, (, 9999, ), ;]","[socket, ., setTimeout, (, 1000, ), ;]"
4,"[setTimeout, (, (, ), =>, {, this, ., resume, ...","[setTimeout, (, (, ), =>, {, this, ., resume, ..."


#### 5. Replacing 'undefined' with 'null'
**None found**

In [71]:
selected_patterns = []
toks = ['undefined']
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    f = []
    b = []
    if len(pattern['fix']) != len(pattern['buggy']):
        continue
    for idx,t in enumerate(pattern['fix']):
        if 'Idf_' in t:
            f.append(pattern['fix_actual'][idx])
    for idx,t in enumerate(pattern['buggy']):
        if 'Idf_' in t:
            b.append(pattern['buggy_actual'][idx])
    for op in toks:
        if op in f  and 'null' in pattern['buggy_actual']:
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 1


Unnamed: 0,fix_actual,buggy_actual
0,"[s, ., dataType, ===, ""script"", &&, s, ., cach...","[s, ., dataType, ===, ""script"", &&, s, ., cach..."


In [74]:
print(selected_patterns.iloc[0,8])
print(selected_patterns.iloc[0,14])

['s', '.', 'dataType', '===', '"script"', '&&', 's', '.', 'cache', '===', 'undefined']
['s', '.', 'dataType', '===', '"script"', '&&', 's', '.', 'cache', '===', 'null']


Selected the following

Selected the following

In [17]:
print(selected_patterns.iloc[5,4])

https://github.com/lerna/lerna/commit/2fa02a835c2c898dce5e8aa1d27d3eed3854c529


#### Matching seeded bug

In [72]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url,commit_hash


#### 6. Removing 'this' keyword

In [54]:
selected_patterns = []
tok = 'this'
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if tok in pattern['fix_actual'] and tok not in pattern['buggy_actual'] and len(pattern['buggy_actual'])+1==len(pattern['fix_actual']):
        selected_patterns.append(pattern)
        list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 2


Unnamed: 0,fix_actual,buggy_actual
0,"[exports, ., EventEmitter, ., call, (, this, )...","[exports, ., EventEmitter, ., call, (, ), ;]"
1,"[if, (, !, handler, ), {, return, this, ;, }]","[if, (, !, handler, ), {, return, ;, }]"


Selected the following

In [102]:
selected_patterns.iloc[0]

_id                nodejs/node_87d92f619a9b6c307a45e5a92ae63152e3...
commit_hash                 87d92f619a9b6c307a45e5a92ae63152e3b5b059
commit_time                                     20/01/2010, 23:59:38
local_repo_path                         ../results/top_JS_repos/node
url                https://github.com/nodejs/node/commit/87d92f61...
fix                       [Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]
fix_tokenType                                    ExpressionStatement
fix_file_path                                            src/node.js
fix_actual         [exports, ., EventEmitter, ., call, (, this, )...
fix_range                                               [6352, 6384]
fix_line                                                     195-195
buggy                           [Idf_1, ., Idf_2, ., Idf_3, (, ), ;]
buggy_tokenType                                  ExpressionStatement
buggy_file_path                                          src/node.js
buggy_actual            [exports, 

#### Matching seeded bug

In [101]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sample(5).sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
426945,benchmarks/data/malikov/Authenticate.me-Node-S...,"[WebInspector, ., View, ., call, (, this, ), ;]","[WebInspector, ., View, ., call, (, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, ), ;]","{'line': '2-2', 'range': [68, 97]}",1122,1022,False,0.527667,https://github.com/nodejs/node/commit/87d92f61...
333620,benchmarks/data/auchenberg/chrome-devtools-app...,"[WebInspector, ., VBox, ., call, (, this, ), ;]","[WebInspector, ., VBox, ., call, (, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, ), ;]","{'line': '2-2', 'range': [48, 77]}",1059,1009,False,0.541333,https://github.com/nodejs/node/commit/87d92f61...
588789,benchmarks/data/ondras/rot.js/src/scheduler/sc...,"[ROT, ., Scheduler, ., call, (, this, ), ;]","[ROT, ., Scheduler, ., call, (, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, ), ;]","{'line': '2-2', 'range': [40, 65]}",1009,1000,False,0.562667,https://github.com/nodejs/node/commit/87d92f61...
26301,benchmarks/data/substance/visual-editor/lib/oo...,"[oo, ., EventEmitter, ., call, (, this, ), ;]","[oo, ., EventEmitter, ., call, (, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, ), ;]","{'line': '300-300', 'range': [10612, 10639]}",1050,1014,False,0.752,https://github.com/nodejs/node/commit/87d92f61...
313059,benchmarks/data/deoxxa/jsmc/lib/chunk.js,"[events, ., EventEmitter, ., call, (, this, ), ;]","[events, ., EventEmitter, ., call, (, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, this, ), ;]","[Idf_1, ., Idf_2, ., Idf_3, (, ), ;]","{'line': '4-4', 'range': [116, 147]}",1017,1014,False,0.769667,https://github.com/nodejs/node/commit/87d92f61...


#### 7. Replacing (function()!==false) by (function())
**Note:** Could not find any instance

In [51]:
selected_patterns = []
tok = ')!==false'
rep_tok = ')'
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if tok in ''.join(pattern['fix_actual']) and rep_tok in ''.join(pattern['buggy_actual']):
        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 1


Unnamed: 0,fix_actual,buggy_actual
0,"[config, ., get, (, 'compress', ), !==, false]","[config, ., get, (, 'server', ), ., compress, ..."


## Variable mutation
The following are few mutation operators that do not overlap with the branch mutation
operators

#### Change the value assigned to the variable

In [55]:
selected_patterns = []
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if ['var', 'Idf_1', '=']==pattern['fix'][0:3] == pattern['buggy'][0:3]:
        if pattern['fix'][3] !=  pattern['buggy'][3]:
            selected_patterns.append(pattern)
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 88


Unnamed: 0,fix_actual,buggy_actual
0,"[var, blockMatch, =, /\s*\/\*\*\s*((.|\n|\r\n)...","[var, blockMatch, =, /\s*\/\*\*\s*((.|\n)*?)\s..."
1,"[var, exclude, =, /z-?index|font-?weight|opaci...","[var, exclude, =, /z-?index|font-?weight|opaci..."
2,"[var, exclude, =, /z-?index|font-?weight|opaci...","[var, exclude, =, /z-?index|font-?weight|opaci..."
3,"[var, cmd, =, 'NODE_PATH=', +, libDir, +, ' ',...","[var, cmd, =, nodeBinary, +, ' http://localhos..."
4,"[var, initial, =, arguments, ., length, >, 2, ;]","[var, initial, =, memo, !==, void, 0, ;]"
...,...,...
83,"[var, $this, =, $, (, this, ), ;]","[var, $this, =, jQuery, (, this, ), ;]"
84,"[var, noDead, =, new, URL, (, input, ), ;]","[var, noDead, =, url, ., parse, (, input, ), ;]"
85,"[var, validFontSizeRegExpStr, =, ""^([0-9]+)?(\...","[var, validFontSizeRegExpStr, =, ""^([0-9]+)?(...."
86,"[var, stats, =, compilation, ?, this, ., _getS...","[var, stats, =, this, ., _getStats, (, compila..."


In [11]:
selected_patterns.sample(3)

Unnamed: 0,_id,commit_hash,commit_time,local_repo_path,url,fix,fix_tokenType,fix_file_path,fix_actual,fix_range,fix_line,buggy,buggy_tokenType,buggy_file_path,buggy_actual,buggy_range,buggy_line
3,nodejs/node_a021db151a6c890281e4367d2a2319290c...,a021db151a6c890281e4367d2a2319290c70576d,"07/11/2009, 20:07:55",../results/top_JS_repos/node,https://github.com/nodejs/node/commit/a021db15...,"[var, Idf_1, =, Lit_1, +, Idf_2, +, Lit_2, +, ...",VariableDeclaration,test/mjsunit/test-remote-module-loading.js,"[var, cmd, =, 'NODE_PATH=', +, libDir, +, ' ',...","[603, 688]",24-24,"[var, Idf_1, =, Idf_3, +, Lit_3, +, Idf_4, +, ...",VariableDeclaration,test/mjsunit/test-remote-module-loading.js,"[var, cmd, =, nodeBinary, +, ' http://localhos...","[603, 664]",24-24
57,jashkenas/underscore_68d8b763fa75c3912217e7b08...,68d8b763fa75c3912217e7b08d36637a6140b1a1,"26/01/2015, 18:53:53",../results/top_JS_repos/underscore,https://github.com/jashkenas/underscore/commit...,"[var, Idf_1, =, Lit_1]",VariableDeclaration,underscore.js,"[var, index, =, 1]","[3543, 3556]",102-102,"[var, Idf_1, =, Lit_2]",VariableDeclaration,underscore.js,"[var, index, =, 0]","[3543, 3556]",102-102
29,meteor/meteor_e15ab4fc21a3c46ceab9f102897999cb...,e15ab4fc21a3c46ceab9f102897999cb7ed3ada5,"31/07/2013, 02:10:03",../results/top_JS_repos/meteor,https://github.com/meteor/meteor/commit/e15ab4...,"[var, Idf_1, =, Lit_1, ;]",VariableDeclaration,packages/ui/attrs.js,"[var, ATTRIBUTE_NAME_REGEX, =, /^[^\s""'>/=]+$/...","[1, 44]",2-2,"[var, Idf_1, =, Lit_2, ;]",VariableDeclaration,packages/ui/attrs.js,"[var, ATTRIBUTE_NAME_REGEX, =, /^[^\s""'>/=/]+$...","[1, 45]",2-2


#### Matching seeded bug

In [12]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[57,4]].sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
304175,benchmarks/data/jxcore/jxcore/deps/mozjs/src/t...,"[var, m, =, 0]","[var, m, =, 1]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '15-15', 'range': [284, 293]}",1007,1003,False,0.5455,https://github.com/jashkenas/underscore/commit...
261621,benchmarks/data/paullewis/a3/deploy/www/sample...,"[var, w, =, 0]","[var, w, =, 120]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '138-138', 'range': [4629, 4638]}",1108,1037,False,0.5600,https://github.com/jashkenas/underscore/commit...
507088,benchmarks/data/icodeforlove/node-requester/li...,"[var, proxy, =, 0]","[var, proxy, =, 80]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '62-62', 'range': [2839, 2852]}",1048,1017,False,0.5660,https://github.com/jashkenas/underscore/commit...
93758,benchmarks/data/icodeforlove/node-requester/li...,"[var, proxy, =, 0]","[var, proxy, =, 80]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '73-73', 'range': [3228, 3241]}",1048,1017,False,0.5660,https://github.com/jashkenas/underscore/commit...
196663,benchmarks/data/paullewis/a3/deploy/www/sample...,"[var, v, =, 0]","[var, v, =, 100]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '76-76', 'range': [2153, 2162]}",1108,1037,False,0.5760,https://github.com/jashkenas/underscore/commit...
...,...,...,...,...,...,...,...,...,...,...,...
213353,benchmarks/data/stackforge/compass-web/v1/ods/...,"[var, i, =, 1]","[var, i, =, 0]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '219-219', 'range': [12402, 12411]}",1104,1090,False,0.8265,https://github.com/jashkenas/underscore/commit...
353220,benchmarks/data/sebastienros/jint/Jint.Tests.E...,"[var, i, =, 1]","[var, i, =, 0]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '3-3', 'range': [48, 57]}",1003,1000,False,0.8265,https://github.com/jashkenas/underscore/commit...
323819,benchmarks/data/sharelatex/web-sharelatex/publ...,"[var, i, =, 1]","[var, i, =, 0]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '12-12', 'range': [529, 538]}",1050,1020,False,0.8265,https://github.com/jashkenas/underscore/commit...
13633,benchmarks/data/oreillymedia/data_structures_a...,"[var, i, =, 1]","[var, i, =, 0]","[var, Idf_1, =, Lit_1]","[var, Idf_1, =, Lit_2]","{'line': '3-3', 'range': [55, 64]}",1002,1000,False,0.8265,https://github.com/jashkenas/underscore/commit...


#### Remove variable declaration

In [56]:
selected_patterns = []
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if ['var', 'Idf_1', '=']==pattern['fix'][0:3] and pattern['buggy'][0:2] == ['Idf_1', '=']:
            list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
            selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 15


Unnamed: 0,fix_actual,buggy_actual
0,"[var, i, =, 0, ,, l, =, not, ., length]","[i, =, 0, ,, l, =, not, ., length]"
1,"[var, filter, =, style, ., filter, ||, jQuery,...","[filter, =, style, ., filter, ||, jQuery, ., c..."
2,"[var, match, =, jQuery, ., noData, [, elem, .,...","[match, =, jQuery, ., noData, [, elem, ., node..."
3,"[var, encoding, =, arguments, [, 3, ], ;]","[encoding, =, arguments, [, 3, ], ;]"
4,"[var, stats, =, fs, ., statSync, (, requestPat...","[stats, =, fs, ., statSync, (, requestPath, ), ;]"
5,"[var, envDialect, =, process, ., env, ., DIALE...","[envDialect, =, process, ., env, ., DIALECT, |..."
6,"[var, appView, =, Discourse, ., __container__,...","[appView, =, Discourse, ., __container__, ., l..."
7,"[var, promise, =, new, Em, ., Deferred, (, ), ;]","[promise, =, new, Em, ., Deferred, (, ), ;]"
8,"[var, opts, =, options, ||, {, }, ;]","[opts, =, options, ||, {, }, ;]"
9,"[var, o, =, 0, ,, ol, =, offsets, ., length]","[o, =, 0, ,, ol, =, offsets, ., length]"


#### Matching seeded bug

In [18]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
23718,benchmarks/data/ariatemplates/ariatemplates/te...,"[var, j, =, 0, ,, tcl, =, testCases, ., length]","[j, =, 0, ,, tcl, =, testCases, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '70-70', 'range': [4319, 4352]}",1102,1124,False,0.6664,https://github.com/jquery/jquery/commit/0b36b2...
102132,benchmarks/data/cdnjs/cdnjs/ajax/libs/react-ro...,"[var, k, =, 0, ,, kl, =, keys, ., length]","[k, =, 0, ,, kl, =, keys, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '1850-1850', 'range': [81716, 81743]}",1318,1116,False,0.6676,https://github.com/jquery/jquery/commit/0b36b2...
388527,benchmarks/data/NV/chrome-devtools-autosave/ch...,"[var, i, =, 0, ,, ii, =, rules, ., length]","[i, =, 0, ,, ii, =, rules, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '59-59', 'range': [1737, 1765]}",1110,1054,False,0.7144,https://github.com/jquery/jquery/commit/0b36b2...
372610,benchmarks/data/cdnjs/cdnjs/ajax/libs/react-ro...,"[var, i, =, 0, ,, il, =, objKeys, ., length]","[i, =, 0, ,, il, =, objKeys, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '1993-1993', 'range': [87185, 87215]}",1318,1116,False,0.716,https://github.com/jquery/jquery/commit/0b36b2...
241712,benchmarks/data/cdnjs/cdnjs/ajax/libs/react-ro...,"[var, i, =, 0, ,, il, =, objKeys, ., length]","[i, =, 0, ,, il, =, objKeys, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '1966-1966', 'range': [85893, 85923]}",1318,1116,False,0.716,https://github.com/jquery/jquery/commit/0b36b2...
392550,benchmarks/data/digitalbiblesociety/browserbib...,"[var, i, =, 0, ,, il, =, verseLocations, ., le...","[i, =, 0, ,, il, =, verseLocations, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '203-203', 'range': [7992, 8029]}",1097,1053,False,0.7168,https://github.com/jquery/jquery/commit/0b36b2...
526949,benchmarks/data/digitalbiblesociety/browserbib...,"[var, i, =, 0, ,, il, =, locationData, ., length]","[i, =, 0, ,, il, =, locationData, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '121-121', 'range': [4720, 4755]}",1097,1053,False,0.7184,https://github.com/jquery/jquery/commit/0b36b2...
73572,benchmarks/data/digitalbiblesociety/browserbib...,"[var, i, =, 0, ,, il, =, locationData, ., length]","[i, =, 0, ,, il, =, locationData, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '70-70', 'range': [2966, 3001]}",1097,1053,False,0.7184,https://github.com/jquery/jquery/commit/0b36b2...
524839,benchmarks/data/NV/chrome-devtools-autosave/ch...,"[var, i, =, 0, ,, ii, =, serversElements, ., l...","[i, =, 0, ,, ii, =, serversElements, ., length]","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '298-298', 'range': [9237, 9275]}",1110,1054,False,0.7212,https://github.com/jquery/jquery/commit/0b36b2...
61565,benchmarks/data/digitalbiblesociety/browserbib...,"[var, i, =, 0, ,, il, =, contentToHighlight, ....","[i, =, 0, ,, il, =, contentToHighlight, ., len...","[var, Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., ...","[Idf_1, =, Lit_1, ,, Idf_2, =, Idf_3, ., Idf_4]","{'line': '190-190', 'range': [7472, 7513]}",1097,1053,False,0.7278,https://github.com/jquery/jquery/commit/0b36b2...


#### Change variable type

In [57]:
def get_type_of_value(val):
    nmbr = True
    if val == 'true' or val == 'false':
        return 'boolean'
    
    for c in val:
        if not c.isdigit():
            nmbr=False
    if nmbr:
        return 'number'
    return 'string'

selected_patterns = []
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if ['Idf_1', '=', 'Lit_1']==pattern['fix'][0:3] and pattern['buggy'][0:3] == ['Idf_1', '=', 'Lit_2']:
            buggy_value = pattern['buggy_actual'][2]
            fix_value = pattern['fix_actual'][2]
            fx_type = get_type_of_value(fix_value)
            bg_type = get_type_of_value(buggy_value)
            if bg_type != fx_type:
                list_of_all_patterns_also_seded_by_mutandis.add(pattern['commit_hash'])
                selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 1


Unnamed: 0,fix_actual,buggy_actual
0,"[timerId, =, true, ;]","[timerId, =, 1, ;]"


#### Matching seeded bug

In [14]:
p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[0,4]].sort_values(by='mean_similarity')
p

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
503082,benchmarks/data/thomaschampagne/StravaPlus/hoo...,"[dp, =, 0, ;]","[dp, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '37-37', 'range': [1272, 1279]}",1017,1020,False,0.3010,https://github.com/jquery/jquery/commit/ab1504...
602056,benchmarks/data/thomaschampagne/StravaPlus/hoo...,"[dp, =, 0, ;]","[dp, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '41-41', 'range': [1372, 1379]}",1017,1020,False,0.3010,https://github.com/jquery/jquery/commit/ab1504...
283385,benchmarks/data/creationix/js-git/net/transpor...,"[verified, =, 2, ;]","[verified, =, 3, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '45-45', 'range': [1926, 1939]}",1023,1020,False,0.3015,https://github.com/jquery/jquery/commit/ab1504...
608986,benchmarks/data/yetone/collipa/collipa/static/...,"[buttonImageURL, =, """", ;]","[buttonImageURL, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '421-421', 'range': [23066, 23086]}",1323,1307,False,0.3050,https://github.com/jquery/jquery/commit/ab1504...
514686,benchmarks/data/cdnjs/cdnjs/ajax/libs/moment.j...,"[input, =, 0, ;]","[input, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '1241-1241', 'range': [41775, 41785]}",1509,1262,False,0.3085,https://github.com/jquery/jquery/commit/ab1504...
...,...,...,...,...,...,...,...,...,...,...,...
638443,benchmarks/data/alduro/generator-flux-webapp/a...,"[started, =, true, ;]","[started, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '95-95', 'range': [3408, 3423]}",1050,1064,False,0.7245,https://github.com/jquery/jquery/commit/ab1504...
202485,benchmarks/data/kyoung/trek/static/js/trek/mai...,"[displayCaptainsLog, =, true, ;]","[displayCaptainsLog, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '29-29', 'range': [824, 850]}",1016,1014,False,0.7330,https://github.com/jquery/jquery/commit/ab1504...
382635,benchmarks/data/garyv/srcery/srcery.js,"[queued, =, false, ;]","[queued, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '59-59', 'range': [2175, 2190]}",1031,1012,False,0.7425,https://github.com/jquery/jquery/commit/ab1504...
178165,benchmarks/data/garyv/srcery/srcery.js,"[queued, =, true, ;]","[queued, =, 1, ;]","[Idf_1, =, Lit_1, ;]","[Idf_1, =, Lit_2, ;]","{'line': '81-81', 'range': [2962, 2976]}",1031,1012,False,0.7785,https://github.com/jquery/jquery/commit/ab1504...


#### Swap Parameters
**None Found**

In [55]:
selected_patterns = []
seen_url = set()
for _,r in patterns.iterrows():
    pattern = r.to_dict()
    if "(Idf_3,Idf_4)" in ''.join(pattern['fix']) and "(Idf_4,Idf_3)" in ''.join(pattern['buggy']):
        selected_patterns.append(pattern)
selected_patterns = pd.DataFrame(selected_patterns)
print(f'Number of selected patterns = {len(selected_patterns)}')
selected_patterns[['fix_actual','buggy_actual']]

Number of selected patterns = 10


Unnamed: 0,fix_actual,buggy_actual
0,"[assert, ., strictEqual, (, completed, ,, expe...","[assert, ., strictEqual, (, expected, ,, compl..."
1,"[assert, ., strictEqual, (, chars_recved, ,, x...","[assert, ., strictEqual, (, x, ,, chars_recved..."
2,"[assert, ., strictEqual, (, cbcount, ,, N, ), ;]","[assert, ., strictEqual, (, N, ,, cbcount, ), ;]"
3,"[assert, ., strictEqual, (, returnedError, ,, ...","[assert, ., strictEqual, (, theError, ,, retur..."
4,"[assert, ., deepStrictEqual, (, actualChunks, ...","[assert, ., deepStrictEqual, (, expectChunks, ..."
5,"[assert, ., strictEqual, (, result, ,, expecte...","[assert, ., strictEqual, (, expected, ,, resul..."
6,"[assert, ., strictEqual, (, code, ,, expectCod...","[assert, ., strictEqual, (, expectCode, ,, cod..."
7,"[assert, ., strictEqual, (, result, ,, expecte...","[assert, ., strictEqual, (, expected, ,, resul..."
8,"[assert, ., strictEqual, (, err, ,, expected, ...","[assert, ., strictEqual, (, expected, ,, err, ..."
9,"[assert, ., deepStrictEqual, (, rawHeaders, ,,...","[assert, ., deepStrictEqual, (, expected, ,, r..."


#### Matching seeded bug

In [56]:
for i in range(len(selected_patterns)):
    p = seeded_bugs[seeded_bugs['seeding_pattern_url']==selected_patterns.iloc[i,4]].sort_values(by='mean_similarity')
    print(p)

Empty DataFrame
Columns: [file_name_where_intended, target_token_sequence-Correct, target_token_sequence-Buggy, token_sequence_abstraction-Correct, token_sequence_abstraction-Buggy, target_line_range, num_of_available_identifiers_to_choose_from, num_of_available_literals_to_choose_from, error, mean_similarity, seeding_pattern_url]
Index: []
Empty DataFrame
Columns: [file_name_where_intended, target_token_sequence-Correct, target_token_sequence-Buggy, token_sequence_abstraction-Correct, token_sequence_abstraction-Buggy, target_line_range, num_of_available_identifiers_to_choose_from, num_of_available_literals_to_choose_from, error, mean_similarity, seeding_pattern_url]
Index: []
Empty DataFrame
Columns: [file_name_where_intended, target_token_sequence-Correct, target_token_sequence-Buggy, token_sequence_abstraction-Correct, token_sequence_abstraction-Buggy, target_line_range, num_of_available_identifiers_to_choose_from, num_of_available_literals_to_choose_from, error, mean_similarity, se

#### Remove Parameters

In [58]:
seeded_bugs[seeded_bugs['seeding_pattern_url']=='https://github.com/jquery/jquery/commit/ed1144816b07fbb2ea94217fd967626619344ad9'].sort_values(by='mean_similarity')

Unnamed: 0,file_name_where_intended,target_token_sequence-Correct,target_token_sequence-Buggy,token_sequence_abstraction-Correct,token_sequence_abstraction-Buggy,target_line_range,num_of_available_identifiers_to_choose_from,num_of_available_literals_to_choose_from,error,mean_similarity,seeding_pattern_url
173030,benchmarks/data/kriszyp/xstyle/core/Rule.js,"[value, =, generate, ., forSelector, (, value,...","[value, =, generate, ., forSelector, (, value,...","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ,, Idf_4...","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ), ;]","{'line': '111-111', 'range': [4201, 4243]}",1112,1019,False,0.3564,https://github.com/jquery/jquery/commit/ed1144...
100776,benchmarks/data/SlexAxton/yepnope.js/src/yepno...,"[url, =, yepnope, ., urlFormatter, (, url, ,, ...","[url, =, yepnope, ., urlFormatter, (, url, ), ;]","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ,, Idf_4...","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ), ;]","{'line': '155-155', 'range': [4440, 4479]}",1029,1020,False,0.4146,https://github.com/jquery/jquery/commit/ed1144...
416465,benchmarks/data/buildfirst/buildfirst/appendix...,"[result, =, _, ., difference, (, result, ,, ma...","[result, =, _, ., difference, (, result, ), ;]","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ,, Idf_4...","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ), ;]","{'line': '48-48', 'range': [1577, 1616]}",1008,1021,False,0.419,https://github.com/jquery/jquery/commit/ed1144...
430644,benchmarks/data/buildfirst/buildfirst/appendix...,"[result, =, _, ., union, (, result, ,, matches...","[result, =, _, ., union, (, result, ), ;]","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ,, Idf_4...","[Idf_1, =, Idf_2, ., Idf_3, (, Idf_1, ), ;]","{'line': '50-50', 'range': [1646, 1680]}",1008,1021,False,0.4344,https://github.com/jquery/jquery/commit/ed1144...


In [58]:
print(f'Of {len(patterns)} patterns, {len(list_of_all_patterns_also_seded_by_mutandis)} could be seeded by mutandis')


Of 2880 patterns, 165 could be seeded by mutandis


In [65]:
seeded_bugs['commit_hash'] = seeded_bugs['seeding_pattern_url'].apply(lambda x: x.split('/')[-1])

In [69]:
possibly_seeded_by_mutandis = seeded_bugs.loc[seeded_bugs['commit_hash'].isin(list_of_all_patterns_also_seded_by_mutandis)]
print(f'Of all {len(seeded_bugs)} bugs, {len(possibly_seeded_by_mutandis)} could be possibly seeded by mutandis')

Of all 677217 bugs, 12787 could be possibly seeded by mutandis
