# RQ1: Comparison with Baselines

In [10]:
import sys
sys.path.append("../implementations/")
from classes.frameworks import Frameworks
import copy
import os
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import cm
from collections import defaultdict
import numpy as np
import dill
import os
import math
# Registry of loaded results; later cells fill it as frameworks[backend][method].
frameworks = defaultdict(dict)

# Data directory of each method compared in this notebook.
memo_dir, muffin_dir, lemon_dir, cradle_dir = (
    "./data/memo/",
    "./data/muffin/",
    "./data/lemon/",
    "./data/cradle/",
)

# Method name -> data directory; iteration order is memo, muffin, lemon, cradle.
all_experiments = dict(
    zip(
        ("memo", "muffin", "lemon", "cradle"),
        (memo_dir, muffin_dir, lemon_dir, cradle_dir),
    )
)

# Get the Test Coverage

In [11]:
def load_class(home_dir, method):
    """Load the serialized result object of every backend for one method.

    Args:
        home_dir: Directory that holds the method's ``.pkl`` files.
        method: Method name. For ``"cradle"`` the file
            ``<backend>_origin_models.pkl`` is read; every other method
            reads ``<backend>.pkl``.

    Returns:
        dict mapping each backend name to the object deserialized by ``dill``.

    Raises:
        OSError: If an expected file is missing or unreadable.
    """
    backend = ["tensorflow_new"]
    result_dict = {}
    for bk in backend:
        print("========= working on backend: {} ========".format(bk))
        if method == "cradle":
            file_name = f"{bk}_origin_models.pkl"
        else:
            file_name = f"{bk}.pkl"
        file_path = os.path.join(home_dir, file_name)

        # NOTE(review): dill, like pickle, can execute arbitrary code while
        # deserializing -- only load files produced by trusted runs.
        # Read-only mode is enough for loading, and the context manager
        # closes the handle even if deserialization fails.
        with open(file_path, "rb") as file:
            result_dict[bk] = dill.load(file)
    return result_dict

In [12]:
backend = ["tensorflow_new"]
save_dir = "./all_coverages"
# exist_ok=True replaces the separate exists-check and is safe on re-runs.
os.makedirs(save_dir, exist_ok=True)
file_list = os.listdir(save_dir)
# frameworks[backend][method] -> loaded result object for that method.
frameworks = defaultdict(dict)

for method in all_experiments:
    print("working on method: ", method)
    # Use the cache only when every backend of this method has a cached file.
    is_cached = all(f"{method}_{bk}.pkl" in file_list for bk in backend)
    if is_cached:
        print(f"{method} already exists, directly load the overall result")
        for bk in backend:
            # NOTE(review): dill can execute arbitrary code while loading;
            # only load cache files written by this notebook.
            with open(os.path.join(save_dir, f"{method}_{bk}.pkl"), "rb") as file:
                frameworks[bk][method] = dill.load(file)
    else:
        result_dict = load_class(all_experiments[method], method)
        for bk in backend:
            print("working on backend: ", bk)
            frameworks[bk][method] = result_dict[bk]
            # Persist the result so later runs can take the cached branch.
            with open(os.path.join(save_dir, f"{method}_{bk}.pkl"), "wb") as file:
                dill.dump(frameworks[bk][method], file)


working on method:  memo
memo already exists, directly load the overall result
working on method:  muffin
muffin already exists, directly load the overall result
working on method:  lemon
lemon already exists, directly load the overall result
working on method:  cradle
cradle already exists, directly load the overall result


In [28]:
backends = ["tensorflow_new"]
import json

# tensorflow_modules: directory name -> collection of file names inside it
# (this is how search_within_files indexes it).
# Binary read-only mode keeps json's own encoding detection, needs no write
# permission, and the context manager closes the handle.
with open("../implementations/scripts/analysis/tensorflow_related_modules.json", "rb") as json_file:
    tensorflow_modules = json.load(json_file)

# tensorflow_modules_meta: module name -> collection of directory names that
# belong to that module.
with open("../implementations/scripts/analysis/tensorflow_modules_meta.json", "rb") as json_file:
    tensorflow_modules_meta = json.load(json_file)

import os
def search_within_files(file_keywards, files, cache):
    """Accumulate per-module line and branch coverage counts from ``files``.

    A file is counted only when its path contains ``file_keywards``, its
    directory is a key of the global ``tensorflow_modules``, its base name is
    listed under that directory, and some module in the global
    ``tensorflow_modules_meta`` lists the directory.

    Args:
        file_keywards: Substring that must occur in the file path.
        files: Mapping from file path to an object with a ``coverage``
            attribute holding six values. Positions 0 and 1 are added as hit
            and missed lines, positions 3 and 4 as hit and missed branches;
            positions 2 and 5 are ignored.
        cache: Mapping from module name to
            ``(hit_line, total_line, hit_branch, total_branch)``. It is
            updated in place.

    Returns:
        The same ``cache`` object that was passed in.

    Raises:
        KeyError: If a matching module has no entry in ``cache``.
    """
    for file in files:
        if file_keywards not in file:
            continue
        dirname, basename = os.path.split(file)
        if dirname not in tensorflow_modules:
            continue
        if basename not in tensorflow_modules[dirname]:
            continue

        # Find the module that owns this directory. Without a match the file
        # is skipped; previously the loop variable kept the last module name
        # and the counts were attributed to that unrelated module.
        module = next(
            (
                name
                for name in tensorflow_modules_meta
                if dirname in tensorflow_modules_meta[name]
            ),
            None,
        )
        if module is None:
            continue

        hit_line, total_line, hit_branch, total_branch = cache[module]
        lines_hit, lines_missed, _, branches_hit, branches_missed, _ = files[file].coverage
        cache[module] = (
            hit_line + lines_hit,
            total_line + lines_hit + lines_missed,
            hit_branch + branches_hit,
            total_branch + branches_hit + branches_missed,
        )
    return cache

def ceil(x, decimal):
    """Round ``x`` up to ``decimal`` decimal places."""
    scale = 10**decimal
    scaled_up = math.ceil(x*scale)
    return scaled_up/scale

def _ratio(hit, total):
    """Return ``hit / total``, or NaN when ``total`` is 0 (nothing measured)."""
    return hit / total if total else float("nan")


# For every backend and method, print the branch and line coverage of each
# module followed by the totals over all modules.
for bk in backends:
    print("working on backend: ", bk)
    for method in frameworks[bk]:
        print("\n---- working on method: ", method)
        file_keywards = ".py"
        # Every module starts from zero counts for this method.
        cache = {module: (0, 0, 0, 0) for module in tensorflow_modules_meta}
        cache = search_within_files(file_keywards, frameworks[bk][method].c_files, cache)
        cache = search_within_files(file_keywards, frameworks[bk][method].py_files, cache)

        hit_line, total_line, hit_branch, total_branch = 0, 0, 0, 0
        for module in cache:
            print(f"The coverage of module: {module} is:")
            hl, tl, hr, tr = cache[module]
            hit_branch += hr
            total_branch += tr
            hit_line += hl
            total_line += tl
            # The explicit padding reproduces the original output layout.
            # _ratio avoids ZeroDivisionError for a module with no matched files.
            print(
                f"total_branch: {tr}, hit_branch: {hr}, coverage: {_ratio(hr, tr)},"
                + " " * 18
                + f"total_line: {tl}, coverage: {_ratio(hl, tl)}"
            )

        print(
            f"Branch Coverage: {hit_branch}/{total_branch}({_ratio(hit_branch, total_branch)}), "
            + " " * 14
            + f"Line Coverage: {hit_line}/{total_line}({_ratio(hit_line, total_line)})"
        )

working on backend:  tensorflow_new

---- working on method:  memo
The coverage of module: API is:
total_branch: 3024, hit_branch: 1198, coverage: 0.39616402116402116,                  total_line: 7487, coverage: 0.5709897155068786
The coverage of module: Operators is:
total_branch: 11308, hit_branch: 1008, coverage: 0.08914043155288291,                  total_line: 32562, coverage: 0.21322400343959216
The coverage of module: Model is:
total_branch: 2194, hit_branch: 941, coverage: 0.42889699179580676,                  total_line: 4961, coverage: 0.575488812739367
Branch Coverage: 3147/16526(0.19042720561539392),               Line Coverage: 14073/45010(0.3126638524772273)

---- working on method:  muffin
The coverage of module: API is:
total_branch: 3024, hit_branch: 1065, coverage: 0.3521825396825397,                  total_line: 7487, coverage: 0.5231735007346067
The coverage of module: Operators is:
total_branch: 11308, hit_branch: 755, coverage: 0.06676689069685178,               

# Get the Coverage for Layer Call Diversity

In [33]:
def load_cov(home_dir, method, bk="tensorflow_new"):
    """Read and print the layer-call coverage record of one experiment.

    The record is the last non-blank line of ``<bk>_api_cov.txt`` in
    ``home_dir``. It is split on ``"; "`` and at least seven fields are
    expected.

    Args:
        home_dir: Directory containing the coverage text file.
        method: Method name; accepted for call-site symmetry but not used.
        bk: Backend name used to build the file name. It used to be read
            from a global left behind by an earlier cell's loop; the default
            is the only backend this notebook uses.

    Returns:
        dict mapping each coverage name to its value as a string. The same
        dict is printed.

    Raises:
        OSError: If the coverage file cannot be opened.
        IndexError: If the file has no non-blank line or the record has
            fewer than seven fields.
    """
    file_path = os.path.join(home_dir, f"{bk}_api_cov.txt")
    with open(file_path, "r") as file:
        # Ignore blank lines so the record is found whether or not the file
        # ends with one (or several) trailing newlines.
        records = [line for line in file.read().split("\n") if line.strip()]
    content = records[-1].split("; ")

    # (name, field position) pairs, listed in the order the names are printed.
    field_positions = (
        ("layer input coverage", 3),
        ("layer parameter coverage", 2),
        ("layer sequence coverage", 1),
        ("layer api coverage", 0),
        ("layer input dimension coverage", 4),
        ("layer input datatype coverage", 5),
        ("layer input shape coverage", 6),
    )
    result_dict = {name: content[position] for name, position in field_positions}
    print(result_dict)
    return result_dict

# Print the layer-call coverage record of every experiment in turn.
for method, home_dir in all_experiments.items():
    print("working on method: ", method)
    load_cov(home_dir, method)


working on method:  memo
{'layer input coverage': '0.6971279373368147', 'layer parameter coverage': '0.5017081503172279', 'layer sequence coverage': '0.1884606816662954', 'layer api coverage': '1.0', 'layer input dimension coverage': '0.9401709401709402', 'layer input datatype coverage': '0.4774011299435028', 'layer input shape coverage': '0.864406779661017'}
working on method:  muffin
{'layer input coverage': '0.34073107049608353', 'layer parameter coverage': '0.25866276232308444', 'layer sequence coverage': '0.07529516596123859', 'layer api coverage': '0.6865671641791045', 'layer input dimension coverage': '0.41025641025641024', 'layer input datatype coverage': '0.10734463276836158', 'layer input shape coverage': '0.5932203389830508'}
working on method:  lemon
{'layer input coverage': '0.1422976501305483', 'layer parameter coverage': '0.06442166910688141', 'layer sequence coverage': '0.011806638449543327', 'layer api coverage': '0.29850746268656714', 'layer input dimension coverage':