In [1]:
import pandas as pd
import numpy as np
import sys

### Import Oracle Creator

In [2]:
sys.path.append("../../../../Tracer/src")
from PreProcessing.Traces.TraceCreator import create_trace_matrix_from_trace_list

def create_trace_list(path_to_oracle):
    """Build a trace matrix from a plain-text oracle file.

    Every non-empty line of the file must have the form
    ``source:target target ...`` (exactly one ``:``; targets separated by a
    single space). One ``"source target"`` entry is collected per target
    token. Empty tokens (for example a line with nothing after the colon)
    are kept and produce a ``"source "`` entry.

    Args:
        path_to_oracle: Path of the oracle text file to read.

    Returns:
        Whatever ``create_trace_matrix_from_trace_list`` returns for the
        collected entries.

    Raises:
        ValueError: If a non-empty line does not contain exactly one ``:``.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(path_to_oracle) as oracle_file:
        oracle = oracle_file.read().split("\n")

    trace_list = []
    for trace_item in oracle:
        if len(trace_item) > 0:
            source, targets = trace_item.split(":")
            for target in targets.split(" "):
                # An empty token still yields "source "; the former if/else
                # appended exactly this string in both of its branches.
                trace_list.append(" ".join([source, target]))
    return create_trace_matrix_from_trace_list(trace_list)

# Original Oracles

In [3]:
# Load the three oracle matrices, each indexed by its "id" column.
# The order of the paths matches the order of the unpacked names.
r2d, d2c, r2c = (
    pd.read_csv(csv_path).set_index("id")
    for csv_path in (
        "../../Oracles/DataFrames/Level_1_to_Level_2.csv",
        "../../Oracles/DataFrames/Level_2_to_Level_3.csv",
        "../../Oracles/DataFrames/Level_1_to_Level_3.csv",
    )
)

In [4]:
# Preview the first rows of d2c as a sanity check on the loaded matrix.
d2c.head()

Unnamed: 0_level_0,141.txt,149.txt,154.txt,152.txt,151.txt,134.txt,136.txt,124.txt,118.txt,147.txt,...,117.txt,145.txt,123.txt,148.txt,120.txt,158.txt,122.txt,132.txt,114.txt,155.txt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
41.txt,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
46.txt,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
48.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
33.txt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
39.txt,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Fill Missing R->D

In [5]:
# Column labels that appear in both d2c and r2c.
c_intersection = set(d2c.columns) & set(r2c.columns)

In [6]:
# For every shared column label, pair each r2c row marked 1 in that column
# with each d2c row marked 1 in the same column. Pairs are (r_id, d_id);
# the same pair can be recorded more than once.
expected_traces = []
for c_id in c_intersection:
    design_flags = d2c[c_id]
    requirement_flags = r2c[c_id]
    marked_designs = list(design_flags[design_flags == 1].index)
    marked_requirements = list(requirement_flags[requirement_flags == 1].index)
    for d_id in marked_designs:
        for r_id in marked_requirements:
            expected_traces.append((r_id, d_id))
print("Number of expected associated: ", len(expected_traces))

Number of expected associated:  270


In [7]:
# Set every expected (r_id, d_id) pair that is still 0 in r2d to 1 and count
# how many cells were filled in.
missed_traced = 0
for e_t in expected_traces:
    r_id = e_t[0]
    d_id = e_t[1]
    # Guard against the frame that is actually read and written (r2d).
    # The previous guard tested d2c with a c_id left over from another cell,
    # which did not protect the r2d lookup at all.
    if r_id in r2d.index and d_id in r2d.columns and r2d.loc[r_id, d_id] == 0:
        missed_traced = missed_traced + 1
        # One .loc call with (row, column): the chained form
        # r2d.loc[r_id][d_id] = 1 may assign into a temporary copy.
        r2d.loc[r_id, d_id] = 1
print("Missed Traced: ", missed_traced)

Missed Traced:  104


# Fill Missing D->C

In [8]:
# Row labels that appear in both r2d and r2c.
r_intersection = set(r2d.index) & set(r2c.index)

In [9]:
# Column labels of r2c whose value is 1 in row r_id.
# NOTE(review): r_id is not assigned in this cell; it is whatever value an
# earlier loop left behind, so the displayed result depends on execution order.
list(r2c.loc[r_id][r2c.loc[r_id]==1].index)

['147.txt', '123.txt', '158.txt', '122.txt']

In [10]:
# For every shared row label, pair each r2d column marked 1 in that row with
# each r2c column marked 1 in the same row. Pairs are (d_id, c_id); the same
# pair can be recorded more than once.
expected_traces = []
for r_id in r_intersection:
    design_flags = r2d.loc[r_id]
    class_flags = r2c.loc[r_id]
    marked_designs = list(design_flags[design_flags == 1].index)
    marked_classes = list(class_flags[class_flags == 1].index)
    for d_id in marked_designs:
        for c_id in marked_classes:
            expected_traces.append((d_id, c_id))
print("Number of expected associated: ", len(expected_traces))

Number of expected associated:  414


In [11]:
# Set every expected (d_id, c_id) pair that is still 0 in d2c to 1 and count
# how many cells were filled in. Pairs whose labels are absent from d2c are
# skipped.
missed_traced = 0
for e_t in expected_traces:
    d_id = e_t[0]
    c_id = e_t[1]
    if d_id in d2c.index and c_id in d2c.columns and d2c.loc[d_id, c_id] == 0:
        missed_traced = missed_traced + 1
        # One .loc call with (row, column): the chained form
        # d2c.loc[d_id][c_id] = 1 may assign into a temporary copy.
        d2c.loc[d_id, c_id] = 1
print("Missed Traced: ", missed_traced)

Missed Traced:  55


# Fill Missing R->C

In [12]:
# Walk r2d row by row: for each column marked 1 that is also a row of d2c,
# record an (r_id, c_id) pair for every d2c column marked 1 in that row.
expected_full_traces = []
for r_id in r2d.index:
    design_flags = r2d.loc[r_id]
    for d_id in list(design_flags[design_flags == 1].index):
        if d_id not in d2c.index:
            continue
        class_flags = d2c.loc[d_id]
        for c_id in list(class_flags[class_flags == 1].index):
            expected_full_traces.append((r_id, c_id))
print("Expected r2c associations: ", len(expected_full_traces))

Expected r2c associations:  874


In [13]:
# Set every expected (r_id, c_id) pair that is still 0 in r2c to 1 and count
# how many cells were filled in. Pairs whose labels are absent from r2c are
# skipped.
missed_traced = 0
for e_t in expected_full_traces:
    r_id = e_t[0]
    c_id = e_t[1]
    if r_id in r2c.index and c_id in r2c.columns and r2c.loc[r_id, c_id] == 0:
        missed_traced = missed_traced + 1
        # One .loc call with (row, column): the chained form
        # r2c.loc[r_id][c_id] = 1 may assign into a temporary copy.
        r2c.loc[r_id, c_id] = 1
print("Missed Traced: ", missed_traced)

Missed Traced:  166


# Export

In [14]:
# Write the updated matrices back to disk. The three DataFrames/ files are
# the same paths the oracles were loaded from, so they are overwritten;
# r2c is additionally written to Relations.csv.
for oracle_frame, csv_path in (
    (r2d, "../../Oracles/DataFrames/Level_1_to_Level_2.csv"),
    (d2c, "../../Oracles/DataFrames/Level_2_to_Level_3.csv"),
    (r2c, "../../Oracles/DataFrames/Level_1_to_Level_3.csv"),
    (r2c, "../../Oracles/Relations.csv"),
):
    oracle_frame.to_csv(csv_path)
print("Done!")

Done!


In [15]:
# Grand total of r2c: per-column sums, then the sum of those.
# Presumably the number of marked traces, assuming the cells only hold 0/1.
r2c.sum().sum()

259.0

In [16]:
# Total number of cells in r2c (rows times columns).
r2c.size

1050