# Problem need to be addressed 

In this research, we proposal a new fault tolerance analysis technique - fault tolerance boundary is designed specific to the data variable.
1. What's the fault tolerance boundary.
2. It's a technique can be equivlent to the fault injection campaign.
3. We propose a method to approximate the boundary.
    1. heuristic method to measure the boundary.
    2. correlation analysis between variables to understand the relationship between the fault tolerance across variables.
    
  

In [3]:
import numpy as np
import seaborn as sns
import pandas as pd
import os.path
import matplotlib.pyplot as plt
import random
import math
from IPython.core.debugger import set_trace
from datetime import datetime
import sys
random.seed(datetime.now())

In [4]:
DATASET = "bs" #current available datasets fft, lu, cg, other potential dataset.,

In [5]:
# Threshold that used to decide whether the final outcome is SDC or Masked. For the loading data
# set, there are two different datasets. One is the exhaust fault injection campaign which contains
# all the fault injection information. The other is the test data set which is the sample experiment 
# contain the detail propagation information used for the downstream propagation analysis.
PROPAGATION_DATA_PATH = ""
CAMPAIGN_DATA_PATH = ""
PROPAGATION_INJECTION_DATA_PATH = ""
THRESHOLD = 0

if DATASET == "fft":
    PROPAGATION_DATA_PATH = "../static/data/fft/fft_complete"
    CAMPAIGN_DATA_PATH = "../static/data/fft/fft_injectlog.log"
    #PROPAGATION_INJECTION_DATA_PATH = "../static/data/fft_complete.csv"
    THRESHOLD = 0.001
elif DATASET == "cg":
    PROPAGATION_DATA_PATH = "../static/data/cg/cg_in8"
    CAMPAIGN_DATA_PATH = "../static/data/cg/cg_in8/injectlog.log"
    #PROPAGATION_INJECTION_DATA_PATH = "../static/data/cg_in8.csv"
    THRESHOLD = 0.07
elif DATASET == "lu":
    PROPAGATION_DATA_PATH = "../static/data/lu/lu20000"
    CAMPAIGN_DATA_PATH = "../static/data/lu/injectlog.log"
    #PROPAGATION_INJECTION_DATA_PATH = "../static/data/lu_complete.csv"
    THRESHOLD = 0.0001
elif DATASET == 'bs':
    PROPAGATION_DATA_PATH = "../static/data/black-scholes/"
    CAMPAIGN_DATA_PATH = "../static/data/black-scholes/injectlog_complete.log"
    #PROPAGATION_INJECTION_DATA_PATH = ""
    THRESHOLD = 0.0001
     
# The fault injection campaign experiment
FAULT_INJECTION_CAMPAIGN = pd.read_csv(CAMPAIGN_DATA_PATH,  sep=" ", names=['fileindex', 'file', 'linenum', 'variable','byte_num', 'corrupted','init_value','to', 'corrupt_value', 'mask', 'byte','expo', 'ss', 'op', 'diffnorm', 'empty'])

# The size of the fault injection experiment
SIZE = len(FAULT_INJECTION_CAMPAIGN)

# The percentage of the test experiment over exhaust fault injection campaign.
TEST_EXPERIMENT_NUMBER = int(SIZE * 0.01)

# Golden Run
GOLDEN_RUN = pd.read_csv(PROPAGATION_DATA_PATH+"/golden.log",  sep=" ", names=['file', 'linenum', 'variable', 'value'])

# Subset of fault injection campaign that used to verify the quality of a dynamic instruction
# FAULT_INJECTION_TEST_CAMPAIGN = pd.read_csv(TEST_PATH, sep=",")
TEST_EXPERIMENTS = FAULT_INJECTION_CAMPAIGN.sample(n=TEST_EXPERIMENT_NUMBER, replace=False,  random_state=1)

# SDC ratio of test experiments
SDC_COUNT = 0
for index, row in FAULT_INJECTION_CAMPAIGN.iterrows():
    if float(row['diffnorm']) > THRESHOLD:
        SDC_COUNT += 1

print("The SDC ratio in this experiment is",SDC_COUNT/len(FAULT_INJECTION_CAMPAIGN))
print("The size of the experiment is",  len(TEST_EXPERIMENTS))
print("The size of the exhaust fault injection experiments", len(FAULT_INJECTION_CAMPAIGN))

The SDC ratio in this experiment is 0.6884949882075472
The size of the experiment is 27136
The size of the exhaust fault injection experiments 2713600


# Golden Fault Injection Boundary
Golden fault injection boundary use the exhaust fault inject campaign to build the boundary in practical sense. If the error value is above a specific threshold, then the location will not tolerance the error.

In [6]:
# In fault injection campaign analysis, the prediction outcome is positive and negative.
# during the prediction process, we ignore the crash case.
# For the prediction, we need to think about four different cases
# True negative, False positive
# False negative, True positive
def predict(boundary):
    positive_prediction = 0
    crash = 0
    negative_prediction = 0
    result = {}
    bits = []
    nan = float('nan')
    inf = float('inf')

    for index, row in FAULT_INJECTION_CAMPAIGN.iterrows():
        instruction_index = int(row["byte_num"].split("#")[1])
        inject_error = 0
        corrupt_value = float(row['corrupt_value'])
        init_value = float(row['init_value'])
        
        if init_value != 0 and corrupt_value != nan and corrupt_value != inf:
            inject_error = abs((corrupt_value - init_value)/init_value)
        
        diffnorm = float(row['diffnorm'])
        if inject_error < boundary[instruction_index]:
            #bits.append(row["bit"])
            if diffnorm < THRESHOLD:
                positive_prediction += 1
            elif math.isnan(diffnorm) or math.isinf(diffnorm):
                crash += 1
            else:
                negative_prediction += 1     
    
    result["positive"] = positive_prediction
    result["negative"] = negative_prediction
    result["crash"] = crash
    result["accuracy"] = positive_prediction/(positive_prediction + crash + negative_prediction)
    
    return (result, bits)

def getGoldenFaultToleranceBoundary():
    boundary = {}
    nan = float('nan')
    inf = float('inf')
    for index, row in FAULT_INJECTION_CAMPAIGN.iterrows():
        corrupt_value = float(row['corrupt_value'])
        init_value = float(row['init_value'])
        inject_error = 0
        norm = float(row["diffnorm"])
        
        try:
            instruction_index = int(row["byte_num"].split("#")[1])
        except:
            print(row)
        
        if init_value != 0 and corrupt_value != nan and corrupt_value != inf:
            inject_error = abs((corrupt_value - init_value)/init_value)
  
        if instruction_index not in boundary:
            boundary[instruction_index] = float("+inf")
        
        if norm > THRESHOLD:
            if boundary[instruction_index] > inject_error:
                boundary[instruction_index] = inject_error    
    return boundary

boundary = getGoldenFaultToleranceBoundary()
print(predict(boundary)[0])

{'positive': 802800, 'negative': 0, 'crash': 0, 'accuracy': 1.0}


# A Single Fault Injection Experiment

Understand how a single fault injection experiment can help to understand the result of the other fault injection experiment.

Randomly select 1000 fault injection experiments and test the prediction accuracy of each masked experiment.

In [7]:
#The number of fault injection experiments that a single fault injection can predict is masked.
def single_masked_prediction(fault_inject_run, golden_run, experiments, threshold = 0.001):
    
    # The experiment ends early
    if len(fault_inject_run) < len(golden_run):
        print("Bad fault injection experiment!")
        return False
    
    # Get the prediction boundary of a single fault injection experiment.
    boundary = np.abs(np.array(fault_inject_run.value[0:len(golden_run)], dtype="float") - np.array(golden_run.value, dtype="float"))    
    if np.isnan(boundary).any() or np.isinf(boundary).any():
        print("This experiment outcome is masked, but the propagation process contains nan or infinity event")
        return False
    
    # The number of tested experiment
    # print(len(experiments))
    
    positive_prediction = 0
    negative_prediction = 0
    SDC_count = 0
    crash = 0
    count = len(experiments)
  
    for index, row in experiments.iterrows():
        
        #if pd.isna(row["byte_num"]):
        #    continue
        
        #print(row['byte_num'], np.isnan(row["byte_num"]))
        instruction_index = int(row["byte_num"].split("#")[1])
        inject_error = abs(float(row["corrupt_value"]) - float(row["init_value"]))
        diffnorm = float(row['diffnorm'])
        
        if inject_error <= boundary[instruction_index]:
            if diffnorm < threshold:
                positive_prediction += 1
            elif math.isnan(diffnorm) or math.isinf(diffnorm):
                crash += 1
            else:
                negative_prediction += 1     
    
    if (positive_prediction + negative_prediction) is 0:
        return {"crash":crash/count, "positive": positive_prediction/count, "negative": negative_prediction/count, "accuracy":0,  "total":positive_prediction+negative_prediction}    
    else:
        return {"crash":crash/count, "positive": positive_prediction/count, "negative": negative_prediction/count, "total":positive_prediction+negative_prediction, "accuracy":positive_prediction/(positive_prediction + negative_prediction+crash)}  

#The number of fault injection experiment that the program can predict as SDC
def single_SDC_prediction(fault_inject_run, golden_run, experiments, threshold = 0.001):
    pass

In [8]:
# During the fault injection experiment, filter out the bad prediction case and left with the good prediction case.
GOOD_MASKED_RUN = []

In [9]:
diffnorms = np.array(FAULT_INJECTION_CAMPAIGN.diffnorm, dtype='float')
length = len(TEST_EXPERIMENTS)
list_indexs = []
pre_res = {}
index = 0
#Understand the pruning technique.
for _, row in TEST_EXPERIMENTS.iterrows():
    file_index = row["fileindex"]
    norm = float(row["diffnorm"])
    
    index += 1
    if index % (length/10) ==0:
        print(index/float(length), "experiment")
    
    #verify the select fault injection experiment is valuable
    if norm != 0 and not np.isinf(norm) and not math.isnan(norm) and norm < THRESHOLD :
        fault_inject_run = pd.read_csv(PROPAGATION_DATA_PATH+"/appstate_"+str(file_index)+".log",  sep=' ', names=['file', 'linenum', 'variable', 'value'])
        pre_res = single_masked_prediction(fault_inject_run, GOLDEN_RUN, FAULT_INJECTION_CAMPAIGN, THRESHOLD)
        
        #if the predictor reject the current experiment for prediction, continue to next experiment
        if not pre_res:
            continue
        
        if pre_res['accuracy'] >= 1 :
            GOOD_MASKED_RUN.append(file_index)
        
        list_indexs.append({"diffnorm":math.log10(norm), "index":index, "crash":pre_res["crash"], "negative":pre_res["negative"], "positive":pre_res["positive"], 'accuracy':pre_res['accuracy']})

FileNotFoundError: File b'../static/data/black-scholes//appstate_2002402.log' does not exist

In [None]:
import matplotlib.pyplot as plt
#plt.style.use('seaborn-whitegrid')
fig = plt.figure()
list_indexs = sorted(list_indexs, key = lambda i: i['diffnorm']) 
x = []
y = []
z = []
k = []
for i in range(len(list_indexs)):
    x.append(list_indexs[i]['diffnorm'])
    y.append(list_indexs[i]['positive'] + list_indexs[i]['negative'] + list_indexs[i]['crash'])
    z.append(list_indexs[i]['negative'])
    k.append(list_indexs[i]['accuracy'])

In [None]:
sns.set(style="ticks")
sns.axes_style('white')
df = pd.DataFrame(data = {"x":x, "y":y, "k":k,"z":z})

plt.scatter(x, y, c=k, cmap='viridis')
plt.xlabel('norm(log10)')
plt.ylabel('prediction rate')
#plt.clim(0,1)
plt.colorbar()
plt.show()

In [None]:
plt.subplot(2, 2, 1)
plt.hist(df.x)
plt.xlabel('norm')
plt.ylabel('number of experiments')

plt.subplot(2, 2, 2)
plt.hist(df.y)
plt.xlabel('positive prediction')

plt.subplot(2, 2, 3)
plt.hist(df.k)
plt.xlabel('prediction accuracy')

plt.subplot(2, 2, 4)
plt.hist(df.z)
plt.xlabel('negative prediction')

# We use a first 1000 experiment as a filter to filter out outlier propagation data

In [None]:
tracking_length = len(GOLDEN_RUN.value)
def construct_boundary(cases):
    boundary = []
    for i in range(tracking_length):
        boundary.append(0)

    for i in range(len(cases)):
        index = cases[i]
        #masked_run_path = "../static/data/cg/cg_in8/appstate_"+ str(index)+".log" 
        masked_run_path = PATH +"/appstate_"+ str(index) + ".log"
        #Check whether the tracking file is on the path
        #TODO: check whether can regenerate the data
        #print(masked_run_path)
        if not os.path.isfile(masked_run_path):
            print("file does not exist")
            continue

        masked_run = pd.read_csv(masked_run_path,  sep=" ", names=['file', 'linenum', 'variable', 'value'])
        masked_run_value = np.array(masked_run.value, dtype='float')
        
        #This is a werid information in the data.
        #TODO: may check why such outcome is shown
        if len(masked_run_value) < tracking_length:
            print("odd!")
            continue

        #array contain NAN, ignore the run
        if np.isnan(np.min(masked_run_value)):
            continue

        comparision_result = np.abs(masked_run_value[0:tracking_length] - GOLDEN_RUN.value)
        for j in range(tracking_length):
            if comparision_result[j] > boundary[j]:
                boundary[j] = comparision_result[j]
                
    return boundary

def predict(boundary):
    positive_prediction = 0
    crash = 0
    negative_prediction = 0
    result = {}
    bits = []

    for index, row in FAULT_INJECTION_TEST_CAMPAIGN.iterrows():
        instruction_index = int(row["DI"]) 
        
        inject_error = abs(row["out_xor"])
        diffnorm = float(row['diffnormr'])
        
        if inject_error <= boundary[instruction_index]:
            bits.append(row["bit"])
            if diffnorm < THRESHOLD:
                positive_prediction += 1
            elif math.isnan(diffnorm) or math.isinf(diffnorm):
                crash += 1
            else:
                negative_prediction += 1     
    
    result["positive"] = positive_prediction
    result["negative"] = negative_prediction
    result["crash"] = crash
    result["accuracy"] = positive_prediction/(positive_prediction + crash + negative_prediction)
    
    return (result, bits)

In [None]:
print(len(FAULT_INJECTION_TEST_CAMPAIGN))

In [None]:
boundary = construct_boundary(GOOD_MASKED_RUN)

In [None]:
res, bits = predict(boundary)
plt.hist(bits)
print(res)
#plt.hist(df.x)
#plt.xlabel('norm')
#plt.ylabel('number of experiments')

In [None]:
#plot the boundary
plt.plot(boundary)

# The Golden Boundary of the Fault Injection Experiment

1. A bit map base boundary
2. A value scale base boundary

In the following code, the construction of the golden boundary is based on the exhuast fault injection campaign.
The method of using boundary analysis is based on the assumption that for each fault injection location, there is 
a threshold such that the inject error exceed that value will result into SDC outcome.
 
 
Using the fault injection campaign to construct the boundary is also not perfect because each location has 64 sample to estimate the threshold value

In [None]:
#dataset = "fft"

In [None]:
path = ""
# Threshold that used to decide whether the final outcome is SDC or Masked.
threshold = 0.01

if dataset == "cg":
    path = "../static/data/cg_in8.csv"
    threshold = 0.07
elif dataset == "fft":
    path = "../static/data/fft_complete.csv"
    threshold = 0.001
elif dataset == "lu":
    path = "../static/data/lu_complete.csv"
    threshold = 0.0001

fault_injection_campaign = pd.read_csv(path)
#fault_injection_campaign
#File index, Function, Line, Variable, out_xor, out_xor_relative, diffnormr, outcome, iter, bit, DI

In [None]:
masked_golden_boundary = {}
sdc_golden_boundary = {}
x = []
y = []
z = []
count = 0
for index, row in fault_injection_campaign.iterrows():
    #print(index)
    inject_error = abs(float(row["out_xor"]))
    instruction_index = row["DI"]
    norm = row["diffnormr"]
    
    #log scale the inject error
    if inject_error > 1:
        inject_error = math.log10(inject_error)
    
    #if inject_error > 20:# and norm < threshold:
    #    inject_error = 20
    #init
    
    if instruction_index not in masked_golden_boundary:
        #print(instruction_index)
        masked_golden_boundary[instruction_index] = 0
        sdc_golden_boundary[instruction_index] = 10000#sys.float_info.max
        x.append(instruction_index)
    
    #if the outcome is SDC
    if norm > threshold:
        if sdc_golden_boundary[instruction_index] > inject_error:
            sdc_golden_boundary[instruction_index] = inject_error
            count += 1
    
    #if the outcome is masked
    elif norm < threshold:
        if masked_golden_boundary[instruction_index] < inject_error:
            masked_golden_boundary[instruction_index] = inject_error
    
# plot the golden boundary
for i in x:
    y.append(masked_golden_boundary[i])
    z.append(sdc_golden_boundary[i])

print(threshold)

#plt.plot(x, y, color='green')
#plt.plot(x, z, color='red')
#plt.ylabel("Negative Prediction")
#plt.xlabel("instruction")

In [None]:
start = 0
end = -1

x1 = np.array(x)[start:end]
y1 = np.array(y)[start:end]
z1 = np.array(z)[start:end]

print(threshold)

plt.plot(x1, y1, color='green')
#plt.plot(x1, z1, color='red')
plt.ylabel("boundary value")
plt.xlabel("instruction")

# The standard diviation boxplot.

1. Random sample one thousand experiments and draw the standard deviation plot 

In [None]:
if dataset == "cg":
    fault_injection_campaign = pd.read_csv('../static/data/cg_in8.csv')
    path = "../static/data/cg/cg_in8"
elif dataset == "fft":
    fault_injection_campaign = pd.read_csv('../static/data/fft20000.csv')
    path = "../static/data/fft/fft_complete"
elif dataset == "lu":
    fault_injection_campaign = pd.read_csv('../static/data/lu_complete.csv')
    path = "../static/data/lu/lu_20000"

fault_injection_campaign_sampleset = fault_injection_campaign.sample(n=500, replace=False)
masked_runs = []
expected_program_length = len(golden_run)
golden_run_value = np.array(golden_run['value'], dtype='float')
    
for index, row in fault_injection_campaign_sampleset.iterrows():
    #print(row['oun,tcome'])
    #print(row['File_index'])
    if row['outcome'] == 'Masked':
        fault_inject_run = pd.read_csv(path+"/appstate_"+str(row['File_index'])+".log",  sep=' ', names=['file', 'linenum', 'variable', 'value'])
        fault_inject_run_value = np.array(fault_inject_run['value'], dtype='float')
        if len(fault_inject_run) < expected_program_length:
            continue
        masked_runs.append(np.abs(fault_inject_run_value[0:expected_program_length] - golden_run_value))
    
    
masked_runs = np.array(masked_runs)

In [None]:
plt.plot(np.mean(masked_runs, axis=0)[0:300], color='green')
#plt.plot(np.std(masked_runs, axis=0)[150:350], color='orange')
#plt.plot(masked_runs[], color='black')

In [None]:
golden_run_path = "../static/data/cg/cg_in8/golden.log"
golden_run = pd.read_csv(golden_run_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'], header=0)
golden_run_value = np.array(golden_run.value, dtype='float')
cg_fault_injection = pd.read_csv("../static/data/cg_in8.csv")
SIZE = len(cg_fault_injection)

# Experiment 1 -- CG
Take all the fault injections in the inital condition. Find all the fault injection cases that result into masked. For each masked case, compare error run with the golden run to get a error run curve. Combine all the error run curve to construct a error boundary for masked and SDC.

In [None]:
cg_fault_injection_experiment = pd.read_csv('matrix/in10_data/in10/injectlog.log',  sep=' ', names=['file', 'linenum', 'variable','byte_num', 'corrupted','init_value','to', 'corrupt_value', 'mask', 'byte','expo', 'ss', 'op', 'diffnorm', 'empty'])

### SDC Ratio over entire program

In [None]:
iters = list(cg_fault_injection.iter)
iters.reverse()
count  = 0
last_zero_iteration_index = SIZE - iters.index(0)
dynamic_step_to_record_computation_result = int(last_zero_iteration_index/64)

for i in range(last_zero_iteration_index, len(iters)):
    if cg_fault_injection.outcome[i] == "SDC":
        count += 1

print("SDC ratio over entire program: ", count/len(iters))

In [None]:
#only tracking the maximum value of the masked run after this time step.
tracking_length = len(golden_run_value)

def construct_boundary(number_of_dynamic_instruction):
    boundary = []
    ground_truth = {"Masked": 0, "SDC":0, "DUE":0}

    for i in range(tracking_length):
        boundary.append({'max':0, 'min':0})

    for i in range(number_of_dynamic_instruction):
        index = int(random.random() * SIZE)
        
        ground_truth[cg_fault_injection.outcome[index]] += 1
        
        if cg_fault_injection.outcome[index] == "Masked":
            masked_run_path = "cg_simulation/appstate_"+ str(index)+".log" 

            #Check whether the tracking file is on the path
            #TODO: check whether can regenerate the data
            if not os.path.isfile(masked_run_path):
                continue

            masked_run = pd.read_csv(masked_run_path,  sep=",", names=['file', 'linenum', 'variable', 'value'], header=0)
            masked_run_value = np.array(masked_run.value, dtype='float')
            
            #This is a werid information in the data.
            #TODO: may check why such outcome is shown
            if len(masked_run_value) < tracking_length:
                print("odd!")
                continue
            
            #array contain NAN, ignore the run
            if np.isnan(np.min(masked_run_value)):
                continue
            
            comparision_result = masked_run_value[0:tracking_length] - golden_run_value
        else:
            continue
        
        
        for j in range(dynamic_step_to_record_computation_result, tracking_length):
            if comparision_result[j] > boundary[j]['max'] and comparision_result[j] >= 0:
                boundary[j]['max'] = comparision_result[j]

            if comparision_result[j] < boundary[j]['min'] and comparision_result[j] < 0:
                boundary[j]['min'] = comparision_result[j]
                
    return [boundary, ground_truth]

def predict(boundary):
    masked_true_positive = 0
    masked_false_positive = 0
    sdc_true_positive = 0
    sdc_false_positive = 0
    result = {}

    for i in range(last_zero_iteration_index + 1, SIZE):
        index = math.floor(i/64)
        if cg_fault_injection.out_xor[i] < boundary[index]['max'] and cg_fault_injection.out_xor[i] > boundary[index]['min']:
            if cg_fault_injection.outcome[i] == "Masked":
                masked_true_positive += 1
            else:
                masked_false_positive += 1
        else:
            if cg_fault_injection.outcome[i] == "SDC":
                sdc_true_positive += 1
            else:
                sdc_false_positive += 1

    result["predict masked case"] =  (masked_true_positive + masked_false_positive)/SIZE
    result["predict SDC case"] = (sdc_true_positive + sdc_false_positive)/SIZE
    result["masked prediction accuracy"] = masked_true_positive/(masked_true_positive + masked_false_positive)
    result["SDC prediction accuracy"] = sdc_true_positive/(sdc_true_positive + sdc_false_positive)
    
    return result

## Can we think this as a lable propagation problem?

In [None]:
experiments = []

ground_truth = []

for i in range(10):
    print(i)
    boundary = construct_boundary(2000)    
    experiments.append(predict(boundary[0]))
    ground_truth.append(boundary[1])

    
uniform_test = []
intuition = []

for i in range(10):
    intuition.append(experiments[i]['predict SDC case'])
    uniform_test.append(ground_truth[i]['SDC']/2000)

print(np.mean(intuition))
print(intuition)
print(np.mean(uniform_test))

# Experiment 2 -- FFT

In [None]:
path = "../static/data/cg/cg_in8/"
golden_run_path = path+"golden.log"
golden_run = pd.read_csv(golden_run_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'])
golden_run_value = np.array(golden_run.value, dtype='float')
fault_injection_experiment = pd.read_csv(path+'injectlog.log',  sep=' ', names=['file', 'linenum', 'variable','byte_num', 'corrupted','init_value','to', 'corrupt_value', 'mask', 'byte','expo', 'ss', 'op', 'diffnorm', 'empty'])
SIZE = len(fault_injection_experiment)
threshold = 0.001

In [None]:
def load_data(path):
    
    golden_run_path = path + "golden.log"
    golden_run = pd.read_csv(golden_run_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'])
    golden_run_value = np.array(golden_run.value, dtype='float')
    fault_injection_experiment = pd.read_csv(path+"injectlog.log",  sep=' ', names=['file', 'linenum', 'variable','byte_num', 'corrupted','init_value','to', 'corrupt_value', 'mask', 'byte','expo', 'ss', 'op', 'diffnorm', 'empty'])
    
    return [golden_run,fault_injection_experiment]

def SDC_ratio(experiments): 
    sdc_count = 0
    for i in range(0, len(experiments)):
        if float(experiments.diffnorm[i]) > threshold:
            sdc_count += 1
    
    return sdc_count / len(experiments)

def getBoundary(golden_run, percent, experiments, path):
    boundary = []
    nums = int(len(experiments) * percent)
    
    print(nums)

    for i in range(len(golden_run)):
        boundary.append({"min": 0, "max":0})

    for i in range(nums):
        
        diffnorm = float(experiments.diffnorm[i])
        if diffnorm > threshold or math.isnan(diffnorm) or math.isinf(diffnorm):
            continue
        
        index = int(random.random() * len(experiments))

        file_path = path+"appstate_"+str(index)+".log"
    
        fault_inject_run = pd.read_csv(file_path,  sep=' ', names=["file", "linenum", "variable", "value"])
    
        if len(fault_inject_run) < len(golden_run):
            print("weird!")
            continue
    
        values = np.array(fault_inject_run.value[0:len(golden_run)], dtype="float") - np.array(golden_run.value, dtype="float")
    
        for j in range(len(golden_run)):              
            
            if values[j] >= 0 and values[j] > boundary[j]["max"]:
                boundary[j]["max"] = values[j]
                       
            if values[j] < 0 and values[j] < boundary[j]["min"]:
                boundary[j]["min"] = values[j] 
    
    return boundary

def prediction(experiments, boundary, threshold=0.001):
    positive_prediction = 0
    negative_prediction = 0
    crash = 0
    length = len(experiments)
    
    for i in experiments.fileindex:
        index = math.floor(i/64)
        
        #print(i)
        
        inject_error = float(experiments.corrupt_value[i]) - float(experiments.init_value[i])
    
        if(inject_error > boundary[index]["min"] and inject_error < boundary[index]["max"]):
            if float(experiments.diffnorm[i]) < threshold:
                positive_prediction += 1
            elif math.isnan(float(experiments.diffnorm[i])) or math.isinf(float(experiments.diffnorm[i])):
                crash += 1
            else:
                negative_prediction += 1
    return {"positive": positive_prediction/len(experiments), "negative": negative_prediction/len(experiments), "Crash": crash/len(experiments)}

# Predict Result of 20 x 20 Matrix

In [None]:
#golden_run, experiments = load_data(path)
#boundary = getBoundary(golden_run, 0.0001, experiments, path)
#experiments = pd.read_csv("../static/data/cg/cg_in8/injectlog.log",  sep=' ', names=['file', 'linenum', 'variable','byte_num', 'corrupted','init_value','to', 'corrupt_value', 'mask', 'byte','expo', 'ss', 'op', 'diffnorm', 'empty'])
#print(len(experiments))

# Using 6400 fault injections run in FFT to test the masked case. 

In [None]:
boundary = []

for i in range(len(golden_run)):
    boundary.append({"min": 0, "max":0})

for i in range(8400):
    
    if fault_injection_experiment.diffnorm[i] > 0.07:
        continue
    
    index = int(random.random() * len(fault_injection_experiment))

    file_path = "matrix/in27_data/in27/appstate_"+str(index)+".log"
    
    fault_inject_run = pd.read_csv(file_path,  sep=' ', names=["file", "linenum", "variable", "value"])
    
    if len(fault_inject_run) < len(golden_run):
        print("weird!")
        continue
    
    values = np.array(fault_inject_run.value[0:len(golden_run)], dtype="float") - np.array(golden_run.value, dtype="float")
    
    for j in range(len(golden_run)):
        if values[j] >= 0 and values[j] > boundary[j]["max"]:
            boundary[j]["max"] = values[j]
                       
        if values[j] < 0 and values[j] < boundary[j]["min"]:
            boundary[j]["min"] = values[j] 

In [None]:
positive_prediction = 0
negative_prediction = 0
for i in range(len(fault_injection_experiment.diffnorm)):
    
    index = math.floor(i/64)
    
    #if i%64 <= 52:
    #    continue
    
    inject_error = float(fault_injection_experiment.corrupt_value[i]) - float(fault_injection_experiment.init_value[i])
    
    if(inject_error > boundary[index]["min"] and inject_error < boundary[index]["max"]):
        if fault_injection_experiment.diffnorm[i] < 0.07:
            positive_prediction += 1
        else:
            negative_prediction += 1

In [None]:
#relative error and absolute error
#for i in range(len(golden_run_value)):
#   if golden_run_value[i] != 0:
#        absolute = abs(golden_run_value[i] -  masked_run_value[i])

false_positive = 0
true_positive = 0
unsure = 0
total = (811 - 160) * 64
for i in range(160, 811):
    diff = abs(golden_run_value[i] -  masked_run_value[i]) 
    for b in range(64):
        if diff > abs(cg_fault_injection.out_xor[i * 64 + b]) and cg_fault_injection.outcome[i * 64 + b] != "Masked":
            #print(cg_fault_injection.diffnormr[i * 64 + b], cg_fault_injection.Variable[i * 64 + b], (diff - abs(cg_fault_injection.out_xor[i * 64 + b])))
            false_positive += 1
            print(i * 64 + b)
        elif diff > abs(cg_fault_injection.out_xor[i * 64 + b]) and cg_fault_injection.outcome[i * 64 + b] == "Masked":
            true_positive += 1
        else:
            unsure += 1
    
    #sdc_diff = abs(golden_run_value[i] -  sdc_run_value[i])
    #for b in range(64):
    #    if sdc_diff < abs(cg_fault_injection.out_xor[i * 64 + b]) and cg_fault_injection.outcome[i * 64 + b] != "SDC":
    #        false_positive += 1
    #    elif sdc_diff < abs(cg_fault_injection.out_xor[i * 64 + b]) and cg_fault_injection.outcome[i * 64 + b] == "SDC":
    #        true_positive += 1
    #    else:
    #        unsure +=1
            
print(false_positive/total)
print(true_positive/total)
print(unsure/total)
    
#print(i, cg_fault_injection.Variable[i], cg_fault_injection.out_xor[i], cg_fault_injection.outcome[i])
#for i in range(243*64,244*64):
#    print(i,cg_fault_injection.Variable[i], cg_fault_injection.out_xor[i], cg_fault_injection.outcome[i])
#print(golden_run_value[243])
#print(len(cg_fault_injection))
#golden_run_value-error_run_value[0:811]

In [None]:
number_of_instructions = {}

propagation_path = []

for index in range(100):
    if index in[15731, 16246, 17523]:
        continue
    
    file_path = "cg_simulation/appstate_"+str(index)+".log"
    error_run = pd.read_csv(file_path,  sep=' ')
    
    
    #For different fault injection case, what is the number of different execution dynamic instructions
    if len(error_run) in number_of_instructions:
        number_of_instructions[len(error_run)] += 1
    else:
        number_of_instructions[len(error_run)] = 1
        
    #
    index = min(len(golden_run), len(error_run))
    #result = np.array(golden_run.value, dtype=float)[:index] - np.array(error_run.value, dtype=float)[:index]
    #propagation_elements = golden_run.line[np.where(result != 0)[0]]
    
    #path = "=>"
    #number_of_propagation_element = 5
    #for e in propagation_elements:
    #    if str(e) not in path:
    #        path += str(e)
    #        path += "=>"
    #        number_of_propagation_element -= 1
        
    #    if number_of_propagation_element == 0:
    #        break
    #if path not in propagation_path:
    #    propagation_path.append(path)

## What is the probability of the different number of storing dynamic instructions.
## What is the probability of SDC.

It's expected that if the program ends early, then there an unexpected crash during the program execution in the earlier stage. At the same time, if the program has more execution than it expected. It will have high chance causes silent data corruption.

In [None]:
cg_outcome = cg_fault_injection['outcome'].value_counts()

print(cg_outcome)
print(sum(cg_outcome))
#propagation_path

In [None]:
number_of_instructions

# If you want to rewrite your code.

4. interactive fault injection campaign.

### 1. A sensitive analysis across the whole program with a mapping framework.
A desity scatter plot to understand the input and output sensitivity of the program. User can selective choose the high sensitive data and mapping back to the original visualization.
    
    a. how many clusters in the plot.
    
    b. Where is each of them comes from. 
    
    c. The sample that comes from the fault injection in same location.

#### 2. Explore function level resiliency of the program. What's the difference compare to the source code level?

    a.Explore the resiliency of different program component. Aggregate the data in variable level


#### 3. How to measure the impact of one variable/function over the other?
 


# The number of times a function is called.

In [None]:
counter = {}

line_to_func = {167:'readA', 175:'readB', 33:'waxpby', 25:'matvec', 48:'dot_r2', 75:'solve_cg', 76:'solve_cg',87:'solve_cg',57:'dot', 90:'solve_cg',91:'solve_cg',40:"daxpby", 82:'daxpby',83:'daxpby',84:'daxpby'}

for index, row in golden_run.iterrows():
    key = line_to_func[row['line']]
    if key not in counter:
        counter[key] = 1
    else:
        counter[key] += 1

In [None]:
number_of_instruction = sum(list(counter.values()))
for item in counter.items():
    print(item[0], item[1]/number_of_instruction * 100)

# Error Propagation Analysis in one program component.

In [None]:
golden_run_path = "cg_simulation/golden.log"
golden_run = pd.read_csv(golden_run_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'])
golden_run_value = np.array(golden_run.value)
##### this is a very interesting but werid case
##### There is a NAN occur during the computation, but the  error is masked at the end.
#index_range = [17341, 24381]

##### cg p_ap_dot first interation 64 experiment
#index_range = [15552, 15603]

#file_path = "cg_simulation1/appstate_"+str(17341)+".log"
#error_run = pd.read_csv(file_path,  sep=' ')

#for i in range(len(error_run.value)):
#    print(error_run.line[i], error_run.variable[i], error_run.value[i])
#print(len(error_run.value))

#print(error_run)
#data_set = {}


#golden_run_value = np.array(golden_run.value[0:800], dtype='float')

#for index in range(index_range[0], index_range[1]+1):
#    file_path = "cg_simulation/appstate_"+str(index)+".log"
#    error_run = pd.read_csv(file_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'])

#    data_set[index] = golden_run_value - np.array(error_run.value[0:800], dtype='float')


##### a specific case line 75 case 10290
##### a specific case line 75 case 10296
indexs =  [10290, 10274]# 10288] #10297]# 10296, 10289]

delta_x = [65536, 1.0]# 1321922331132047.5]#, -307779.3308780107]
x_data = range(0, 811, 1)

for index,item in enumerate(indexs): 
    file_path = "cg_simulation/appstate_"+str(item)+".log"
    error_run = pd.read_csv(file_path,  sep=' ', names=['file', 'linenum', 'variable', 'value'])
    error_run_value = np.array(error_run.value)

    sensitivity_val = (error_run_value[0:811] - golden_run_value[0:811])/delta_x[index]
    sns.lineplot(x=x_data, y=sensitivity_val, sort=False, lw=1)
    
print(sensitivity_val.argmax())

In [None]:
#for index in range(index_range[0], index_range[1]+1):
#    print(len(data_set[index].values))
#data_set[index] = golden_run.values - error_run.values   


x_data = range(0,811,1)
sns.lineplot(x=x_data, y=sensitivity_val, sort=False, lw=1)

#for index in range(index_range[0], index_range[0]+20): 
#    sns.lineplot(x=x_data, y=data_set[index], sort=False, lw=1)

## What role the truncation error is playing in the propagation analysis
## What's the threshold of different program components.
## How much error is masked by the truncate error and How much error is masked by the model feature