In [2]:
import pandas as pd

In [3]:
files = [
    "data/swin_base_patch4_window7_224_imagenet_fp32_WindowAttention_1.00e-03.csv", 
    "data/swin_base_patch4_window7_224_imagenet_fp32_SwinTransformerBlock_1.00e-03.csv",
    "data/swin_base_patch4_window7_224_imagenet_fp32_Mlp_1.00e-03.csv",
    "data/vit_base_patch16_224_imagenet_fp32_Attention_1.00e-03.csv", 
    "data/vit_base_patch16_224_imagenet_fp32_Block_1.00e-03.csv",
    "data/vit_base_patch16_224_imagenet_fp32_Mlp_1.00e-03.csv",
]

In [4]:
for file in files:
    print("="*50)
    if "SwinTransformerBlock" in file:
        print(f" [INFO] SwinTransformerBlock")
    elif "Mlp" in file:
        print(f" [INFO] Mlp")
    elif "WindowAttention" in file:
        print(f" [INFO] WindowAttention")
    else:
        print(f" [INFO] Unknown")
    
    df = pd.read_csv(file)

    accuracy_wo_fault = (df["prediction_without_fault"] == df["ground_truth"]).sum() / len(df)
    accuracy_with_fault = (df["prediction_with_fault"] == df["ground_truth"]).sum() / len(df)

    print(f" [INFO] Accuracy without fault: {accuracy_wo_fault * 100:.4f}%")
    print(f" [INFO] Accuracy with fault: {accuracy_with_fault * 100:.4f}%")
    print()
    count = 0
    for i, row in df.iterrows():
        out_wo_fault, out_with_fault = int(row["prediction_without_fault"]), int(row["prediction_with_fault"])
        if out_with_fault != out_wo_fault:
            count += 1
            print(f" [INFO] Critical error for image {i}. Expected {out_wo_fault} but got {out_with_fault}. Ground truth: {int(row['ground_truth'])}")

    print(f" [INFO] Total critical errors: {count}")

 [INFO] WindowAttention


FileNotFoundError: [Errno 2] No such file or directory: 'data/swin_base_patch4_window7_224_imagenet_fp32_WindowAttention_1.00e-03.csv'

In [5]:
dataset = "imagenet"
precision = "fp32"
models = [
    "swin_base_patch4_window7_224",
    "vit_base_patch16_224",
]

for model in models:
    file = f"data/{model}_{dataset}_{precision}_correct_predictions.csv"
    if model == "swin_base_patch4_window7_224":
        to_open = "data/swin_base_patch4_window7_224_imagenet_fp32_WindowAttention_1.00e-03.csv"
    elif model == "vit_base_patch16_224":
        top_open = "data/vit_base_patch16_224_imagenet_fp32_Attention_1.00e-03.csv"

    df = pd.read_csv(to_open)
    df = df[df["prediction_without_fault"] == df["ground_truth"]]
    df.to_csv(file, index=True)

FileNotFoundError: [Errno 2] No such file or directory: 'data/swin_base_patch4_window7_224_imagenet_fp32_WindowAttention_1.00e-03.csv'

In [6]:
import torch
import os

INDICES_IDX, VALUES_IDX = 0, 1

dataset = "imagenet"
precision = "fp32"
models = [
    "swin_base_patch4_window7_224",
    "vit_base_patch16_224",
]

final_dfs = []
for model in models:
    dfs = []
    logits = [f for f in os.listdir(f"data/top5prob") if f"{model}_{dataset}_{precision}" in f]
    
    # logit = next(iter(logits))
    # logit = torch.load(f"data/top5prob/{logit}")
    # print(logit.values[0])
    for logit_path in logits:
        logit = torch.load(f"data/top5prob/{logit_path}")
        dfs.append({
            "model": model,
            "dataset": dataset,
            "precision": precision,
            "top1_prob": logit[VALUES_IDX][0].item(),
            "top2_prob": logit[VALUES_IDX][1].item(),
            "top_diff": logit[VALUES_IDX][0].item() - logit[VALUES_IDX][1].item(),
            "top1_class": int(logit[INDICES_IDX][0].item()),
            "top2_class": int(logit[INDICES_IDX][1].item()),
        })

    df = pd.DataFrame(dfs)
    final_dfs.append((model, df))
    df.to_csv(f"data/{model}_{dataset}_{precision}_top5_prob.csv", index=False)



  logit = torch.load(f"data/top5prob/{logit_path}")


In [55]:
for model, df in final_dfs:
    print("="*50)
    print(f"[INFO] {model}")
    # print(f" Min diff: {df['top_diff'].min():.4f}")
    # print(f" Max diff: {df['top_diff'].max():.4f}")
    # print(f" Mean diff: {df['top_diff'].mean():.4f}")
    # print(f" Median diff: {df['top_diff'].median():.4f}")
    # print(f" Q25% diff: {df['top_diff'].quantile(0.25):.4f}")
    # print(f" Q75% diff: {df['top_diff'].quantile(0.75):.4f}")
    # print(f" Q5% diff: {df['top_diff'].quantile(0.05):.4f}")
    # print(f" Q10% diff: {df['top_diff'].quantile(0.10):.4f}")
    # print(f" diff < 0.01: {(df['top_diff'] > 0.01).sum()}")
    print(f" top1_prob < 10%: {(df['top1_prob'] < 0.1).sum()}")
    print(f" top1_prob < 20%: {(df['top1_prob'] < 0.2).sum()}")
    print(f" top1_prob < 30%: {(df['top1_prob'] < 0.3).sum()}")
    print(f" top1_prob < 40%: {(df['top1_prob'] < 0.4).sum()}")
    print(f" top1_prob < 50%: {(df['top1_prob'] < 0.5).sum()}")


[INFO] swin_base_patch4_window7_224
 top1_prob < 10%: 6
 top1_prob < 20%: 128
 top1_prob < 30%: 443
 top1_prob < 40%: 1257
 top1_prob < 50%: 2671
[INFO] vit_base_patch16_224
 top1_prob < 10%: 7
 top1_prob < 20%: 88
 top1_prob < 30%: 416
 top1_prob < 40%: 1164
 top1_prob < 50%: 2671


In [61]:
# get the top5 prob of the crirical images

model_names = {
    "vit_base_patch16_224": "ViT-B224",
    "swin_base_patch4_window7_224": "Swin-B-W7-224"
}

def get_name(x):
    return model_names[x]

critical_images_df = pd.read_csv("data/fi_critical_images.csv")

crit_prob_dfs = []

for i, row in critical_images_df.iterrows():
    model = row["model"]
    image_id = row["image_id"]
    file = f"data/top5prob/{model}_imagenet_fp32_top5prob_{image_id}.pt"
    logit = torch.load(file)
    print(f" [INFO] Image {image_id} for model {model}")
    print(f" [INFO] Top1 prob: {logit[VALUES_IDX][0].item():.4f}")
    print(f" [INFO] Top2 prob: {logit[VALUES_IDX][1].item():.4f}")
    crit_prob_dfs.append({
        "model": model,
        "image_id": image_id,
        "top1_prob": logit[VALUES_IDX][0].item(),
        "top2_prob": logit[VALUES_IDX][1].item(),
        "top_diff": logit[VALUES_IDX][0].item() - logit[VALUES_IDX][1].item(),
        "top1_class": int(logit[INDICES_IDX][0].item()),
        "top2_class": int(logit[INDICES_IDX][1].item()),
    })

crit_prob_df = pd.DataFrame(crit_prob_dfs)
crit_prob_df["model"] = crit_prob_df["model"].apply(get_name)
crit_prob_df.to_csv("data/critical_images_top5_prob.csv", index=False)
crit_prob_df.to_excel("data/critical_images_top5_prob.xlsx", index=False)


 [INFO] Image 7729 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.2539
 [INFO] Top2 prob: 0.2530
 [INFO] Image 20727 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.4278
 [INFO] Top2 prob: 0.4188
 [INFO] Image 21088 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.2401
 [INFO] Top2 prob: 0.2392
 [INFO] Image 30515 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.3462
 [INFO] Top2 prob: 0.3445
 [INFO] Image 3249 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.2475
 [INFO] Top2 prob: 0.2461
 [INFO] Image 5529 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.3448
 [INFO] Top2 prob: 0.3421
 [INFO] Image 7729 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.2539
 [INFO] Top2 prob: 0.2530
 [INFO] Image 19373 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.1827
 [INFO] Top2 prob: 0.1822
 [INFO] Image 23333 for model swin_base_patch4_window7_224
 [INFO] Top1 prob: 0.3918
 [INFO] Top2 prob: 0.3894
 [INF

  logit = torch.load(file)


In [8]:
for model, df in final_dfs:
    print("="*50)
    df_test = df.copy()
    df_test = df_test[(df_test["top1_prob"] < 0.2) & (df_test["top_diff"] <= 0.02)]
    for i, row in df_test.iterrows():
        print(f" [INFO] {model} - {row['top1_prob']:.4f} - {row['top2_prob']:.4f} - {row['top_diff']:.4f}")
    print(f" [INFO] Total: {len(df_test)}")

 [INFO] swin_base_patch4_window7_224 - 0.1061 - 0.0954 - 0.0107
 [INFO] swin_base_patch4_window7_224 - 0.0954 - 0.0816 - 0.0138
 [INFO] swin_base_patch4_window7_224 - 0.1340 - 0.1225 - 0.0115
 [INFO] swin_base_patch4_window7_224 - 0.1827 - 0.1822 - 0.0005
 [INFO] swin_base_patch4_window7_224 - 0.1675 - 0.1513 - 0.0162
 [INFO] swin_base_patch4_window7_224 - 0.1272 - 0.1131 - 0.0141
 [INFO] swin_base_patch4_window7_224 - 0.1779 - 0.1666 - 0.0113
 [INFO] swin_base_patch4_window7_224 - 0.1823 - 0.1652 - 0.0171
 [INFO] swin_base_patch4_window7_224 - 0.0703 - 0.0642 - 0.0061
 [INFO] swin_base_patch4_window7_224 - 0.1678 - 0.1577 - 0.0102
 [INFO] swin_base_patch4_window7_224 - 0.1482 - 0.1389 - 0.0093
 [INFO] swin_base_patch4_window7_224 - 0.0939 - 0.0839 - 0.0101
 [INFO] swin_base_patch4_window7_224 - 0.1093 - 0.1057 - 0.0036
 [INFO] swin_base_patch4_window7_224 - 0.1703 - 0.1648 - 0.0056
 [INFO] swin_base_patch4_window7_224 - 0.1364 - 0.1237 - 0.0127
 [INFO] swin_base_patch4_window7_224 - 0

In [None]:
machines = [
    "titanv",
    "a20001",
]

files = [
    "swin_base_patch4_window7_224_imagenet_fp32_WindowAttention_1.00e-03.csv", 
    "swin_base_patch4_window7_224_imagenet_fp32_SwinTransformerBlock_1.00e-03.csv",
    "swin_base_patch4_window7_224_imagenet_fp32_Mlp_1.00e-03.csv",
    "vit_base_patch16_224_imagenet_fp32_Attention_1.00e-03.csv", 
    "vit_base_patch16_224_imagenet_fp32_Block_1.00e-03.csv",
    "vit_base_patch16_224_imagenet_fp32_Mlp_1.00e-03.csv",
]

df_fi = []

for machine in machines:
    full_machine = f"carol{machine}"
    print("="*50)
    print(f" [INFO] {machine}")
    for file in files:
        path = f"data/vit_fi_res/{full_machine}/{file}"
        df = pd.read_csv(path)

        split = file.split("_imagenet_")
        model = split[0]
        microop = split[1].split("_")[1]

        print("-"*20)
        print
        if "SwinTransformerBlock" in file:
            print(f" [INFO] SwinTransformerBlock")
        elif "Mlp" in file:
            print(f" [INFO] Mlp")
        elif "WindowAttention" in file:
            print(f" [INFO] WindowAttention")
        elif "Attention" in file:
            print(f" [INFO] Attention")
        elif "Block" in file:
            print(f" [INFO] Block")
        else:
            print(f" [INFO] Unknown")

        accuracy_wo_fault = (df["prediction_without_fault"] == df["ground_truth"]).sum() / len(df)
        accuracy_with_fault = (df["prediction_with_fault"] == df["ground_truth"]).sum() / len(df)

        print(f" [INFO] Accuracy without fault: {accuracy_wo_fault * 100:.4f}%")
        print(f" [INFO] Accuracy with fault: {accuracy_with_fault * 100:.4f}%")
        print()
        count = 0
        for i, row in df.iterrows():
            out_wo_fault, out_with_fault = int(row["prediction_without_fault"]), int(row["prediction_with_fault"])
            if out_with_fault != out_wo_fault:
                count += 1
                print(f" [INFO] Critical error for image {i}. Expected {out_wo_fault} but got {out_with_fault}. Ground truth: {int(row['ground_truth'])}")
                df_fi.append({
                    "model": model,
                    "microop": microop,
                    "machine": machine,
                    "image_id": i,
                    "prediction_without_fault": out_wo_fault,
                    "prediction_with_fault": out_with_fault,
                })

        print(f" [INFO] Total critical errors: {count}")

df_fi = pd.DataFrame(df_fi)
df_fi.to_csv("data/fi_critical_errors.csv", index=False)
        

    

 [INFO] titanv
--------------------
 [INFO] WindowAttention
 [INFO] Accuracy without fault: 100.0000%
 [INFO] Accuracy with fault: 99.9906%

 [INFO] Critical error for image 7729. Expected 173 but got 253. Ground truth: 173
 [INFO] Critical error for image 20727. Expected 475 but got 15. Ground truth: 475
 [INFO] Critical error for image 21088. Expected 485 but got 482. Ground truth: 485
 [INFO] Critical error for image 30515. Expected 710 but got 767. Ground truth: 710
 [INFO] Total critical errors: 4
--------------------
 [INFO] SwinTransformerBlock
 [INFO] Accuracy without fault: 100.0000%
 [INFO] Accuracy with fault: 99.9789%

 [INFO] Critical error for image 3249. Expected 74 but got 73. Ground truth: 74
 [INFO] Critical error for image 5529. Expected 124 but got 123. Ground truth: 124
 [INFO] Critical error for image 7729. Expected 173 but got 253. Ground truth: 173
 [INFO] Critical error for image 19373. Expected 443 but got 618. Ground truth: 443
 [INFO] Critical error for imag

In [49]:
crit_faults = pd.read_csv("data/fi_critical_errors.csv")
df_carola20001 = crit_faults[crit_faults["machine"] == "a20001"].reset_index(drop=True)
df_caroltitanv = crit_faults[crit_faults["machine"] == "titanv"].reset_index(drop=True)
df_carola20001 = df_carola20001[["model", "microop", "image_id", "prediction_without_fault", "prediction_with_fault"]]
df_caroltitanv = df_caroltitanv[["model", "microop", "image_id", "prediction_without_fault", "prediction_with_fault"]]

if pd.DataFrame.equals(df_carola20001, df_caroltitanv):
    print("[INFO] Dataframes are equal")
    crit_faults = df_carola20001

# crit_faults = crit_faults.groupby(["model", 'image_id']).agg({
#     "microop": "count",
# })
crit_faults.to_csv("data/fi_critical_images.csv", index=False)
# crit_faults.to_excel("data/fi_critical_errors_grouped.xlsx")

[INFO] Dataframes are equal


In [46]:
test = crit_faults.groupby(["model", "microop"]).agg({
    "image_id": "count",
})
test = test.rename(columns={"image_id": "count"})
test.to_excel("data/crit_count_per_model_microop.xlsx")
test

Unnamed: 0_level_0,Unnamed: 1_level_0,count
model,microop,Unnamed: 2_level_1
swin_base_patch4_window7_224,Mlp,6
swin_base_patch4_window7_224,SwinTransformerBlock,9
swin_base_patch4_window7_224,WindowAttention,4
vit_base_patch16_224,Attention,11
vit_base_patch16_224,Block,8
vit_base_patch16_224,Mlp,8


In [None]:
model_names = {
    "vit_base_patch16_224": "ViT-B224",
    "swin_base_patch4_window7_224": "Swin-B-W7-224"
}

def concat(x):
    return " - ".join(sorted(set(x)))

def get_name(x):
    return model_names[x]

test_unique = crit_faults.groupby(["image_id"]).agg({
    "image_id": "count",
    "model": concat,
    "microop": concat,
})
test_unique = test_unique.rename(columns={"image_id": "count"}).reset_index()
test_unique["model"] = test_unique["model"].apply(get_name)
test_unique.to_csv("data/fi_critical_images_unique.csv", index=False)
test_unique.to_excel("data/fi_critical_images_unique.xlsx", index=False)
# test_unique

Unnamed: 0,image_id,count,model,microop
0,2650,1,ViT-B224,Attention
1,2917,1,Swin-B-W7-224,Mlp
2,2978,1,ViT-B224,Mlp
3,3249,1,Swin-B-W7-224,SwinTransformerBlock
4,5529,1,Swin-B-W7-224,SwinTransformerBlock
5,6645,2,ViT-B224,Attention - Block
6,6787,1,ViT-B224,Mlp
7,7729,2,Swin-B-W7-224,SwinTransformerBlock - WindowAttention
8,9838,1,Swin-B-W7-224,Mlp
9,12912,3,ViT-B224,Attention - Block - Mlp


In [40]:
import re 
import numpy as np
path = "data/relative_err_saves/"
files = [f for f in os.listdir(path) if ".pt" in f]

crit_imgs = pd.read_csv("data/fi_critical_images.csv")

dfs = []

for file in files:
    data = file.split(".")[0].split("-")
    model = data[1]
    microop = data[2]
    batch = data[3]
    batch_size = data[4]
    batch = int(re.search(r'batch(\d+)', file).group(1))
    batch_size = int(re.search(r'batchsize(\d+)', file).group(1))
    a = batch_size * batch
    b = batch_size * (batch + 1)

    good_imgs = crit_imgs[
        (crit_imgs["model"] == model) 
        & (crit_imgs["microop"] == microop)
        & (crit_imgs["image_id"] >= a)
        & (crit_imgs["image_id"] < b)
    ]

    output = torch.load(os.path.join(path, file), map_location="cpu")
    fault_free, faulty = output[:batch_size, :, :], output[batch_size:, :, :]

    for i, row in good_imgs.iterrows():
        image_id = int(row['image_id'])
        image_id_in_batch = image_id % batch_size
        diff = fault_free[image_id_in_batch] - faulty[image_id_in_batch]
        diff = diff.numpy()
        non_zero_diff = diff[diff != 0]
        average_diff = np.mean(non_zero_diff)
        abs_mean = np.mean(np.abs(non_zero_diff))
        median = np.median(non_zero_diff)
        q1 = np.percentile(non_zero_diff, 0.25)
        q3 = np.percentile(non_zero_diff, 0.75)
        altered_percentage = (non_zero_diff.size / diff.size) * 100
        df = {
            "model": model,
            "microop": microop,
            "batch_id": batch,
            "image_id_in_batch": image_id_in_batch,
            "image_id_in_dataset": image_id,
            "batch_size": batch_size,
            "mean": average_diff,
            "mean of absolute diff": abs_mean,
            "median": median,
            "Q1": q1,
            "Q3": q3,
            "altered%": altered_percentage,
            "min": non_zero_diff.min(),
            "max": non_zero_diff.max(),
        }
        dfs.append(df)

err_magnitude = pd.DataFrame(dfs)
err_magnitude.to_csv("data/crirtical_error_magnitude.csv")
err_magnitude

  output = torch.load(os.path.join(path, file), map_location="cpu")


Unnamed: 0,model,microop,batch_id,image_id_in_batch,image_id_in_dataset,batch_size,mean,mean of absolute diff,median,Q1,Q3,altered%,min,max
0,vit_base_patch16_224,Block,1128,23,36119,32,-4.293204e-05,0.010621,1.300126e-05,-0.111616,-0.076241,86.022102,-1.739353,1.664471
1,vit_base_patch16_224,Attention,1128,23,36119,32,7.747526e-05,0.00583,4.979316e-06,-0.063705,-0.040907,68.179595,-2.107471,2.157242
2,vit_base_patch16_224,Mlp,651,2,20834,32,-3.476413e-06,0.005438,3.615132e-06,-0.056145,-0.039217,58.876639,-0.286009,0.278805
3,swin_base_patch4_window7_224,WindowAttention,953,19,30515,32,7.531619e-05,0.007511,6.922521e-06,-0.07623,-0.051482,66.208945,-1.938377,3.179081
4,vit_base_patch16_224,Mlp,564,3,18051,32,3.446592e-05,0.005487,7.077004e-06,-0.055613,-0.039136,58.909687,-0.158668,0.168374
5,swin_base_patch4_window7_224,SwinTransformerBlock,729,5,23333,32,1.350652e-05,0.019925,2.642907e-05,-0.236534,-0.155794,85.427296,-4.453262,7.403
6,vit_base_patch16_224,Block,403,16,12912,32,4.198725e-05,0.009374,1.717405e-05,-0.09814,-0.0662,85.819189,-1.606651,1.712128
7,swin_base_patch4_window7_224,Mlp,307,14,9838,32,-0.0007065118,0.015427,-1.149543e-05,-0.224257,-0.133411,73.335858,-6.11108,2.652569
8,vit_base_patch16_224,Attention,1048,31,33567,32,-1.061726e-05,0.006249,-1.381151e-06,-0.063815,-0.044449,68.25891,-0.223935,1.001724
9,swin_base_patch4_window7_224,WindowAttention,647,23,20727,32,-3.216248e-05,0.00872,-8.642673e-07,-0.088035,-0.061422,65.882095,-1.527234,1.140491
