In [18]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import numpy as np
import os
import pandas as pd

In [2]:
datapath = "data/"

In [3]:
def get_normalization_params(dataset_name, datapath):
    if dataset_name == "cifar10":
        # stds are different in paper wtf
        train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
    elif dataset_name == "cifar100":
        pass
    elif dataset_name == "clothing1m":
        pass
    else:
        raise Exception
    
    means = train_dataset.data.mean(axis=(0,1,2)) / 255.0
    stds = train_dataset.data.std(axis=(0,1,2)) / 255.0
    
    return means, stds

In [4]:
def get_transforms(dataset_name, **kwargs):
    means, stds = kwargs["means"], kwargs["stds"]
    
    if dataset_name == "cifar10":
        train_transform = transforms.Compose([transforms.RandomCrop(32, padding=4), 
                                              transforms.RandomHorizontalFlip(),
                                              transforms.ToTensor(),
                                              transforms.Normalize(means, stds)])

        test_transform = transforms.Compose([transforms.ToTensor(), 
                                             transforms.Normalize(means, stds)])
    elif dataset_name == "cifar100":
        pass
    
    elif dataset_name == "clothing1m":
        pass
    
    else:
        raise Exception
    
    return train_transform, test_transform

In [5]:
def get_splits(dataset_name, datapath, **kwargs):
    train_transform, test_transform = kwargs["train_transform"], kwargs["test_transform"]
    
    if dataset_name == "cifar10":
        train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=train_transform, download=True)
        test_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=False, transform=test_transform)
    
    elif dataset_name == "cifar100":
        pass
    
    elif dataset_name == "clothing1m":
        pass
    
    else:
        raise Exception
    
    return train_dataset, test_dataset

In [6]:
def get_datasets(dataset_name, datapath):
    means, stds = get_normalization_params(dataset_name, datapath)
    
    transform_params = {"means": means, "stds": stds}
    train_transform, test_transform = get_transforms(dataset_name, **transform_params)
    
    transforms = {"train_transform": train_transform, "test_transform": test_transform}
    train_dataset, test_dataset = get_splits(dataset_name, datapath, **transforms)
    
    return train_dataset, test_dataset

In [7]:
# https://pytorch.org/docs/stable/notes/randomness.html

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    numpy.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

def get_datta_loaders():
    pass

In [8]:
dataset_name = "cifar10"
train_dataset, test_dataset = get_datasets(dataset_name, datapath)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
print(train_dataset)
print(test_dataset)

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data/cifar10
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.49139968 0.48215841 0.44653091], std=[0.24703223 0.24348513 0.26158784])
           )
Dataset CIFAR10
    Number of datapoints: 10000
    Root location: data/cifar10
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=[0.49139968 0.48215841 0.44653091], std=[0.24703223 0.24348513 0.26158784])
           )


In [39]:
type(train_dataset.data)

numpy.ndarray

In [40]:
np.save("test.npy", train_dataset.data)

In [41]:
train_data_2 = np.load("test.npy")

In [568]:
np.all(train_dataset.data == train_data_2)

True

In [201]:
def assert_dataset_name_and_noise_mode_combos(dataset_name, noise_mode):
    cifar10_sym_condition = dataset_name == "cifar10" and noise_mode == "sym"
    cifar10_asym_condition = dataset_name == "cifar10" and noise_mode == "asym"
    cifar100_sym_condition = dataset_name == "cifar100" and noise_mode == "sym"
    cifar100_asym_condition = dataset_name == "cifar100" and noise_mode == "asym"
    cifar10_dependent_condition = dataset_name == "cifar10" and noise_mode == "dependent"
    cifar100_dependent_condition = dataset_name == "cifar100" and noise_mode == "dependent"
    cifar10_openset_condition = dataset_name == "cifar10" and noise_mode == "openset"
    
    assert (
        cifar10_sym_condition or cifar10_asym_condition or \
        cifar100_sym_condition or cifar100_asym_condition or \
        cifar10_dependent_condition or cifar100_dependent_condition or \
        cifar10_openset_condition
    )
    

def make_inherent_label_noise(datapath, dataset_name, noise_mode, seed=None):
    # check combos
    assert_dataset_name_and_noise_mode_combos(dataset_name=dataset_name, noise_mode=noise_mode)
    
    if noise_mode in ["sym", "asym"]:
        make_inherent_label_noise_sym_asym(datapath=datapath, dataset_name=dataset_name, noise_mode=noise_mode, seed=seed)
    elif noise_mode == "openset":
        make_inherent_label_noise_openset(datapath=datapath, dataset_name=dataset_name, noise_mode=noise_mode, seed=seed)
    elif noise_mode == "dependent":
        make_inherent_label_noise_dependent(datapath=datapath, dataset_name=dataset_name, noise_mode=noise_mode, seed=None)
    else:
        raise Exception
    
def make_inherent_label_noise_openset(datapath, dataset_name, noise_mode, seed=None):
    if dataset_name == "cifar10":
        train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
    else:
        raise Exception
    
    p = 0.4
    noise_rules = make_openset_noise_rules(dataset_name=dataset_name, p=p)
    # only one noise rule for now
    noise_rule = noise_rules[0]
    
    dirty_indicator_indices_per_rule = \
        make_openset_noise(train_dataset=train_dataset, src=noise_rule["src"], dst=noise_rule["dst"], p=noise_rule["p"], seed=seed)
    #noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]
    
    labels_noisy = np.array([False for i in range(len(train_dataset))])
    labels_noisy[dirty_indicator_indices_per_rule] = True

    labels_df = pd.DataFrame(data={"label": train_dataset.targets, "label_noisy": labels_noisy})
    labels_csv_path = f"data/{dataset_name}/label_noisy/openset{p:.1f}_custom.csv" 
    labels_df.to_csv(labels_csv_path, index=False)
    print(f"{labels_csv_path} generated\n")
    
    noise_rules_df = pd.DataFrame(data=noise_rules)
    noise_rules_csv_path = f"data/{dataset_name}/label_noisy/openset{p:.1f}_custom_noise_rules.csv"
    noise_rules_df.to_csv(noise_rules_csv_path)
    print(f"{noise_rules_csv_path} generated\n")

def make_inherent_label_noise_dependent(datapath, dataset_name, noise_mode, seed=None):
    raise NotImplementedError
    
def make_inherent_label_noise_sym_asym(datapath, dataset_name, noise_mode, seed=None):   
    if dataset_name == "cifar10":
        train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
    elif dataset_name == "cifar100":
        train_dataset = datasets.CIFAR100(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
    else:
        raise Exception
    
    if noise_mode == "sym":
        p = 0.4
        noise_rules = make_sym_noise_rules(dataset_name=dataset_name, train_dataset=train_dataset, p=p)
    elif noise_mode == "asym":
        p = 0.4
        noise_rules = make_asym_noise_rules(dataset_name=dataset_name, train_dataset=train_dataset, p=p)
    else:
        raise Exception
    
    targets = torch.tensor(train_dataset.targets)
    noisy_targets = targets.detach().clone()

    for noise_rule in noise_rules:
        indices_per_rule, dirty_indicator_indices_per_rule, noisy_targets_per_rule = \
            make_sym_asym_noise(train_dataset=train_dataset, src=noise_rule["src"], dsts=noise_rule["dsts"], p=noise_rule["p"], seed=seed)
        noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]

    labels_df = pd.DataFrame(data={"label": targets.numpy(), "label_noisy": noisy_targets.numpy()})
    labels_csv_path = f"data/{dataset_name}/label_noisy/{noise_mode}{p:.1f}_custom.csv"
    labels_df.to_csv(labels_csv_path, index=False)
    print(f"{labels_csv_path} generated\n")
    #print(torch.where(targets != noisy_targets)[0])
    #print(torch.where(targets != noisy_targets)[0].size(dim=0) / targets.size(dim=0))
    
    noise_rules_df = pd.DataFrame(data=noise_rules)
    noise_rules_csv_path = f"data/{dataset_name}/label_noisy/{noise_mode}{p:.1f}_custom_noise_rules.csv"
    noise_rules_df.to_csv(noise_rules_csv_path)
    print(f"{noise_rules_csv_path} generated\n")
        
    
def make_sym_asym_noise(train_dataset, src, dsts, p, seed=None):
    # set seed for reprodcuibility
    if seed is not None:
        torch.manual_seed(seed)
    
    # clean targets
    targets = torch.tensor(train_dataset.targets)
    # copy clean targets to noisy targets
    noisy_targets = targets.detach().clone()

    # get all labels
    labels = torch.tensor(list(train_dataset.class_to_idx.values()))
    labels_len = torch.tensor(labels.size(dim=0))

    # get src targets indices (indices in all dataset)
    # asym: get indices of src targets
    indices = torch.where(targets == train_dataset.class_to_idx[src])[0]

    # p_mask eg 0.4 for each src target, each target flips with probability p
    p_mask = torch.ones_like(targets[indices]) * p
    # flip_mask is 0s and 1s  (flip is 1s)
    flip_mask = torch.bernoulli(input=p_mask)

    # keep_mask is inverse of flip_mask (keep is 1s)
    keep_mask = (flip_mask * (-1)) + 1

    # p_mask_label is dst label probability distribution to flip to (length is number of classes), sums to 1.0
    # asym: dst class is 1.0, all else 0.0
    p_mask_label = torch.zeros_like(labels, dtype=torch.float)

    p_mask_label[[train_dataset.class_to_idx[dst] for dst in dsts]] = 1.0 / len(dsts)

    # flip_mask_label is categorical distribution with params p_mask_label for each dst class
    flip_mask_label = torch.distributions.categorical.Categorical(p_mask_label)

    # flipped_targets is dst labels for each src label that the src label can flip to
    # for now, only one dst, so all in flip_targets is dst class label
    flipped_targets = flip_mask_label.sample(sample_shape=targets[indices].shape)

    # mask the flipped_targets to get the actually flipped instances (ones not to be flipped are 0s, ones to be flipped are dst labels)
    masked_flipped_targets = flipped_targets * flip_mask

    # mask the actual targets to keep the ones not flipped (ones not to be flipped are original labels, ones to be flipped are 0s)
    masked_targets = targets[indices] * keep_mask

    # add vectors together - kept ones remain, flipped ones are flipped
    noisy_targets_sub = (masked_targets + masked_flipped_targets).long()

    # insert into noisy_targets the flipped targets
    noisy_targets[indices] = noisy_targets_sub

    # get the indices of the noisy instances (indices in all dataset)
    dirty_indicator_indices = torch.where(targets != noisy_targets)[0]
    
    return indices, dirty_indicator_indices, noisy_targets

def make_openset_noise(train_dataset, src, dst, p, seed=None):
    if dst == "cifar100":
        dst_dataset = datasets.CIFAR100(os.path.join("data", "cifar100"), train=True, download=True)
    else:
        raise Exception
        
    if seed is not None:
        np.random.seed(seed)

    indices_src = np.random.choice(len(train_dataset), int(len(train_dataset) * p), replace=False)

    if seed is not None:
        np.random.seed(seed + 123)

    indices_dst = np.random.choice(len(train_dataset), int(len(train_dataset) * p), replace=False)
    train_dataset.data[indices_src] = dst_dataset.data[indices_dst]
    
    dataset_npy_path = f"data/{src}/label_noisy/openset{p:.1f}_custom.npy"
    np.save(dataset_npy_path, train_dataset.data)
    print(f"{dataset_npy_path} generated\n")
    
    return indices_src

In [202]:
make_inherent_label_noise(datapath=datapath, dataset_name="cifar10", noise_mode="openset", seed=123)

Files already downloaded and verified
Files already downloaded and verified
data/cifar10/label_noisy/openset0.4_custom.npy generated

data/cifar10/label_noisy/openset0.4_custom.csv generated

data/cifar10/label_noisy/openset0.4_custom_noise_rules.csv generated



In [114]:
print(train_dataset.class_to_idx)

{'apple': 0, 'aquarium_fish': 1, 'baby': 2, 'bear': 3, 'beaver': 4, 'bed': 5, 'bee': 6, 'beetle': 7, 'bicycle': 8, 'bottle': 9, 'bowl': 10, 'boy': 11, 'bridge': 12, 'bus': 13, 'butterfly': 14, 'camel': 15, 'can': 16, 'castle': 17, 'caterpillar': 18, 'cattle': 19, 'chair': 20, 'chimpanzee': 21, 'clock': 22, 'cloud': 23, 'cockroach': 24, 'couch': 25, 'crab': 26, 'crocodile': 27, 'cup': 28, 'dinosaur': 29, 'dolphin': 30, 'elephant': 31, 'flatfish': 32, 'forest': 33, 'fox': 34, 'girl': 35, 'hamster': 36, 'house': 37, 'kangaroo': 38, 'keyboard': 39, 'lamp': 40, 'lawn_mower': 41, 'leopard': 42, 'lion': 43, 'lizard': 44, 'lobster': 45, 'man': 46, 'maple_tree': 47, 'motorcycle': 48, 'mountain': 49, 'mouse': 50, 'mushroom': 51, 'oak_tree': 52, 'orange': 53, 'orchid': 54, 'otter': 55, 'palm_tree': 56, 'pear': 57, 'pickup_truck': 58, 'pine_tree': 59, 'plain': 60, 'plate': 61, 'poppy': 62, 'porcupine': 63, 'possum': 64, 'rabbit': 65, 'raccoon': 66, 'ray': 67, 'road': 68, 'rocket': 69, 'rose': 70, 

In [187]:
def make_sym_noise_rules(dataset_name, train_dataset, p=0.4):
    if dataset_name in ["cifar10", "cifar100"]:
        labels = list(train_dataset.class_to_idx.keys())
        noise_rules = []

        for src in labels:
            dsts = labels.copy()
            dsts.remove(src)

            noise_rule = {"src":src, "dsts":dsts, "p":p}
            noise_rules.append(noise_rule)

    else:
        raise Exception
        
    return noise_rules

def make_asym_noise_rules(dataset_name, train_dataset, p=0.4):
    if dataset_name == "cifar10":
        noise_rules = [
            {"src":"truck", "dsts":["automobile"], "p":p},
            {"src":"bird", "dsts":["airplane"], "p":p},
            {"src":"cat", "dsts":["dog"], "p":p},
            {"src":"dog", "dsts":["cat"], "p":p}
        ]

    elif "cifar100":
        labels = np.array(list(train_dataset.class_to_idx.keys()))
        labels_shifted = np.roll(labels, 1)
        noise_rules = []
        for src, dst in zip(labels_shifted, labels):
            noise_rule = {"src":src, "dsts":[dst], "p":p}
            noise_rules.append(noise_rule)
    else:
        raise Exception
        
    return noise_rules

def make_openset_noise_rules(dataset_name, p=0.4):
    if dataset_name == "cifar10":
        noise_rules = [{"src": dataset_name, "dst": "cifar100", "p": p}]
    else:
        raise Exception
        
    return noise_rules

## CIFAR-10

### Sym

In [116]:
dataset_name = "cifar10"
train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
noise_rules = make_sym_noise_rules(dataset_name, train_dataset)
print(noise_rules)

targets = torch.tensor(train_dataset.targets)
noisy_targets = targets.detach().clone()

for noise_rule in noise_rules:
    indices_per_rule, dirty_indicator_indices_per_rule, noisy_targets_per_rule = \
        make_sym_asym_noise(train_dataset, noise_rule["src"], noise_rule["dsts"], noise_rule["p"], seed=123)
    noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]

print(torch.where(targets != noisy_targets)[0])
print(torch.where(targets != noisy_targets)[0].size(dim=0) / targets.size(dim=0))

df = pd.DataFrame(data={"label": targets.numpy(), "label_noisy": noisy_targets.numpy()})
print(df)
path_to_csv = "data/cifar10/label_noisy/sym0.4_custom.csv"
df.to_csv(path_to_csv, index=False)

Files already downloaded and verified
[{'src': 'airplane', 'dsts': ['automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'automobile', 'dsts': ['airplane', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'bird', 'dsts': ['airplane', 'automobile', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'cat', 'dsts': ['airplane', 'automobile', 'bird', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'deer', 'dsts': ['airplane', 'automobile', 'bird', 'cat', 'dog', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'dog', 'dsts': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'frog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'frog', 'dsts': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'horse', 'ship', 'truck'], 'p': 0.4}, {'src': 'horse', 'dsts': ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'ship', 'truck'], 'p': 0.4}, {'src': '

### Asym

In [117]:
dataset_name = "cifar10"
train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
noise_rules = make_asym_noise_rules(dataset_name, train_dataset)
print(noise_rules)

targets = torch.tensor(train_dataset.targets)
noisy_targets = targets.detach().clone()

for noise_rule in noise_rules:
    indices_per_rule, dirty_indicator_indices_per_rule, noisy_targets_per_rule = \
        make_sym_asym_noise(train_dataset, noise_rule["src"], noise_rule["dsts"], noise_rule["p"], seed=123)
    noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]

print(torch.where(targets != noisy_targets)[0])
print(torch.where(targets != noisy_targets)[0].size(dim=0))
print(torch.where(targets != noisy_targets)[0].size(dim=0) / targets.size(dim=0))

df = pd.DataFrame(data={"label": targets.numpy(), "label_noisy": noisy_targets.numpy()})
print(df)
path_to_csv = "data/cifar10/label_noisy/asym0.4_custom.csv"
df.to_csv(path_to_csv, index=False)

Files already downloaded and verified
[{'src': 'truck', 'dsts': ['automobile'], 'p': 0.4}, {'src': 'bird', 'dsts': ['airplane'], 'p': 0.4}, {'src': 'cat', 'dsts': ['dog'], 'p': 0.4}, {'src': 'dog', 'dsts': ['cat'], 'p': 0.4}]
tensor([    1,     6,     9,  ..., 49982, 49987, 49991])
8068
0.16136
       label  label_noisy
0          6            6
1          9            1
2          9            9
3          4            4
4          1            1
...      ...          ...
49995      2            2
49996      6            6
49997      9            9
49998      1            1
49999      1            1

[50000 rows x 2 columns]


In [118]:
df_in = pd.read_csv(path_to_csv)
df_in

Unnamed: 0,label,label_noisy
0,6,6
1,9,1
2,9,9
3,4,4
4,1,1
...,...,...
49995,2,2
49996,6,6
49997,9,9
49998,1,1


### Openset

In [119]:
train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
make_openset_noise(dataset_name, train_dataset, p=0.4, seed=123)

Files already downloaded and verified
Files already downloaded and verified


In [120]:
train_dataset = datasets.CIFAR10(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
train_dataset.data = np.load("data/cifar10/label_noisy/openset0.4_custom.npy")

Files already downloaded and verified


In [121]:
train_dataset.data.shape

(50000, 32, 32, 3)

### Dependent

Use provided files

## CIFAR100

### Sym

In [161]:
dataset_name = "cifar100"
train_dataset = datasets.CIFAR100(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
noise_rules = make_sym_noise_rules(dataset_name, train_dataset)
print(noise_rules[:2])

df = pd.DataFrame(data=noise_rules)
print(df)

targets = torch.tensor(train_dataset.targets)
noisy_targets = targets.detach().clone()

for noise_rule in noise_rules:
    indices_per_rule, dirty_indicator_indices_per_rule, noisy_targets_per_rule = \
        make_sym_asym_noise(train_dataset, noise_rule["src"], noise_rule["dsts"], noise_rule["p"], seed=None)
    noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]

print(torch.where(targets != noisy_targets)[0])
print(torch.where(targets != noisy_targets)[0].size(dim=0) / targets.size(dim=0))

df = pd.DataFrame(data={"label": targets.numpy(), "label_noisy": noisy_targets.numpy()})
print(df)
path_to_csv = "data/cifar100/label_noisy/sym0.4_custom.csv"
df.to_csv(path_to_csv, index=False)

Files already downloaded and verified
[{'src': 'apple', 'dsts': ['aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout',

### Asym

In [160]:
dataset_name = "cifar100"
train_dataset = datasets.CIFAR100(os.path.join(datapath, dataset_name), train=True, transform=transforms.ToTensor(), download=True)
noise_rules = make_asym_noise_rules(dataset_name, train_dataset)

df = pd.DataFrame(data=noise_rules)
print(df)
raise

targets = torch.tensor(train_dataset.targets)
noisy_targets = targets.detach().clone()

for noise_rule in noise_rules:
    indices_per_rule, dirty_indicator_indices_per_rule, noisy_targets_per_rule = \
        make_sym_asym_noise(train_dataset, noise_rule["src"], noise_rule["dsts"], noise_rule["p"], seed=None)
    noisy_targets[indices_per_rule] = noisy_targets_per_rule[indices_per_rule]

print(torch.where(targets != noisy_targets)[0])
print(torch.where(targets != noisy_targets)[0].size(dim=0))
print(torch.where(targets != noisy_targets)[0].size(dim=0) / targets.size(dim=0))

df = pd.DataFrame(data={"label": targets.numpy(), "label_noisy": noisy_targets.numpy()})
print(df)
path_to_csv = "data/cifar100/label_noisy/asym0.4_custom.csv"
df.to_csv(path_to_csv, index=False)

Files already downloaded and verified
              src             dsts    p
0            worm          [apple]  0.4
1           apple  [aquarium_fish]  0.4
2   aquarium_fish           [baby]  0.4
3            baby           [bear]  0.4
4            bear         [beaver]  0.4
..            ...              ...  ...
95       wardrobe          [whale]  0.4
96          whale    [willow_tree]  0.4
97    willow_tree           [wolf]  0.4
98           wolf          [woman]  0.4
99          woman           [worm]  0.4

[100 rows x 3 columns]


RuntimeError: No active exception to reraise

### Openset

No such combination

### Dependent

Use provided files