# Create Channel Coding Dataset

Import relevant libraries:

In [None]:
import json
import random
import sys
import time

sys.path.append('../')
from data_utils.gen_channel_data import *

Define the list of configs to use:

In [None]:
# Base names (without the ".json" extension) of the config files that are read
# from `configs_dir`. The order matters: later cells pick entries by position
# (configs[0] for the generic test set, configs[5] for the bursty-easy one).
configs = [
    # AWGN channel configs
    "awgn_narrow",
    "awgn_mid",
    "awgn_mid_higher",
    "awgn_mid_lower",
    "awgn_wide",
    # Bursty channel configs (note: these use "mid_high"/"mid_low", not
    # "mid_higher"/"mid_lower" like the other families)
    "bursty_hard_narrow",
    "bursty_hard_mid",
    "bursty_hard_mid_high",
    "bursty_hard_mid_low",
    "bursty_hard_wide",
    # "mem" channel configs
    "mem_hard_narrow",
    "mem_hard_mid",
    "mem_hard_mid_higher",
    "mem_hard_mid_lower",
    "mem_hard_wide",
    # "mp" channel configs
    "mp_hard_narrow",
    "mp_hard_mid",
    "mp_hard_mid_higher",
    "mp_hard_mid_lower",
    "mp_hard_wide",
    # "mix" channel configs
    "mix_hard_narrow",
    "mix_hard_wide"
]

# Directory containing the "<config>.json" files; the path is relative to the
# notebook's working directory.
configs_dir = '../configs/set_nd_15ts_5cls/'


In [None]:
def get_metric_arrangement(input_metrics, num_copies=1):
    """Expand a metrics specification into a flat list of criteria dicts.

    Args:
        input_metrics: Mapping from noise type to a dict with two entries:

            * ``"snr"``: a sequence of SNR values. A sequence of exactly three
              elements is interpreted as ``[low, high, step]`` and expanded
              with ``np.arange(low, high, step)`` (``high`` is excluded); a
              sequence of any other length is used as an explicit list.
            * ``"param"``: a mapping from parameter name to a list of values,
              or an empty/falsy value when the noise type has no extra
              parameters. The lists are read index by index (the i-th value
              of every parameter forms one setup), so they must all have the
              same length.
        num_copies: Number of times each criteria dict is repeated in the
            output.

    Returns:
        A list of dicts, each holding ``"noise_type"``, ``"snr"`` (as a Python
        float) and one entry per parameter. Repeated entries are independent
        dict objects, so mutating one does not affect its copies.

    Raises:
        ValueError: If the parameter lists of a noise type differ in length.
        AssertionError: If noise type ``"t"`` has a ``"vv"`` value below 2.
    """
    org_metrics = []

    for noise_type, snr_param in input_metrics.items():
        snr_spec = snr_param["snr"]
        if len(snr_spec) == 3:
            snr_low, snr_hi, snr_stp = snr_spec
            snr_list = np.arange(snr_low, snr_hi, snr_stp)
        else:
            snr_list = snr_spec

        params = snr_param["param"]
        if not params:
            # No extra parameters: one criteria dict per SNR value.
            for snr in snr_list:
                for _ in range(num_copies):
                    org_metrics.append(
                        {"noise_type": noise_type, "snr": float(snr)})
            continue

        # Every parameter list is indexed with the same position below, so a
        # length mismatch is reported up front instead of depending on which
        # parameter happens to be inspected.
        lengths = {len(values) for values in params.values()}
        if len(lengths) != 1:
            raise ValueError(
                "All parameter lists of noise type {!r} must have the same "
                "length, got lengths {}".format(
                    noise_type, {name: len(values) for name, values in params.items()}))
        para_len = lengths.pop()

        for snr in snr_list:
            for i in range(para_len):
                met_dict = {"noise_type": noise_type, "snr": float(snr)}
                for para, values in params.items():
                    if noise_type == "t" and para == "vv":
                        assert values[i] >= 2, "T channel requires parameter vv to be no less than 2"
                    met_dict[para] = values[i]

                # Append independent copies so the repeated entries do not
                # alias a single dict object.
                for _ in range(num_copies):
                    org_metrics.append(dict(met_dict))
    return org_metrics

def sample_a_metric(seed, train_range_dict, debug=False):
    """Sample one data-generation criteria dict from per-channel ranges.

    A channel type is picked uniformly at random from the keys of
    ``train_range_dict``; then each of its parameters is drawn uniformly from
    its ``[min, max]`` range. All randomness comes from a ``RandomState``
    seeded with ``seed``, so the same seed and ranges give the same result.

    Args:
        seed: Seed for the private random number generator.
        train_range_dict: Mapping from channel type to a mapping from
            parameter name to a sequence whose first two elements are the
            lower and upper bound of that parameter.
        debug: If true, print the inputs, the selected channel and the
            sampled result.

    Returns:
        A dict with ``"noise_type"`` set to the selected channel type plus
        one sampled value per parameter of that channel.
    """
    if debug:
        print("sample a metric with seed ", seed, " dict: ",
                train_range_dict, " noise types ", list(train_range_dict.keys()))
    rng = np.random.RandomState(seed)

    selected_channel = rng.choice(list(train_range_dict.keys()), size=1)[0]
    selected_channel_range = train_range_dict[selected_channel]
    if debug:
        print("selected type ", selected_channel,
                " range ", selected_channel_range)

    met_dict = {"noise_type": selected_channel}

    for param, range_list in selected_channel_range.items():
        min_, max_ = range_list[0], range_list[1]
        # Draw from the seeded generator (not the global NumPy state) so the
        # sampled values are reproducible for a given seed.
        met_dict[param] = rng.uniform(min_, max_)
    if debug:
        print("RETURNING sampled dict ", met_dict)
    return met_dict

Create the training set:

In [None]:
%%time
# create training dataset
num_noise_setups = 200
num_classes_per_noise_setup = 1000
num_examples_per_class = 50
# Block length handed to generate_viterbi_batch; the same value for every config.
image_height = 10
start = time.time()

# take one config at a time
for config_idx, config in enumerate(configs):
    print('Config ' + str(config_idx + 1) + ': ' + config)

    with open(configs_dir + config + '.json', 'r') as f:
        params = json.load(f)
    train_range_dict = params['train_range_dict']

    images_all = []
    labels_all = []
    batch_criteria_all = []

    for noise_setup in range(num_noise_setups):
        # Each noise setup gets its own generator; all seeds used for this
        # setup (criteria sampling and every batch) are drawn from it.
        seed = random.randint(1, 999999)
        rng = np.random.RandomState(seed)

        images = []
        labels = []
        batch_criteria = sample_a_metric(seed=rng.randint(1, 999999), train_range_dict=train_range_dict)

        # A distinct index name is used here so the config index of the outer
        # loop is not overwritten.
        for class_idx in range(num_classes_per_noise_setup):
            x_class_data, true_msgs = generate_viterbi_batch(batch_size=num_examples_per_class,
                                                             block_len=image_height,
                                                             batch_criteria=batch_criteria,
                                                             seed=rng.randint(1, 999999))
            x_class_data = np.array(x_class_data, dtype=np.float32)
            images.append(x_class_data)
            labels.append(true_msgs)

        images = np.array(images, dtype=np.float32)
        labels = np.array(labels, dtype=np.float32)

        images_all.append(images)
        labels_all.append(labels)
        batch_criteria_all.append(batch_criteria)

    images_all = np.array(images_all, dtype=np.float32)
    labels_all = np.array(labels_all, dtype=np.float32)

    # NOTE: saving is currently disabled, so the arrays built for this config
    # are overwritten by the next iteration. Re-enable the lines below to
    # persist them.
#     diff_support_query_v2
#     np.savez('../datasets/tmp/train/' + config + '_data.npz', train_images=images_all, train_labels=labels_all)
#     with open('../datasets/tmp/train/' + config + '_criteria.json', 'w') as f:
#         json.dump({'criteria': batch_criteria_all}, f)

    # `start` is set once before the loop, so this is the cumulative time
    # since the first config, not the time spent on this config alone.
    print('Elapsed time: {:.1f}'.format(time.time() - start))

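Create the test set. It is generated like the training set, except that the noise setups come from the fixed `val_metrics` grid in the config instead of being sampled at random: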

In [None]:
%%time
# create test dataset

# we can get any config, the dataset should not depend on it
config = configs[0]

with open(configs_dir + config + '.json', 'r') as f:
    params = json.load(f)

# Block length handed to generate_viterbi_batch. Defined here (with the value
# the training cell uses) so this cell also runs on its own instead of
# relying on a variable left behind by another cell.
image_height = 10

# One noise setup per criteria dict expanded from the validation metrics.
batch_arrange = get_metric_arrangement(params['val_metrics'], num_copies=1)
num_noise_setups = len(batch_arrange)
num_classes_per_noise_setup = 50
num_examples_per_class = 50

images_all_test = []
labels_all_test = []
batch_criteria_all_test = []

for batch_criteria in batch_arrange:
    # Each noise setup gets its own generator, from which the per-batch seeds
    # are drawn.
    seed = random.randint(1, 999999)
    rng = np.random.RandomState(seed)

    images = []
    labels = []

    for class_idx in range(num_classes_per_noise_setup):
        x_class_data, true_msgs = generate_viterbi_batch(batch_size=num_examples_per_class,
                                                         block_len=image_height,
                                                         batch_criteria=batch_criteria,
                                                         seed=rng.randint(1, 999999))
        x_class_data = np.array(x_class_data, dtype=np.float32)
        images.append(x_class_data)
        labels.append(true_msgs)

    images = np.array(images, dtype=np.float32)
    labels = np.array(labels, dtype=np.float32)

    images_all_test.append(images)
    labels_all_test.append(labels)
    batch_criteria_all_test.append(batch_criteria)

images_all_test = np.array(images_all_test, dtype=np.float32)
labels_all_test = np.array(labels_all_test, dtype=np.float32)

# NOTE: saving is currently disabled; re-enable the lines below to persist the
# generated test set.
# np.savez('../datasets/diff_support_query/test/' + 'test_data.npz', test_images=images_all_test, test_labels=labels_all_test)
# with open('../datasets/diff_support_query/test/test_criteria.json', 'w') as f:
#     json.dump({'criteria': batch_criteria_all_test}, f)

Bursty easy requires a different test set - create it here:

In [None]:
%%time
# create test dataset for the bursty-easy channel

# Select the config by name instead of by a hard-coded position in `configs`,
# so the cell keeps working when the list is reordered and fails with a clear
# message when no bursty-easy config is listed.
bursty_easy_configs = [name for name in configs if 'bursty_easy' in name]
if not bursty_easy_configs:
    raise ValueError(
        "No 'bursty_easy' config found in `configs`; add one to the list "
        "before creating the bursty-easy test set.")
config = bursty_easy_configs[0]

with open(configs_dir + config + '.json', 'r') as f:
    params = json.load(f)

# Block length handed to generate_viterbi_batch, taken from the config.
image_height = params['image_height']
# One noise setup per criteria dict expanded from the validation metrics.
batch_arrange = get_metric_arrangement(params['val_metrics'], num_copies=1)
num_noise_setups = len(batch_arrange)
num_classes_per_noise_setup = 50
num_examples_per_class = 50

images_all_test = []
labels_all_test = []
batch_criteria_all_test = []

for batch_criteria in batch_arrange:
    # Each noise setup gets its own generator, from which the per-batch seeds
    # are drawn.
    seed = random.randint(1, 999999)
    rng = np.random.RandomState(seed)

    images = []
    labels = []

    for class_idx in range(num_classes_per_noise_setup):
        x_class_data, true_msgs = generate_viterbi_batch(batch_size=num_examples_per_class,
                                                         block_len=image_height,
                                                         batch_criteria=batch_criteria,
                                                         seed=rng.randint(1, 999999))
        x_class_data = np.array(x_class_data, dtype=np.float32)
        images.append(x_class_data)
        labels.append(true_msgs)

    images = np.array(images, dtype=np.float32)
    labels = np.array(labels, dtype=np.float32)

    images_all_test.append(images)
    labels_all_test.append(labels)
    batch_criteria_all_test.append(batch_criteria)

images_all_test = np.array(images_all_test, dtype=np.float32)
labels_all_test = np.array(labels_all_test, dtype=np.float32)

# NOTE: saving is currently disabled; re-enable the lines below to persist the
# generated test set.
# np.savez('../datasets/test/' + 'bursty_easy_test_data.npz', test_images=images_all_test, test_labels=labels_all_test)
# with open('../datasets/test/bursty_easy_test_criteria.json', 'w') as f:
#     json.dump({'criteria': batch_criteria_all_test}, f)

Check that a saved test set can be loaded back from disk:

In [None]:
# Load a previously saved test archive from disk.
# NOTE(review): this path ('../datasets/test/') differs from the directory in
# the commented-out np.savez call of the generic test-set cell
# ('../datasets/diff_support_query/test/') - confirm which location is current.
data = np.load('../datasets/test/test_data.npz')

In [None]:
# Show the shape of the array stored under 'test_images' (the keyword used by
# the commented-out np.savez calls for the test set).
data['test_images'].shape