# Input/Output Similarity/Dissimilarity

In [1]:
import data_utils
import config
import numpy as np
import pandas as pd

In [2]:
def np_to_str(np_arr):
    """Concatenate the string form of every entry along the first axis.

    Parameters
    ----------
    np_arr : numpy.ndarray
        Array whose first-axis entries are converted with ``str`` and joined
        without a separator, e.g. ``array([1, 0, 1])`` -> ``'101'``.

    Returns
    -------
    str
        The joined string; empty when the first axis has length 0.
    """
    return ''.join(str(np_arr[idx]) for idx in range(np_arr.shape[0]))

In [3]:
def np_sim(np1, np2):
    """Return the fraction of positions at which two arrays hold equal values.

    Parameters
    ----------
    np1, np2 : numpy.ndarray
        Arrays compared element-wise with ``==``.

    Returns
    -------
    The number of equal positions divided by the first-axis length of ``np1``
    (1.0 for identical 1-D arrays, 0.0 when no position matches).
    """
    matches = np1 == np2
    n_positions = np1.shape[0]
    return np.sum(matches) / n_positions

In [20]:
def show_io_sim_dif(operand_digits, operator):
    """Print pairwise input/output dissimilarity statistics for one operator.

    Loads the carry datasets for ``operator`` through
    ``data_utils.import_carry_datasets``, pools them into a single set of
    examples (each labelled with the key of the dataset it came from), and
    compares every ordered pair of examples, including each example with
    itself.

    For every pair, the similarity of the two inputs and of the two outputs is
    computed with ``np_sim``; the dissimilarity is ``1 - similarity``. The sum
    and the product of the input and output scores are recorded as well.

    Count, sum and mean of the dissimilarity columns are then printed twice:
    once over all pairs, and once over the pairs whose two examples carry the
    same label, grouped by that label.

    Parameters
    ----------
    operand_digits : int
        Forwarded to ``data_utils.import_carry_datasets`` and shown in the
        report header as ``"<operand_digits>-bit"``.
    operator : str
        Operator name (presumably e.g. ``'add'``), forwarded to
        ``data_utils.import_carry_datasets`` and shown in the report header.

    Returns
    -------
    None
        The statistics are printed, not returned.

    Notes
    -----
    The pair table has ``n_examples ** 2`` rows, so run time and memory grow
    quadratically with the number of pooled examples.
    """
    carry_datasets = data_utils.import_carry_datasets(operand_digits, operator)

    # Pool the per-carry datasets, remembering which key each example came from.
    input_arrays = list()
    output_arrays = list()
    carry_arrays = list()

    for carries in carry_datasets.keys():
        input_array = carry_datasets[carries]['input']
        output_array = carry_datasets[carries]['output']
        input_arrays.append(input_array)
        output_arrays.append(output_array)
        # `np.int` was removed in NumPy 1.24; the builtin `int` is the type it
        # aliased, so the resulting dtype is unchanged.
        carry_arrays.append(np.full(input_array.shape[0], carries, dtype=int))

    np_inputs = np.concatenate(input_arrays, axis=0)
    np_outputs = np.concatenate(output_arrays, axis=0)
    np_carry_labels = np.concatenate(carry_arrays, axis=0)
    n_examples = np_inputs.shape[0]

    # The string form depends only on the example, not on the pair, so build
    # it once per example instead of four times per pair.
    input_strs = [np_to_str(np_inputs[i]) for i in range(n_examples)]
    output_strs = [np_to_str(np_outputs[i]) for i in range(n_examples)]

    column_order = [
        'src_input', 'src_output', 'src_carries',
        'dst_input', 'dst_output', 'dst_carries',
        'sim_input', 'sim_output', 'sim_io_sum', 'sim_io_mul',
        'dif_input', 'dif_output', 'dif_io_sum', 'dif_io_mul',
    ]
    columns = {name: list() for name in column_order}

    # One row per ordered (source, destination) pair, self-pairs included.
    for i in range(n_examples):
        for j in range(n_examples):
            sim_input = np_sim(np_inputs[i], np_inputs[j])
            sim_output = np_sim(np_outputs[i], np_outputs[j])
            dif_input = 1 - sim_input
            dif_output = 1 - sim_output

            columns['src_input'].append(input_strs[i])
            columns['src_output'].append(output_strs[i])
            columns['src_carries'].append(np_carry_labels[i])
            columns['dst_input'].append(input_strs[j])
            columns['dst_output'].append(output_strs[j])
            columns['dst_carries'].append(np_carry_labels[j])

            columns['sim_input'].append(sim_input)
            columns['sim_output'].append(sim_output)
            columns['sim_io_sum'].append(sim_input + sim_output)
            columns['sim_io_mul'].append(sim_input * sim_output)

            columns['dif_input'].append(dif_input)
            columns['dif_output'].append(dif_output)
            columns['dif_io_sum'].append(dif_input + dif_output)
            columns['dif_io_mul'].append(dif_input * dif_output)

    # Passing `columns=` fixes the column order regardless of how the
    # installed pandas orders dict keys.
    df = pd.DataFrame(columns, columns=column_order)

    # Only the dissimilarity columns are summarised in the report.
    dif_cols = ['dif_input', 'dif_output', 'dif_io_sum', 'dif_io_mul']

    # Pairs whose two examples share a carry label, grouped by that label.
    same_carry = (
        df.query('src_carries == dst_carries')
        .groupby(['src_carries', 'dst_carries'])[dif_cols]
    )

    print('{}-bit {}'.format(operand_digits, operator))
    print('\nCount ================================================\n')
    print(df[dif_cols].count())
    print(same_carry.count())
    print('\nSum ==================================================\n')
    print(df[dif_cols].sum())
    print(same_carry.sum())
    print('\nMean =================================================\n')
    print(df[dif_cols].mean())
    print(same_carry.mean())

# For each operator

In [21]:
# Print the 4-digit dissimilarity report once per operator listed in the
# project config, with a separator line before each report.
for operator in config.operators_list():
    print('===============================================')
    show_io_sim_dif(operand_digits=4, operator=operator)

4-bit add


dif_input     65536
dif_output    65536
dif_io_sum    65536
dif_io_mul    65536
dtype: int64
                         dif_input  dif_output  dif_io_sum  dif_io_mul
src_carries dst_carries                                               
0           0                 6561        6561        6561        6561
1           1                 2916        2916        2916        2916
2           2                 2704        2704        2704        2704
3           3                 1764        1764        1764        1764
4           4                  729         729         729         729


dif_input     32768.0
dif_output    20464.0
dif_io_sum    53232.0
dif_io_mul    10402.0
dtype: float64
                         dif_input  dif_output  dif_io_sum  dif_io_mul
src_carries dst_carries                                               
0           0               2916.0      1458.0      4374.0    668.2500
1           1               1399.5       882.0      2281.5    452.2500
2        