In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# auxiliary functions for data augmentation

def rotate90(df):
    """Return a copy of `df` with every sample rotated by 90 degrees.

    The four MFC columns are shifted cyclically (mfc1 -> mfc2 -> mfc3 ->
    mfc4 -> mfc1), each coordinate pair (x_i, y_i), i = 1..3, is mapped to
    (1 - y_i, x_i), and the 'angle' column is increased by 90.

    The input frame is not modified; all new values are computed from the
    original `df`, so the assignments do not interfere with one another.
    """
    rotated = df.copy()

    # (target, source) pairs: each MFC column takes over its predecessor's values
    mfc_shift = (('mfc1', 'mfc4'), ('mfc2', 'mfc1'), ('mfc3', 'mfc2'), ('mfc4', 'mfc3'))
    for target, source in mfc_shift:
        rotated[target] = df[source]

    # rotate the coordinates inside the unit square
    for point in range(1, 4):
        x_col, y_col = f'x{point}', f'y{point}'
        rotated[x_col] = 1 - df[y_col]
        rotated[y_col] = df[x_col]

    rotated['angle'] = df['angle'] + 90
    return rotated

def flip(df):
    """Return a copy of `df` with every sample mirrored along the x axis.

    The MFC columns are swapped pairwise (mfc1 <-> mfc2, mfc3 <-> mfc4),
    each x_i, i = 1..3, becomes 1 - x_i while y_i is kept, and the
    'flipped' column is set to True for all rows.

    The input frame is not modified.
    """
    mirrored = df.copy()

    # swap the two MFC columns of each pair
    for left, right in (('mfc1', 'mfc2'), ('mfc3', 'mfc4')):
        mirrored[left] = df[right]
        mirrored[right] = df[left]

    # mirror the x coordinates; the y coordinates are carried over unchanged
    for point in range(1, 4):
        mirrored[f'x{point}'] = 1 - df[f'x{point}']
        mirrored[f'y{point}'] = df[f'y{point}']

    mirrored['flipped'] = True
    return mirrored

In [None]:
# load data
df = pd.read_csv('data.csv', index_col=0)

# augment data: add the 90, 180 and 270 degree rotations of every sample.
# Each rotation is obtained by rotating the previous one once more.
# NOTE: DataFrame.append was removed in pandas 2.0, hence pd.concat; like
# append, it keeps the original index labels, so labels repeat across copies.
df['angle'] = 0
df_rot90 = rotate90(df)
df_rot180 = rotate90(df_rot90)
df_rot270 = rotate90(df_rot180)
df = pd.concat([df, df_rot90, df_rot180, df_rot270])

# augment data: add the mirrored version of every (rotated) sample
df['flipped'] = False
df = pd.concat([df, flip(df)])

# keep only 1-leakage samples (only these are considered in this paper)
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise SettingWithCopyWarning
df = df[df['n'] == 1].copy()

# normalize total flow so that x_1 + x_2 + x_3 + x_4 = 1
mfc_columns = ['mfc1', 'mfc2', 'mfc3', 'mfc4']
mfc_total = df['mfc1'] + df['mfc2'] + df['mfc3'] + df['mfc4']
df[mfc_columns] = df[mfc_columns].div(mfc_total, axis=0)

# remove obsolete columns
df = df.drop(columns=['s1', 'x2', 'y2', 's2', 'x3', 'y3', 's3', 'n'])

# apply coordinate transformation to get (y_1, y_2) in [-1, 1] x [-1, 1]
# (assumes the raw x1 / y1 values lie in [0, 1] -- TODO confirm against data.csv)
df['x1'] = 2 * df['x1'] - 1
df['y1'] = -2 * df['y1'] + 1

In [None]:
def save_position_scatter(samples, filename, mark_mfcs=False):
    """Scatter the (x1, y1) values of `samples` on a small square plot and save it.

    The axes are fixed to [-1, 1] x [-1, 1] with a grid at -1, 0 and 1;
    ticks, tick labels and axis labels are hidden.

    Parameters
    ----------
    samples : pandas.DataFrame
        Rows to draw; must provide the columns 'x1' and 'y1'.
    filename : str
        Path of the image file to write.
    mark_mfcs : bool, default False
        If True, additionally draw a large dot near each corner and label
        the corners 'MFC1' .. 'MFC4'.
    """
    fig, ax = plt.subplots(figsize=(2, 2))
    sns.scatterplot(data=samples, x='x1', y='y1', color='black', s=5.0, ax=ax)

    # fix the limits first so that later artists cannot rescale the axes
    ax.set_xlim([-1, 1])
    ax.set_ylim([-1, 1])
    ax.grid(True)
    ax.set_frame_on(True)
    ax.set_xticks([-1, 0, 1])
    ax.set_yticks([-1, 0, 1])
    ax.set_xticklabels([-1, 0, 1])
    ax.set_yticklabels([-1, 0, 1])
    ax.set_aspect('equal', 'box')

    # the ticks are only needed to position the grid lines; hide them
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
    ax.set_xlabel('')
    ax.set_ylabel('')

    if mark_mfcs:
        ax.scatter([-.9, -.9, .9, .9], [-.9, .9, -.9, .9], s=50, facecolor='black', linewidth=0.0)
        ax.text(-.75, .8, 'MFC1', fontsize=8)
        ax.text(.42, .8, 'MFC2', fontsize=8)
        ax.text(.42, -.89, 'MFC3', fontsize=8)
        ax.text(-.75, -.89, 'MFC4', fontsize=8)

    fig.savefig(filename, bbox_inches='tight', pad_inches=0.05, dpi=200)


# row selectors shared by the four figures
is_train = df['split'] == 'train'
is_test = df['split'] == 'test'
# "original" = neither rotated nor flipped by the augmentation
is_original = (df['angle'] == 0) & df['flipped'].eq(False)

# visualize original training data
save_position_scatter(df[is_train & is_original], 'eusipco_original_training_data.png')

# visualize augmented training data
save_position_scatter(df[is_train], 'eusipco_augmented_training_data.png')

# visualize original test data
save_position_scatter(df[is_test & is_original], 'eusipco_original_test_data.png', mark_mfcs=True)

# visualize augmented test data
save_position_scatter(df[is_test], 'eusipco_augmented_test_data.png')