In [None]:
import pandas as pd
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
import itertools as it

from pathlib import Path

# Global plot styling, applied once for every figure in this notebook.
# Order matters: each call below mutates matplotlib's global rcParams.
# Set the default figure size (matplotlib's 'figure.figsize' rc parameter) to 18 x 9.
sns.set(rc={'figure.figsize':(18,9)})
# Scale the font elements by 1.5 relative to seaborn's default context.
sns.set(font_scale=1.5)
# Applied last so the "whitegrid" style is the one in effect
# (the sns.set calls above do not pass a style argument themselves).
sns.set_style("whitegrid")


# Homograph Injection Bounded Experiment

In the following section, we injected 50 homographs (over 4 runs), varying the range of cardinalities of the values replaced for each homograph. This approach is "bounded" because it only places a minimum bound on the cardinality of the replaced values.

In [None]:
# Input: pickled results of the cardinality experiment. Output: directory where figures are saved.
df_file_path = 'homograph_injection_evaluation/cardinality_experiment/injected_homograph_df.pickle'
save_dir = 'figures/injected_homographs_cardinality_experiment/'

# Create output directory for figures
Path(save_dir).mkdir(parents=True, exist_ok=True)

# NOTE: unpickling can execute arbitrary code -- only load pickle files produced by this project.
# The context manager guarantees the file handle is closed once loading finishes
# (a bare `pickle.load(open(...))` leaves it open until garbage collection).
with open(df_file_path, 'rb') as pickle_file:
    df = pickle.load(pickle_file)
df

In [None]:
# Scatter plot: rank of each injected homograph against the average cardinality
# of the values that were replaced to create it. Every row of `df` is plotted.
# NOTE(review): presumably this experiment used 2 replaced values per homograph;
# nothing in this cell filters on that -- confirm against the experiment setup.

fig, ax = plt.subplots()
sns.scatterplot(
    data=df,
    x="replaced_values_avg_cardinality",
    y="homograph_rank",
    marker='o',
    ax=ax,
)
ax.set_xlabel('Average cardinality of replaced values')
ax.set_ylabel('Injected homograph rank')
ax.set_title('Injected Homograph Rank vs cardinality of replaced values')
fig.tight_layout()

In [None]:
# One row per `min_cardinality` group, holding the mean of every numeric column.
# numeric_only=True keeps this working on pandas >= 2, where averaging a
# non-numeric column raises instead of being silently dropped.
df_cardinality_range = df.groupby(['min_cardinality'], as_index=False).mean(numeric_only=True)

# Calculate percent of injected homographs in the top 50 ranks in each range of cardinalities.
# The mean of a boolean column is the fraction of True values, so this is
# (#rows with rank <= 50) / (#rows in the group) * 100 for every group.
in_top_50 = df['homograph_rank'] <= 50
percent_in_top_50 = in_top_50.groupby(df['min_cardinality']).mean() * 100

# Attach by group key rather than by position: the percentages cannot end up on
# the wrong row, and the cell no longer depends on a hard-coded list of groups
# (a missing group used to raise ZeroDivisionError, an extra one a length mismatch).
df_cardinality_range['percentage_of_injected_homographs_in_top_50'] = (
    df_cardinality_range['min_cardinality'].map(percent_in_top_50)
)

df_cardinality_range

In [None]:
# Bar chart of the % of injected homographs ranked in the top 50, with one bar
# per minimum-cardinality group, saved as a PDF under `save_dir`.

# Take the groups (and their order) from the summary frame itself, so labels,
# bars and annotations always line up with the data actually present.
cardinalities = df_cardinality_range['min_cardinality'].tolist()
cardinality_range_labels = [f'≥ {c:g}' for c in cardinalities]
x_vals = range(len(cardinalities))  # bar i is drawn at x position i

# Create the figure at its final size up front so that tight_layout() is
# computed for the dimensions that are actually saved.
fig, ax = plt.subplots(figsize=(16, 9))

# Plot bar chart of the range of the replaced values cardinality with the % of injected homographs in the top 50 ranks
sns.barplot(
    data=df_cardinality_range,
    x="min_cardinality",
    y="percentage_of_injected_homographs_in_top_50",
    order=cardinalities,  # pin bar order to the order used for labels/annotations
    color='#3182bd',
    ax=ax,
)
ax.set(ylim=(0, 105), title='')

# Pin the tick positions before relabelling them, so each label is tied to its bar
ax.set_xticks(list(x_vals))
ax.set_xticklabels(cardinality_range_labels)

# Horizontal grid lines only
ax.grid(alpha=0.5)
ax.xaxis.grid(False)

# Fontsize for the axis title and ticks
ax.tick_params(axis='both', which='major', labelsize=25)
ax.set_xlabel(xlabel='Cardinality of replaced values', fontsize=27)
ax.set_ylabel(ylabel='% of injected homographs in top 50', fontsize=27)

# Add values on top of bar charts (one decimal place, to avoid printing
# floating-point noise such as 98.50000000000001%)
percentages = df_cardinality_range['percentage_of_injected_homographs_in_top_50'].tolist()
for x, y_val in zip(x_vals, percentages):
    ax.text(x, y_val + 2, f'{y_val:.1f}%', color='black', ha='center', fontsize=24)

fig.tight_layout()
fig.savefig(Path(save_dir) / 'injected_homograph_rank_vs_cardinality.pdf')
