## Import

In [1]:
import os
import sys
import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import pandas as pd

# Move one level up so that ./src/python/ is reachable from the working
# directory (presumably the project root -- confirm against the repo layout).
# The guard keeps the cell idempotent: an unconditional os.chdir('../')
# climbs one directory higher on every re-run of this cell, which silently
# breaks every relative path used further down.
if not os.path.isdir('./src/python/'):
    os.chdir('../')
new_current_dir = os.getcwd()  # working directory after the (optional) move
print("New current directory:", new_current_dir)

# Make the project's Python sources importable; skip the append on re-runs
# so sys.path does not accumulate duplicate entries.
if './src/python/' not in sys.path:
    sys.path.append('./src/python/')

New current directory: /home/fiftyfour/Documents/NetworkPricingGraphContraction


## Process data

In [2]:
# Directory holding the tab-separated result files read by the cells below.
result_path = '/home/fiftyfour/Documents/Archive/experience_6/max_clique_3/result/max_clique/'

# glob.glob returns matches in arbitrary order; sorting makes the file order
# (and therefore the row order of the merged table) identical on every run.
best_files = sorted(glob.glob(os.path.join(result_path, '*_my_max_clique_approx_best.txt')))

# Derive the two sibling files of each "best" file by swapping ONLY the
# trailing file-name suffix. A plain str.replace on the full path would also
# rewrite any directory or graph name that happens to contain 'best',
# '_best' or '_my_', producing paths that do not exist.
best_tail = '_my_max_clique_approx_best.txt'
initial_files = [file[:-len(best_tail)] + '_my_max_clique_approx_initial.txt' for file in best_files]
# NOTE: the variable is spelled `ny_files` (kept for compatibility); it holds the networkx ("nx") result files.
ny_files = [file[:-len(best_tail)] + '_nx_max_clique_approx.txt' for file in best_files]


def name(x):
    """Return the instance name encoded in the "best" result path ``x[0]``.

    ``x`` is a sequence whose first element is the path of a
    ``*_my_max_clique_approx_best.txt`` file; the directory, the extension
    and the ``_my_max_clique_approx_best`` marker are stripped.
    """
    return os.path.splitext(os.path.basename(x[0]))[0].replace('_my_max_clique_approx_best', '')


# One tuple per instance: (best file, initial file, networkx file, instance name).
files = [(*x, name(x)) for x in zip(best_files, initial_files, ny_files)]

In [None]:
def merging(best_file, initial_file, nx_file, name):
    """Combine the three result tables of one instance into a single frame.

    Each file is read as a tab-separated table that must contain an
    'iteration' column; its 'time' and 'length' columns are renamed with a
    per-source prefix ('best_', 'initial_', 'nx_').

    Parameters
    ----------
    best_file : path of the maximum clique V2 results.
    initial_file : path of the maximum clique V1 results.
    nx_file : path of the networkx results.
    name : label stored in the 'name' column of the returned frame.

    Returns
    -------
    A DataFrame with the three tables placed side by side, restricted to the
    rows whose 'iteration' value is identical in all three files, or None
    (after printing `name`) when the tables do not all have the same shape.
    """
    sources = (
        ('best', best_file),        # maximum clique V2 results
        ('initial', initial_file),  # maximum clique V1 results
        ('nx', nx_file),            # networkx
    )
    tables = {prefix: pd.read_csv(path, sep='\t') for prefix, path in sources}

    # Guard clause: the three tables must have the same dimensions.
    expected_shape = tables['best'].shape
    if any(table.shape != expected_shape for table in tables.values()):
        print(name)
        return None

    # Keep only the rows on which all three files report the same iteration.
    iterations = tables['best']['iteration']
    keep = (iterations == tables['initial']['iteration']) & (iterations == tables['nx']['iteration'])

    # Filter, drop the now redundant key column and prefix the measurements.
    parts = []
    for prefix, table in tables.items():
        renamed = table[keep].drop(columns=['iteration']).rename(
            columns={'time': prefix + '_time', 'length': prefix + '_length'}
        )
        parts.append(renamed)

    # Place the three tables side by side and tag every row with the instance name.
    merged_df = pd.concat(parts, axis=1)
    merged_df['name'] = name
    return merged_df

# Merge every instance listed in `files`; merging() yields None for an
# instance whose three tables do not have matching shapes.
merged_df_list = [merging(*args) for args in files]

# Stack the per-instance tables vertically and display the result.
big_merged_df = pd.concat(merged_df_list, axis=0)
big_merged_df

## Figure

In [70]:
## Plot settings

# Font sizes shared by the figure cells.
title_font_size, legend_label_size = 32, 28
label_font_size, tick_font_size = 42, 35

# One color per plotted variant.
# NOTE(review): this rebinds `colors`, hiding the `matplotlib.colors` module
# that the import cell binds to the same name.
colors = ['blue', 'orange']

# Matplotlib configuration: PDF backend, LaTeX text rendering and the
# Computer Modern font families.
params = {
    'backend': 'pdf',
    'text.usetex': True,
    'axes.unicode_minus': True,
    'font.family': 'serif',
    'font.serif': ['Computer Modern Roman'],
    'font.sans-serif': ['Computer Modern Sans Serif'],
    'font.monospace': ['Computer Modern Typewriter'],
}
plt.rcParams.update(params)

# Table plotted by the figure cells (the merged results built above).
merged_df = big_merged_df

In [None]:
### Distribution of the relative increase (in %) in execution time and in maximum clique size compared with the networkx algorithm

def _relative_increase_pct(values, reference):
    """Return the increase of `values` over `reference`, as a percentage of `reference`."""
    return 100 * (values - reference) / reference


def _plot_relative_increase(ax, data, metric, xlabel):
    """Overlay the V1 and V2 histograms of one metric's relative increase.

    Parameters
    ----------
    ax : Axes to draw on.
    data : DataFrame providing the columns 'initial_<metric>',
        'best_<metric>' and 'nx_<metric>'.
    metric : column suffix to plot ('time' or 'length').
    xlabel : label of the x-axis.
    """
    reference = data['nx_' + metric]
    for version, prefix, color in (('V1', 'initial', colors[0]), ('V2', 'best', colors[1])):
        sns.histplot(
            _relative_increase_pct(data[prefix + '_' + metric], reference),
            bins=40, kde=False, alpha=0.5, color=color, label=version, ax=ax,
        )
    ax.set_xlabel(xlabel, fontsize=label_font_size)
    ax.tick_params(axis='both', labelsize=tick_font_size)
    # Rotate through tick_params rather than on the individual label objects:
    # the setting then also applies to ticks that are (re)created when the
    # figure is laid out and drawn later on.
    ax.tick_params(axis='x', labelrotation=45)


# Two panels side by side sharing the y-axis (frequency).
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(25, 10))

# Left panel: execution time.
_plot_relative_increase(ax1, merged_df, 'time', "Augmentation relative du temps (\\%)")
ax1.set_ylabel('Fréquence', fontsize=label_font_size)

# Right panel: clique size; the legend is shown once, on this panel.
_plot_relative_increase(ax2, merged_df, 'length', "Augmentation relative de la taille (\\%)")
ax2.legend(fontsize=legend_label_size)

plt.tight_layout()
plt.subplots_adjust(wspace=0, hspace=0)  # glue the two panels together
fig.align_xlabels([ax1, ax2])

# Save the figure (creating the output directory first so savefig cannot fail
# on a missing folder), then release this specific figure.
output_path = './result/distribution_relative_increase_time_length.pdf'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, bbox_inches='tight')
plt.close(fig)