In [1]:
import matplotlib.pyplot as plt
import pathlib
import pandas as pd
from typing import List
import seaborn as sbn
from IPython.display import Markdown
sbn.set()

ModuleNotFoundError: No module named 'pandas._libs.join'

In [None]:
# Matplotlib configuration: draw every axes on a plain white background.
plt.rcParams.update({'axes.facecolor': 'white'})

In [None]:
# Project palette, reused by the plotting helpers below.
colors = [
    '#965F77',  # plum
    '#3C3C4C',  # graphite
    '#FED542',  # sunny yellow
    '#B4A0AA',  # heather
]
# set_palette builds the seaborn palette from the list of colours itself.
sbn.set_palette(colors)

In [None]:
# All locations are derived from the parent of the current working directory.
PROJECT_MAIN_PATH = pathlib.Path.cwd().parent
CODE_MAIN_PATH = PROJECT_MAIN_PATH / 'src'
DATA_MAIN_PATH = CODE_MAIN_PATH / 'data_gather'
# Directory the figures are written to.
CHARTS_MAIN_PATH = PROJECT_MAIN_PATH / 'charts/PT'

# Directory holding the CSV files read in the "Data loading" section.
EVENCHECK_GATE_PT_PATH = DATA_MAIN_PATH / 'pt/EVENCHECKgate'

# Metrics

In [None]:
def correct_rate(df: pd.DataFrame, column='potential') -> float:
    """
    Percentage of rows whose ``column`` value equals 0.0, rounded to two decimals.

    Both counts come from ``Series.count``, so missing values in ``column``
    are excluded from the numerator and from the denominator.

    param df: frame holding the results
    param column: name of the column compared against 0.0
    return: share of zero-valued rows, in percent
    """
    zero_rows = df.loc[df[column] == 0.0, column].count()
    all_rows = df[column].count()
    return round(zero_rows / all_rows * 100, 2)

In [None]:
def mean_number_of_steps(df: pd.DataFrame, calc_column='iteration', filter_column='potential') -> float:
    """
    Mean of ``calc_column`` over the rows where ``filter_column`` equals 0.0, rounded to two decimals.

    param df: frame holding the results
    param calc_column: name of the column that is averaged
    param filter_column: name of the column that must equal 0.0 for a row to be included
    return: rounded mean (NaN when no row passes the filter)
    """
    selected = df.loc[df[filter_column] == 0.0, calc_column]
    return round(selected.mean(), 2)

In [None]:
def calc_mns_series(df: pd.DataFrame, groupby: str) -> pd.DataFrame:
    """
    Compute ``mean_number_of_steps`` separately for every distinct value of ``groupby``.

    param df: frame holding the results
    param groupby: name of the column whose unique values define the groups
    return: frame with the columns ``groupby`` and 'mns', indexed by the group values
            (in order of first appearance)
    """
    group_values = list(df[groupby].unique())
    mns_values = [mean_number_of_steps(df[df[groupby] == value]) for value in group_values]
    return pd.DataFrame({groupby: group_values, 'mns': mns_values}, index=group_values)

In [None]:
def calc_cr_series(df: pd.DataFrame, groupby: str) -> pd.DataFrame:
    """
    Compute ``correct_rate`` separately for every distinct value of ``groupby``.

    param df: frame holding the results
    param groupby: name of the column whose unique values define the groups
    return: frame with the columns ``groupby`` and 'cr', indexed by the group values
            (in order of first appearance)
    """
    group_values = list(df[groupby].unique())
    cr_values = [correct_rate(df[df[groupby] == value]) for value in group_values]
    return pd.DataFrame({groupby: group_values, 'cr': cr_values}, index=group_values)

# Data loading

In [None]:
# Each frame is one CSV from EVENCHECK_GATE_PT_PATH; the experiment settings are
# encoded in the file names (presumably numsim = number of simulations - TODO confirm).

# Runs with different switch_step values.
df_evencheck_pt_diff_switch_step = pd.read_csv(
    EVENCHECK_GATE_PT_PATH / 'pt_example_numsim30_switch_steps1_50_steps5000_pt_const_sa_const.csv'
)
# Example run.
df_evencheck_pt_example = pd.read_csv(
    EVENCHECK_GATE_PT_PATH / 'pt_example_numsim300_switch_step10_steps5000_pt_const_sa_const.csv'
)
# Runs with calculated ("optimal") temperatures.
df_evencheck_pt_optimal_temp_calc = pd.read_csv(
    EVENCHECK_GATE_PT_PATH / 'pt_evencheck_numsim300_switch_step10_steps5000_ptscheme_gaussian1_sa_const_optimal_temp_calculation.csv'
)
# Runs with different numbers of copies.
df_evencheck_pt_optimal_number_of_copies = pd.read_csv(
    EVENCHECK_GATE_PT_PATH / 'pt_evencheck_numsim50_switch_step40_steps5000_ptscheme_gaussian1_sa_const_optimal_num_of_copies.csv'
)
# Runs with different max_steps values.
df_evencheck_pt_optimal_number_of_max_steps = pd.read_csv(
    EVENCHECK_GATE_PT_PATH / 'pt_evencheck_numsim50_switch_step40_steps[100, 1000, 10000]_ptscheme_gaussian1_sa_const_optimal_max_steps.csv'
)

# Analysis

## Parallel Tempering example

In [None]:
def plot_scatter(dfs: List[pd.DataFrame],
                 legend: List[str],
                 save_path: pathlib.Path,
                 column: str,
                 title: str,
                 ylabel: str,
                 xlabel: str,
                 xscale: str = None,
                 yscale: str = None,
                 xlim = None,
                 xticks: List[int] = None,
                ) -> None:
    """
    Draw one marker-and-line series per frame on the current pyplot axes and save the figure as PDF.

    param dfs: frames to plot; the index of each frame is used as x, ``column`` as y
    type dfs: list of pandas DataFrame's
    param legend: legend labels, matched to ``dfs`` by position
    param save_path: directory in which '<title>.pdf' is written
    param column: name of the column plotted on the y axis
    param title: figure title, also used as the output file name
    param ylabel: label of the y axis
    param xlabel: label of the x axis
    param xscale: optional scale name forwarded to ``plt.xscale``
    param yscale: optional scale name forwarded to ``plt.yscale``
    param xlim: optional x limits forwarded to ``plt.xlim``
    param xticks: optional explicit x tick positions
    """
    handles = []
    for position, df in enumerate(dfs):
        # Wrap around the module-level palette so that frames beyond
        # len(colors) are still drawn instead of being silently dropped.
        color = colors[position % len(colors)]
        points = plt.scatter(df.index, df[column], color=color, marker='D')
        line, = plt.plot(df.index, df[column], color=color)
        # One legend entry per frame: the tuple makes matplotlib draw the
        # marker and the line together in a single legend row.
        handles.append((points, line))
    plt.title(title)
    # Explicit handles keep label i attached to frame i; passing labels alone
    # spreads them over every artist (markers and lines) in drawing order.
    plt.legend(handles, legend)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.grid(color='lightgray', linestyle='--', linewidth=1)
    if xscale:
        plt.xscale(xscale)
    if yscale:
        plt.yscale(yscale)
    if xlim:
        plt.xlim(xlim)
    # len() instead of truthiness so that array-like tick positions also work.
    if xticks is not None and len(xticks) > 0:
        plt.xticks(xticks)
    plt.tight_layout()
    plt.savefig(save_path / f'{title}.pdf', dpi=300, format='pdf')

In [None]:
def _draw_double_axis_series(ax, plot_type, x_values, y_values, color, marker, bar_width):
    """Draw one data series on ``ax``: bars for 'bar', markers joined by a line otherwise."""
    if plot_type == 'bar':
        # align='edge' with a signed width puts the bar left (negative width)
        # or right (positive width) of the x position.
        ax.bar(x_values, y_values, width=bar_width, align='edge', color=color)
    else:
        ax.scatter(x_values, y_values, color=color, marker=marker)
        ax.plot(x_values, y_values, color=color)


def double_axis_plot(dataset_1: pd.DataFrame,
                     dataset_2: pd.DataFrame,
                     save_path: pathlib.Path,
                     legend: List[str],
                     plot_type: str,
                     title: str,
                     xlabel: str,
                     y_1_label: str,
                     y_2_label: str,
                     x: str,
                     y_1: str = 'mns',
                     y_2: str = 'cr',
                     xscale: str = None,
                     xlim = None,
                     yscale = None,
                     marker: str = 'D',
                     colors: List[str] = colors,
                     xticks: List[int] = None
                    ) -> None:
    """
    Plot two series against a shared x axis with separate left/right y axes and save the figure as PDF.

    param dataset_1: frame providing the x values (column ``x``) and the left-axis values (column ``y_1``)
    param dataset_2: frame providing the right-axis values (column ``y_2``); they are drawn
                     against the x values of ``dataset_1``, so both frames must be row-aligned
    param save_path: directory in which '<title>.pdf' is written
    param legend: accepted for backward compatibility; no legend is drawn
                  (the two series are identified by the colours of the y-axis labels)
    param plot_type: 'plot' or 'scatter' (markers joined by a line) or 'bar'
    param title: figure title, also used as the output file name
    param xlabel: label of the shared x axis
    param y_1_label: label of the left y axis
    param y_2_label: label of the right y axis
    param x: name of the x column in ``dataset_1``
    param y_1: name of the left-axis column in ``dataset_1``
    param y_2: name of the right-axis column in ``dataset_2``
    param xscale: optional scale name for the shared x axis
    param xlim: optional limits for the shared x axis
    param yscale: optional scale name, applied to the right y axis only
    param marker: marker style for the 'plot' and 'scatter' types
    param colors: colours; element 0 is used for the left series, element 1 for the right one
    param xticks: optional explicit x tick positions
    raises ValueError: when ``plot_type`` is not one of the supported values
    """
    supported_types = ('plot', 'scatter', 'bar')
    if plot_type not in supported_types:
        # Fail before a figure is created instead of saving an empty chart.
        raise ValueError(f"plot_type must be one of {supported_types}, got {plot_type!r}")

    fig, ax1 = plt.subplots()

    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(y_1_label, color=colors[0])
    ax1.set_title(title)
    ax1.tick_params(axis='y', labelcolor=colors[0])
    # For bars the two series sit side by side (left/right of each x position)
    # so that the second series no longer paints over the first one.
    _draw_double_axis_series(ax1, plot_type, dataset_1[x], dataset_1[y_1],
                             colors[0], marker, bar_width=-0.4)

    ax2 = ax1.twinx()  # second axes sharing the same x axis
    ax2.set_ylabel(y_2_label, color=colors[1])  # the x label is already set on ax1
    ax2.tick_params(axis='y', labelcolor=colors[1])
    _draw_double_axis_series(ax2, plot_type, dataset_1[x], dataset_2[y_2],
                             colors[1], marker, bar_width=0.4)

    ax2.grid(color='lightgray', linestyle='--', linewidth=1)
    # Axis options are honoured for every plot type (the x axis is shared, so
    # setting it on ax2 affects ax1 as well).
    if xscale:
        ax2.set_xscale(xscale)
    if yscale:
        ax2.set_yscale(yscale)
    if xlim:
        ax2.set_xlim(xlim)
    # len() instead of truthiness so that array-like tick positions also work.
    if xticks is not None and len(xticks) > 0:
        ax2.set_xticks(xticks)

    # Run the layout last, once ticks and scales are final; otherwise the
    # right y-label is slightly clipped.
    fig.tight_layout()
    fig.savefig(save_path / f'{title}.pdf', dpi=300, format='pdf')

In [None]:
def plot_hist(data,
              save_path: pathlib.Path=None,
              title='Hist',
              legend=None,
              ylabel='y',
              xlabel='x',
              yscale=None,
              bins=None,
              colors=colors
             ):
    """
    Plot a histogram of ``data`` on the current pyplot axes, optionally save it as PDF, and show it.

    param data: values to histogram, forwarded unchanged to ``plt.hist``
    param save_path: directory in which '<title>.pdf' is written; nothing is saved when omitted
    param title: figure title, also used as the output file name
    param legend: optional legend labels
    param ylabel: label of the y axis
    param xlabel: label of the x axis
    param yscale: optional scale name forwarded to ``plt.yscale``
    param bins: optional bin specification forwarded to ``plt.hist``; None keeps matplotlib's default
    param colors: colours; only element 0 is used
    return: tuple (bin_values, bin_edges, bin_centers)
    """
    # bins=None is matplotlib's own default, so no separate branch is needed
    # (and array-like bin edges no longer fail a truthiness test).
    bin_values, bin_edges, _ = plt.hist(data, bins=bins, color=colors[0])

    # Midpoint of every pair of neighbouring edges.
    bin_centers = 0.5*(bin_edges[1:]+bin_edges[:-1])
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    if yscale:
        plt.yscale(yscale)
    plt.title(title)
    if legend:
        plt.legend(legend)
    plt.grid(color='lightgray', linestyle='--', linewidth=1)
    # Lay out before saving so the PDF gets the same layout as the shown figure.
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path / f'{title}.pdf', dpi=300, format='pdf')
    plt.show()

    return bin_values, bin_edges, bin_centers

In [None]:
def plot_hist_line(data: pd.DataFrame,
                   column: str,
                   schemes: List[str],
                   save_path: pathlib.Path,
                   title: str,
                   ylabel='y',
                   xlabel='x',
                   xscale=None,
                   yscale=None,
                   xlim=(0,4900)
                  ) -> None:
    """
    Plot the cumulative kernel density estimate of one column, save it as PDF, and show it.

    param data: frame holding the column to plot
    type data: pandas DataFrame
    param column: name of the column whose distribution is drawn
    param schemes: labels passed to the legend
    param save_path: directory in which '<title>.pdf' is written
    param title: figure title, also used as the output file name
    param ylabel: label of the y axis
    param xlabel: label of the x axis
    param xscale: optional scale name for the x axis
    param yscale: optional scale name for the y axis
    param xlim: limits of the x axis
    """
    fig, ax = plt.subplots(figsize=(7,4))
    # NOTE(review): newer seaborn releases appear to deprecate `bw` in favour of
    # `bw_method` - confirm against the installed version before changing it.
    sbn.kdeplot(data[column], ax=ax, cumulative=True, bw=0.01)

    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)
    if xscale:
        ax.set_xscale(xscale)
    if yscale:
        ax.set_yscale(yscale)
    ax.set_title(title)
    ax.legend(schemes)
    ax.grid(color='lightgray', linestyle='--', linewidth=1)
    ax.set_xlim(xlim)
    fig.tight_layout()
    fig.savefig(save_path / f'{title}.pdf', dpi=300, format='pdf')
    plt.show()

In [None]:
def plot_bar(data: pd.DataFrame,
                   column: str,
                   groupby: str,
                   schemes: List[str],
                   save_path: pathlib.Path,
                   title: str,
                   ylabel='y',
                   xlabel='x',
                   xscale=None,
                   yscale=None,
                   xlim=(0,4900)
                  ) -> None:
    """
    Draw a bar chart of the per-group mean of one column, save it as PDF, and show it.

    param data: frame holding the values
    type data: pandas DataFrame
    param column: name of the column that is averaged and plotted
    param groupby: name of the column defining the groups (one bar per group)
    param schemes: labels passed to the legend
    param save_path: directory in which '<title>.pdf' is written
    param title: figure title, also used as the output file name
    param ylabel: label of the y axis
    param xlabel: label of the x axis
    param xscale: optional scale name forwarded to ``plt.xscale``
    param yscale: optional scale name forwarded to ``plt.yscale``
    param xlim: accepted for signature compatibility; not applied to the chart
    """
    # Select the column before aggregating: averaging the whole frame first
    # fails on non-numeric columns in recent pandas and does needless work.
    data.groupby(groupby)[column].mean().plot.bar()
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    if xscale:
        plt.xscale(xscale)
    if yscale:
        plt.yscale(yscale)
    plt.title(title)
    plt.legend(schemes)
    plt.grid(color='lightgray', linestyle='--', linewidth=1)
    plt.tight_layout()
    plt.savefig(save_path / f'{title}.pdf', dpi=300, format='pdf')
    plt.show()

In [None]:
# Cumulative KDE of the 'iteration' column for the example run.
plot_hist_line(
    df_evencheck_pt_example,
    column='iteration',
    schemes=['pt'],
    title='Parallel Tempering example run - EVENCHECK',
    xlabel='simulation step - when solution was found',
    ylabel='Cumulative KDE density',
    save_path=CHARTS_MAIN_PATH,
)

In [None]:
# Distribution of switch_ratio over the example run (20 bins).
plot_hist(
    data=df_evencheck_pt_example['switch_ratio'],
    bins=20,
    title='Histogram of switch_ratio values - PT example',
    xlabel='switch_ratio value',
    ylabel='counts',
    save_path=CHARTS_MAIN_PATH,
)

## Solution goodness vs switch_step

In [None]:
# Mean number of steps for every switch_step value.
switch_step_mns = calc_mns_series(
    df_evencheck_pt_diff_switch_step,
    groupby='switch_step',
)

In [None]:
# Correct rate for every switch_step value.
switch_step_cr = calc_cr_series(
    df_evencheck_pt_diff_switch_step,
    groupby='switch_step',
)

In [None]:
# Correct rate per switch_step as a bar chart.
plot_bar(
    switch_step_cr,
    column='cr',
    groupby='switch_step',
    schemes=['pt'],
    title='Parallel Tempering switch_step tuning CR - EVENCHECK',
    xlabel='switch_step',
    ylabel='Correct Rate (CR)',
    save_path=CHARTS_MAIN_PATH,
)

In [None]:
# Mean number of steps per switch_step as a bar chart.
plot_bar(
    switch_step_mns,
    column='mns',
    groupby='switch_step',
    schemes=['pt'],
    title='Parallel Tempering switch_step tuning MNS - EVENCHECK',
    xlabel='switch_step',
    ylabel='Mean Number of Steps (MNS)',
    save_path=CHARTS_MAIN_PATH,
)

## Solution goodness vs Optimal Temperatures ratio

In [None]:
# Cumulative KDE of the 'iteration' column for the runs with calculated temperatures.
plot_hist_line(
    df_evencheck_pt_optimal_temp_calc,
    column='iteration',
    schemes=['pt'],
    title='Parallel Tempering, temperatures calculated - EVENCHECK',
    xlabel='simulation step - when solution was found',
    ylabel='Cumulative KDE density',
    save_path=CHARTS_MAIN_PATH,
)

#### Comment

Using the technique from the paper on Parallel Tempering does not improve the overall quality of the solution.

### Acceptance ratio

In [None]:
# Average switch_ratio over all runs with calculated temperatures.
mean_acc_ratio = df_evencheck_pt_optimal_temp_calc['switch_ratio'].mean()

# Last expression of the cell: rendered as a markdown heading.
Markdown(f"#### Mean switch ratio: {mean_acc_ratio}")

#### Comment 

The switch ratio is too high for this example. The parameter used in the calculation of the optimal temperatures should be reviewed.

In [None]:
# Distribution of switch_ratio for the runs with calculated temperatures (20 bins).
plot_hist(
    data=df_evencheck_pt_optimal_temp_calc['switch_ratio'],
    bins=20,
    title='Histogram of switch_ratio values - temperature ratio optimization',
    xlabel='switch_ratio value',
    ylabel='counts',
    save_path=CHARTS_MAIN_PATH,
)

#### Comment

As we can see, the acceptance ratio equals 1 in 2/3 of the simulations. This is not the desired behavior. Therefore, the differences in the annealing_parameter between systems should be reconsidered.

## Optimal number of copies

In [None]:
# MNS (left axis) and CR (right axis) for every number of copies,
# with one x tick per pt_copies value present in the data.
double_axis_plot(
    calc_mns_series(df_evencheck_pt_optimal_number_of_copies, groupby='pt_copies'),
    calc_cr_series(df_evencheck_pt_optimal_number_of_copies, groupby='pt_copies'),
    plot_type='scatter',
    x='pt_copies',
    y_1='mns',
    y_2='cr',
    title='CR and MNS for different number_of_copies - Parallel Tempering, EVENCHECK',
    xlabel='number of pt_copies',
    y_1_label='Mean Number of Steps',
    y_2_label='Correct Rate - CR [%]',
    legend=['mns', 'cr'],
    colors=colors,
    xscale=None,
    xticks=list(df_evencheck_pt_optimal_number_of_copies['pt_copies'].unique()),
    save_path=CHARTS_MAIN_PATH,
)

### switch_ratio for the Optimal number of copies

In [None]:
# Mean switch_ratio for every number of copies.
# Only the plotted column is averaged: taking the mean of the whole frame
# fails in recent pandas when the frame contains non-numeric columns.
plot_scatter([df_evencheck_pt_optimal_number_of_copies.groupby(['pt_copies'])[['switch_ratio']].mean()],
             legend=['evencheck'],
             save_path=CHARTS_MAIN_PATH,
             column='switch_ratio',
             title='Mean switch_ratio vs number of pt_copies',
             xlabel='number of pt_copies',
             ylabel='Mean switch_ratio',
             xticks=list(df_evencheck_pt_optimal_number_of_copies.pt_copies.unique())
            )

### optimal number of copies switch_ratio hist

In [None]:
# Distribution of switch_ratio restricted to the runs with pt_copies == 99 (20 bins).
plot_hist(
    data=df_evencheck_pt_optimal_number_of_copies.loc[
        df_evencheck_pt_optimal_number_of_copies['pt_copies'] == 99, 'switch_ratio'
    ],
    bins=20,
    title='Histogram of switch_ratio values - number_of_copies optimization',
    xlabel='switch_ratio value',
    ylabel='counts',
    save_path=CHARTS_MAIN_PATH,
)

## optimal max_steps value

In [None]:
# MNS (left axis) and CR (right axis) for every max_steps value, on a logarithmic x axis.
# No explicit xticks are passed, so matplotlib chooses the tick positions.
double_axis_plot(
    calc_mns_series(df_evencheck_pt_optimal_number_of_max_steps, groupby='max_steps'),
    calc_cr_series(df_evencheck_pt_optimal_number_of_max_steps, groupby='max_steps'),
    plot_type='scatter',
    x='max_steps',
    y_1='mns',
    y_2='cr',
    title='CR and MNS for different max_steps - Parallel Tempering, EVENCHECK',
    xlabel='number of max_steps',
    y_1_label='Mean Number of Steps',
    y_2_label='Correct Rate - CR [%]',
    legend=['mns', 'cr'],
    colors=colors,
    xscale='log',
    save_path=CHARTS_MAIN_PATH,
)

## KDE for 99 copies

In [None]:
# Cumulative KDE of the 'iteration' column restricted to the runs with pt_copies == 99.
plot_hist_line(
    df_evencheck_pt_optimal_number_of_copies[
        df_evencheck_pt_optimal_number_of_copies['pt_copies'] == 99
    ],
    column='iteration',
    schemes=['pt'],
    title='Parallel Tempering, 99 pt_copies - EVENCHECK',
    xlabel='simulation step - when solution was found',
    ylabel='Cumulative KDE density',
    xlim=(0, 3500),
    save_path=CHARTS_MAIN_PATH,
)

#### Comment

It seems that more copies of the system could affect the accuracy of the algorithm; however, it should be noted that the experiment was run using the same range of temperatures, but with different sampling. It seems, though, that the number of copies of the system strongly affects the accuracy of the solution.