In [59]:
import os
import pandas as pd
import matplotlib.pyplot as plt


In [60]:
# Root directory; every entry listed inside it is treated as one problem-instance folder.
WORK_DIR = 'result'
# Per-algorithm result files that join_df() reads from each instance folder.
STEEPEST = 'steepest.csv'
GREEDY = 'greedy.csv'
HEURISTIC = 'heuristic.csv'
RANDOM = 'random.csv'

In [97]:
def column_plot(column, df, path):
    """Save a per-algorithm bar chart of ``column`` to ``path``.

    Bars show the mean of ``column`` for each value of ``df['algorithm']``,
    error bars show the standard deviation, and blue dots mark the minimum
    observed value. The image is written as ``<column>_by_algorithm.png``.

    Parameters
    ----------
    column : str
        Name of the numeric column in ``df`` to summarise.
    df : pandas.DataFrame
        Frame containing ``column`` and an ``algorithm`` column.
    path : str
        Existing directory the PNG is written to.
    """
    # One pass over the groups instead of three separate describe() calls.
    stats = df.groupby('algorithm')[column].agg(['mean', 'std', 'min'])

    # Explicit figure/axes: the plot no longer depends on whichever pyplot
    # figure happens to be current.
    fig, ax = plt.subplots(figsize = (10, 10))
    try:
        stats['mean'].plot.bar(yerr = stats['std'], ax = ax, title = column)
        stats['min'].plot.line(ax = ax, style = 'bo')
        fig.savefig(os.path.join(path, column + '_by_algorithm.png'))
    finally:
        # Release the figure; plt.cla() only cleared the axes and kept it alive.
        plt.close(fig)
    
def quality_plot(df, path):
    """Save one start-vs-final quality scatter plot per algorithm.

    Quality is the distance to the optimum: ``firstSolutionCost`` (start) and
    ``cost`` (final), each minus ``optimalSolutionCost``. For every value of
    ``df['algorithm']`` a ``quality_by_<algorithm>.png`` file is written to
    ``path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``firstSolutionCost``, ``cost``, ``optimalSolutionCost``
        and ``algorithm`` columns. It is not modified.
    path : str
        Existing directory the PNG files are written to.
    """
    # Work on a derived frame so the caller's DataFrame is left untouched.
    quality_df = df.assign(
        startQuality = df['firstSolutionCost'] - df['optimalSolutionCost'],
        finalQuality = df['cost'] - df['optimalSolutionCost'],
    )

    # Group by the bare column name: grouping by a one-element list makes
    # pandas >= 2.0 yield 1-tuples as keys, which breaks the file name below.
    for alg, alg_df in quality_df.groupby('algorithm'):
        fig, ax = plt.subplots(figsize = (10, 10))
        try:
            alg_df.plot.scatter('startQuality', 'finalQuality', ax = ax, title = alg)
            fig.savefig(os.path.join(path, f'quality_by_{alg}.png'))
        finally:
            # Close each figure so they do not pile up across algorithms
            # and instances.
            plt.close(fig)

def join_df(instance_dir, sample_size = 10, random_state = None):
    """Load, sample and stack the per-algorithm result files of one instance.

    Reads the RANDOM, GREEDY, HEURISTIC and STEEPEST CSV files from
    ``instance_dir`` (single quote as quote character), draws a random sample
    of rows from each, tags the rows with an ``algorithm`` column
    ('random', 'greedy', 'heuristic', 'steepest') and concatenates the four
    samples in that order.

    Parameters
    ----------
    instance_dir : str
        Directory containing the four CSV files.
    sample_size : int, optional
        Rows to draw per algorithm. If a file holds fewer rows, all of its
        rows are used instead of raising ValueError.
    random_state : optional
        Passed unchanged to ``DataFrame.sample`` for every file; set it to
        make the sample reproducible. ``None`` keeps the sampling unseeded.

    Returns
    -------
    pandas.DataFrame
        The stacked samples; original row indices are kept.
    """
    sources = (
        ('random', RANDOM),
        ('greedy', GREEDY),
        ('heuristic', HEURISTIC),
        ('steepest', STEEPEST),
    )
    samples = []
    for algorithm, filename in sources:
        runs = pd.read_csv(os.path.join(instance_dir, filename), quotechar = "'")
        # Never request more rows than the file has.
        n_rows = None if sample_size is None else min(sample_size, len(runs))
        picked = runs.sample(n_rows, random_state = random_state)
        samples.append(picked.assign(algorithm = algorithm))
    return pd.concat(samples)
    
def make_plots(instance_dir, summary_sample_size = 10, scatter_sample_size = 200):
    """Generate all plots for one instance directory.

    Creates ``<instance_dir>/plots`` if needed, then writes:
    * bar charts of ``stepsCount``, ``solutionsChecked``, ``quality`` and
      ``efficiency`` per algorithm, based on a sample of
      ``summary_sample_size`` rows per algorithm;
    * start-vs-final quality scatter plots, based on a separate sample of
      ``scatter_sample_size`` rows per algorithm.

    Parameters
    ----------
    instance_dir : str
        Directory holding the per-algorithm CSV result files.
    summary_sample_size : int, optional
        Rows per algorithm used for the bar charts (default 10).
    scatter_sample_size : int, optional
        Rows per algorithm used for the scatter plots (default 200).
    """
    path = os.path.join(instance_dir, "plots")
    # Single idempotent call instead of the isdir()/mkdir() pair.
    os.makedirs(path, exist_ok = True)

    sample_df = join_df(instance_dir, sample_size = summary_sample_size)
    # NOTE(review): rows with timeMillis == 0 produce inf here - confirm the
    # data never contains zero timings.
    sample_df['efficiency'] =  sample_df['cost'] / sample_df['timeMillis']
    sample_df['quality'] = sample_df['cost'] - sample_df['optimalSolutionCost']
    for column in ('stepsCount', 'solutionsChecked', 'quality', 'efficiency'):
        column_plot(column, sample_df, path)

    all_df = join_df(instance_dir, sample_size = scatter_sample_size)
    quality_plot(all_df, path)

    
    
    

In [98]:
# Build the plots for every instance folder under WORK_DIR.
# sorted() gives a stable processing order; `entry` avoids shadowing the
# builtin dir().
for entry in sorted(os.listdir(WORK_DIR)):
    instance_dir = os.path.join(WORK_DIR, entry)
    # Skip stray files (e.g. notes or archives) that are not instance folders.
    if not os.path.isdir(instance_dir):
        continue
    print(instance_dir)
    make_plots(instance_dir)

result\bur26b




result\bur26e
result\bur26g
result\nug28
result\tail20a
result\tail2a
result\tail5a
result\tail7a


In [72]:
# Load a sample of one instance for interactive inspection. The path is built
# with os.path.join so it also resolves on systems whose separator is not '\'.
df = join_df(os.path.join(WORK_DIR, 'bur26b'))

In [94]:
    # Scratch check of the quality columns, run directly on the notebook-level `df`.
    # Both derived columns are written into `df` itself (distance of the first
    # and of the final solution cost from the optimal cost).
    df['startQuality'] =  df['firstSolutionCost'] - df['optimalSolutionCost']
    df['finalQuality'] =  df['cost'] - df['optimalSolutionCost']
    
    # Only the first algorithm group is inspected: the loop breaks after one pass.
    for alg, alg_df in df.groupby(by = ['algorithm']):
        # Rows in their sampled order ...
        print(alg_df.head())
        # ... and the same group ordered by ascending final quality.
        alg_df = alg_df.sort_values(by = 'finalQuality')
        print(alg_df.head())
        break
# Disabled plotting/saving code mirroring quality_plot(); `path` is not
# defined at notebook level in the visible cells, so it cannot be re-enabled as is.
#         alg_df.plot.scatter('startQuality', 'finalQuality', figsize = (10,10), title = alg)
#       plt.savefig(os.path.join(path, 'quality' + '_by_' + alg +'.png'))
#         plt.cla()

     timeMillis  stepsCount  solutionsChecked       cost  \
185         872         112              7646  3833587.0   
6           428         100              4926  3828036.0   
91          557          94              5494  3841599.0   
165         663         131              7785  3827812.0   
67          339          84              3986  3830626.0   

                                              solution  optimalSolutionCost  \
185  [23, 22, 18, 13, 14, 17, 25, 11, 8, 26, 20, 19...            3817852.0   
6    [12, 11, 2, 6, 8, 3, 1, 16, 4, 7, 18, 9, 14, 1...            3817852.0   
91   [25, 4, 1, 22, 16, 2, 26, 6, 12, 14, 9, 3, 5, ...            3817852.0   
165  [1, 4, 10, 8, 11, 13, 12, 6, 2, 18, 5, 7, 9, 2...            3817852.0   
67   [11, 15, 4, 2, 8, 12, 6, 13, 26, 18, 9, 5, 20,...            3817852.0   

                                       optimalSolution  firstSolutionCost  \
185  [17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...          4223808.0   
6    [17, 

In [55]:
# Preview the first rows of `df`.
# NOTE(review): this cell's execution count (55) precedes the cell that defines
# `df` above (72), so the captured output may not match the current `df`.
df.head()

Unnamed: 0,timeMillis,stepsCount,solutionsChecked,cost,solution,optimalSolutionCost,optimalSolution,algorithm,quality
0,310,3370,3370,3933839.0,"[10, 6, 11, 14, 8, 23, 1, 13, 26, 15, 20, 2, 1...",3817852.0,"[17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...",random,12689.803226
1,310,3175,3175,3930530.0,"[18, 12, 6, 11, 26, 8, 4, 7, 15, 1, 19, 20, 17...",3817852.0,"[17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...",random,12679.129032
2,310,3384,3384,3956913.0,"[11, 23, 16, 4, 13, 3, 17, 24, 12, 21, 20, 6, ...",3817852.0,"[17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...",random,12764.235484
3,310,3356,3356,3929602.0,"[25, 11, 2, 13, 10, 26, 16, 8, 1, 19, 14, 18, ...",3817852.0,"[17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...",random,12676.135484
4,310,3431,3431,3954514.0,"[21, 24, 22, 7, 12, 20, 3, 1, 15, 10, 6, 11, 9...",3817852.0,"[17, 11, 26, 7, 4, 14, 6, 22, 23, 18, 5, 9, 1,...",random,12756.496774
