In [1]:
import pandas as pd
import numpy as np

In [2]:
def maze_summary(dataframe, category):
    '''
    Calculate the means and standard deviations of certain categories in the dataframe.

    Statistics are computed for every (maze, mode) pair formed from the unique
    values of the 'maze' and 'mode' columns, in order of first appearance.
    A pair with no matching rows still yields a row, filled with NaN.

    dataframe: data table containing 'maze' and 'mode' columns, pandas dataframe
    category: user specified categories (column names) for calculation, python list
    return: (summary_mean, summary_std), tuple of pandas dataframes; every row is
            [maze, mode, statistic of each category rounded to 2 decimals] and
            the columns carry default integer labels
    '''
    maze_list = dataframe['maze'].unique().tolist()
    mode_list = dataframe['mode'].unique().tolist()
    # Collect plain python rows and build each dataframe once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the table on every call.
    mean_rows = []
    std_rows = []
    for maze in maze_list:
        for mode in mode_list:
            selected = (dataframe['maze'] == maze) & (dataframe['mode'] == mode)
            data = dataframe.loc[selected, category]
            data_mean = np.around(data.mean().values, decimals = 2).tolist()
            data_std = np.around(data.std().values, decimals = 2).tolist()
            mean_rows.append([maze, mode] + data_mean)
            std_rows.append([maze, mode] + data_std)
    return (pd.DataFrame(mean_rows), pd.DataFrame(std_rows))

In [3]:
def file_process(file_list):
    '''
    Process files for maze_summary.

    Every file is read as a comma separated table; all columns after the first
    two are passed to maze_summary as the categories, and the per-file results
    are stacked into one table of means and one table of standard deviations.

    file_list: file name list, python list
    return: (summary_mean, summary_std), tuple of pandas dataframes with a fresh
            0..n-1 index; two empty dataframes when file_list is empty
    '''
    mean_frames = []
    std_frames = []
    for path in file_list:
        dataframe = pd.read_csv(path, sep = ',')
        category = dataframe.columns.values.tolist()[2:]
        file_mean, file_std = maze_summary(dataframe = dataframe, category = category)
        mean_frames.append(file_mean)
        std_frames.append(file_std)
    # pd.concat raises on an empty list, so keep the empty-input result explicit.
    if not mean_frames:
        return (pd.DataFrame(), pd.DataFrame())
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    summary_mean = pd.concat(mean_frames, ignore_index = True)
    summary_std = pd.concat(std_frames, ignore_index = True)
    return (summary_mean, summary_std)

In [4]:
def summary_process(dataframe_list):
    '''
    Merge statistics of multiple dataframe in a list.

    For every dataframe, all columns after the first two are passed to
    maze_summary as the categories; the per-dataframe results are stacked into
    one table of means and one table of standard deviations.

    dataframe_list: dataframe list, python list
    return: (summary_mean, summary_std), tuple of pandas dataframes with a fresh
            0..n-1 index; two empty dataframes when dataframe_list is empty
    '''
    mean_frames = []
    std_frames = []
    for dataframe in dataframe_list:
        category = dataframe.columns.values.tolist()[2:]
        frame_mean, frame_std = maze_summary(dataframe = dataframe, category = category)
        mean_frames.append(frame_mean)
        std_frames.append(frame_std)
    # pd.concat raises on an empty list, so keep the empty-input result explicit.
    if not mean_frames:
        return (pd.DataFrame(), pd.DataFrame())
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    summary_mean = pd.concat(mean_frames, ignore_index = True)
    summary_std = pd.concat(std_frames, ignore_index = True)
    return (summary_mean, summary_std)

In [5]:
# Result files to analyse, one per maze
file_list = [
    'test_result_maze_01.csv',
    'test_result_maze_02.csv',
    'test_result_maze_03.csv',
]

In [6]:
# Read every result file into its own dataframe, keeping the order of file_list
dataframe_list = [pd.read_csv(path, sep = ',') for path in file_list]

In [7]:
# Preview the first five rows of the first loaded table
dataframe_list[0].head(n = 5)

Unnamed: 0,maze,mode,num_actions_1,length_movement_1,num_actions_2,length_movement_2,true_coverage,score
0,1,complete,219.0,284.0,17.0,32.0,1.0,24.866667
1,1,complete,189.0,230.0,17.0,32.0,1.0,23.866667
2,1,complete,191.0,222.0,17.0,32.0,1.0,23.933333
3,1,complete,195.0,240.0,17.0,32.0,1.0,24.066667
4,1,complete,213.0,262.0,17.0,32.0,1.0,24.666667


In [8]:
# Add maze_size information
# Calculate exploration efficiency
# NOTE(review): sizes look like cell counts of 12x12, 14x14 and 16x16 mazes - confirm
maze_size_dict = {1: 144, 2: 196, 3: 256}
# Iterate over the frames directly: xrange does not exist on Python 3 and the
# index was only ever used to look the frame up again.
for dataframe in dataframe_list:
    # An unknown maze id raises KeyError here instead of silently producing NaN
    dataframe['maze_size'] = dataframe['maze'].map(lambda maze_id: maze_size_dict[maze_id])
    # maze_size * true_coverage, shared by both efficiency ratios below
    covered_grids = dataframe['maze_size'] * dataframe['true_coverage']
    dataframe['num_grid_per_action_1'] = covered_grids / dataframe['num_actions_1']
    dataframe['num_grid_per_length_1'] = covered_grids / dataframe['length_movement_1']

In [9]:
# Preview the first table again, now including the derived columns
dataframe_list[0].head(n = 5)

Unnamed: 0,maze,mode,num_actions_1,length_movement_1,num_actions_2,length_movement_2,true_coverage,score,maze_size,num_grid_per_action_1,num_grid_per_length_1
0,1,complete,219.0,284.0,17.0,32.0,1.0,24.866667,144,0.657534,0.507042
1,1,complete,189.0,230.0,17.0,32.0,1.0,23.866667,144,0.761905,0.626087
2,1,complete,191.0,222.0,17.0,32.0,1.0,23.933333,144,0.753927,0.648649
3,1,complete,195.0,240.0,17.0,32.0,1.0,24.066667,144,0.738462,0.6
4,1,complete,213.0,262.0,17.0,32.0,1.0,24.666667,144,0.676056,0.549618


In [10]:
# Human readable labels for the summary tables, in summary column order
column_names = [
    'Maze',
    'Mode',
    'Time Steps in First Run',
    'Path Length in First Run',
    'Time Steps in Second Run',
    'Path Length in Second Run',
    'Coverage',
    'Score',
    'Maze Size',
    'Number of Grid Visited Per Time Step of First Run',
    'Number of Grid Visited Per Path Length of First Run',
]

In [11]:
# Aggregate every loaded table; the result is a (mean table, std table) pair
summary = summary_process(dataframe_list)
summary_mean, summary_std = summary

In [12]:
# Replace the default integer column labels of both tables with readable names
summary_mean.columns = summary_std.columns = column_names

In [13]:
# Mean of every metric per (maze, mode) pair, rounded to two decimals
summary_mean

Unnamed: 0,Maze,Mode,Time Steps in First Run,Path Length in First Run,Time Steps in Second Run,Path Length in Second Run,Coverage,Score,Maze Size,Number of Grid Visited Per Time Step of First Run,Number of Grid Visited Per Path Length of First Run
0,1,complete,201.35,248.1,17.0,32.0,1.0,24.28,144.0,0.72,0.58
1,1,incomplete,138.3,167.6,17.65,32.3,0.71,22.85,144.0,0.74,0.61
2,2,complete,278.95,341.0,22.0,43.0,1.0,32.03,196.0,0.7,0.58
3,2,incomplete,172.55,207.3,24.1,43.7,0.65,30.66,196.0,0.74,0.61
4,3,complete,353.1,442.1,25.0,49.0,1.0,37.6,256.0,0.73,0.58
5,3,incomplete,189.35,233.0,26.55,50.8,0.56,33.75,256.0,0.75,0.61


In [14]:
# Standard deviation of every metric per (maze, mode) pair, rounded to two decimals
summary_std

Unnamed: 0,Maze,Mode,Time Steps in First Run,Path Length in First Run,Time Steps in Second Run,Path Length in Second Run,Coverage,Score,Maze Size,Number of Grid Visited Per Time Step of First Run,Number of Grid Visited Per Path Length of First Run
0,1,complete,8.29,15.97,0.0,0.0,0.0,0.28,0.0,0.03,0.04
1,1,incomplete,49.79,61.42,1.04,1.49,0.24,1.5,0.0,0.03,0.04
2,2,complete,10.39,17.25,0.0,0.0,0.0,0.35,0.0,0.03,0.03
3,2,incomplete,49.42,60.03,2.47,1.63,0.19,1.81,0.0,0.04,0.04
4,3,complete,16.15,29.81,0.0,0.0,0.0,0.54,0.0,0.03,0.04
5,3,incomplete,75.16,92.34,1.28,2.04,0.23,1.72,0.0,0.04,0.05


In [15]:
# Write both summary tables as comma separated files, leaving out the row index
summary_mean.to_csv(path_or_buf = 'summary_mean.csv', index = False, sep = ',')
summary_std.to_csv(path_or_buf = 'summary_std.csv', index = False, sep = ',')