# Extract data from Gurobi solve logs and plot results

In [73]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gurobi_logtools as glt

Set path to log files

In [74]:
PATH = ".\experiments\logs"

Parse log files

In [75]:
result = glt.parse(PATH+"\*.log")
summary = result.summary()


Get node log

In [76]:

nl = result.progress("nodelog")
print(nl[~nl['Incumbent'].isna()])
nl.head()

      CurrentNode  RemainingNodes      Obj  Depth  IntInf  Incumbent  \
11          41072          2475.0      NaN    NaN     NaN   0.000979   
12          42799          2278.0      NaN   68.0     NaN   0.000980   
13          48825          2749.0      NaN    NaN     NaN   0.000925   
14          49687          2812.0      NaN    NaN     NaN   0.000925   
15          52387          3268.0      NaN   62.0     NaN   0.000930   
...           ...             ...      ...    ...     ...        ...   
3547      4330323        373050.0      NaN    NaN     NaN   0.000206   
3548      4335351        373708.0      NaN    NaN     NaN   0.000206   
3549      4340509        374557.0  0.00021   68.0    50.0   0.000210   
3550      4345310        372320.0      NaN    NaN     NaN   0.000206   
3551      4345318             NaN      NaN    NaN     NaN   0.000206   

        BestBd       Gap  ItPerNode     Time  Pruned NewSolution  \
11    0.000210  0.790000       37.2    13.00     NaN           H   

  left=pd.concat(progress),


Unnamed: 0,CurrentNode,RemainingNodes,Obj,Depth,IntInf,Incumbent,BestBd,Gap,ItPerNode,Time,Pruned,NewSolution,LogFilePath,LogNumber,Seed,Version
0,0,0.0,0.00021,0.0,57.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2
1,0,0.0,0.00021,0.0,57.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2
2,0,0.0,0.00021,0.0,102.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2
3,0,0.0,0.00021,0.0,102.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2
4,0,0.0,0.00021,0.0,107.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2


Add repetition and configuration details to node log dataframe

In [77]:
regex_pattern = r'_r(\d+)_c(\d+)\.log'
nl[['Repetition', 'Config']] = nl['LogFilePath'].str.extract(regex_pattern)
nl.head()

# nl['Repetition'] = pd.to_numeric(nl['Repetition'])
# nl = nl[nl['Repetition'] > 4] # only r5
# print(nl['Repetition'].unique())
# print(nl)

Unnamed: 0,CurrentNode,RemainingNodes,Obj,Depth,IntInf,Incumbent,BestBd,Gap,ItPerNode,Time,Pruned,NewSolution,LogFilePath,LogNumber,Seed,Version,Repetition,Config
0,0,0.0,0.00021,0.0,57.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2,1,1
1,0,0.0,0.00021,0.0,57.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2,1,1
2,0,0.0,0.00021,0.0,102.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2,1,1
3,0,0.0,0.00021,0.0,102.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2,1,1
4,0,0.0,0.00021,0.0,107.0,,0.00021,,,0.0,,,.\experiments\logs\Traj_all_r1_c1.log,1,0,11.0.2,1,1


Get metrics and average them over repetitions for each configuration

In [80]:

# Ensure columns are numeric, converting non-numeric entries to NaN
for col in ['Time', 'CurrentNode', 'NewSolution', 'Gap', 'Incumbent']:
    nl[col] = pd.to_numeric(nl[col], errors='coerce')

# Define helper functions for individual column aggregations
def time_to_first_feasible(series):
    feasible_times = series.dropna()
    return feasible_times.min() if not feasible_times.empty else np.nan

def solutions(series):
    feasible_times = series.dropna()
    return feasible_times.min() if not feasible_times.empty else np.nan

def final_value(series):
    valid_values = series.dropna()
    return valid_values.iloc[-1] if not valid_values.empty else np.nan

def unique_solutions(series):
    return series.dropna().nunique()

# Aggregate metrics per Config and Repetition
config_metrics = (
    nl.groupby(['Config', 'Repetition']).agg(
        Time_to_optimal=('Time', 'max'),
        Time_to_first_feasible=('Time', time_to_first_feasible),
        Nodes_explored=('CurrentNode', 'max'),
        Solutions_found=('Incumbent', unique_solutions),
        Final_gap=('Gap', final_value),
        Objective_value=('Incumbent', final_value)
    )
    .reset_index()
)

# Average the metrics over the two repetitions for each configuration
average_metrics = config_metrics.groupby('Config').mean(numeric_only=True).reset_index()

# Display the resulting DataFrame
average_metrics.columns = [
    'Config', 'Avg_Time_to_optimal', 'Avg_Time_to_first_feasible', 'Avg_Nodes_explored',
    'Avg_Solutions_found', 'Avg_Final_gap', 'Avg_Objective_value'
]

average_metrics

Unnamed: 0,Config,Avg_Time_to_optimal,Avg_Time_to_first_feasible,Avg_Nodes_explored,Avg_Solutions_found,Avg_Final_gap,Avg_Objective_value
0,1,2792.62,0.0,9162223.0,79.0,4e-06,0.000206
1,2,3600.09,0.0,11584869.0,43.0,0.397267,0.000341
2,3,771.34,0.0,2257005.0,69.0,4e-06,0.000206
3,4,790.78,0.0,2358770.0,22.0,0.0,0.000206
4,5,1421.75,0.0,4345318.0,55.0,8.7e-05,0.000206
5,6,384.18,0.0,1091379.0,51.0,6.4e-05,0.000206
6,7,3600.01,0.0,50605152.0,0.0,,


Get MEAN and STD of objective bounds

In [None]:
grouped_nl = nl.groupby(['Config', 'Time']).agg({
    'Incumbent': ['mean', 'std'],
    'BestBd': ['mean', 'std']
}).reset_index()

configs = grouped_nl['Config'].unique()
grouped_nl.head()

Unnamed: 0_level_0,Config,Time,Incumbent,Incumbent,BestBd,BestBd
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std


In [None]:
grouped_nl.columns = ['Config', 'Time', 'Incumbent_mean', 'Incumbent_std', 'BestBd_mean', 'BestBd_std']
grouped_nl["Incumbent_std"] = grouped_nl["Incumbent_std"].fillna(0)
grouped_nl["BestBd_std"] = grouped_nl["BestBd_std"].fillna(0)
grouped_nl.head()

Unnamed: 0,Config,Time,Incumbent_mean,Incumbent_std,BestBd_mean,BestBd_std


Give names to the different configurations

In [None]:
combinations = {
    'All': {'LN_I': 1, 'MHA_I': 1, 'MHA_D': 1, 'MHA_MC': 1},
    'No_MC': {'LN_I': 1, 'MHA_I': 1, 'MHA_D': 1, 'MHA_MC': 0},
    'I_only': {'LN_I': 1, 'MHA_I': 1, 'MHA_D': 0, 'MHA_MC': 0},
    'LN_only': {'LN_I': 1, 'MHA_I': 0, 'MHA_D': 0, 'MHA_MC': 0},
    'Prop_MC': {'LN_I': 1, 'MHA_I': 0, 'MHA_D': 1, 'MHA_MC': 1},
    'LN_prop': {'LN_I': 1, 'MHA_I': 0, 'MHA_D': 1, 'MHA_MC': 0},
    'None': {'LN_I': 0, 'MHA_I': 0, 'MHA_D': 0, 'MHA_MC': 0},
}


Plot the objective bounds convergence over time

In [None]:

individ = False
if individ:
    grouped_nl = nl
    #combi = grouped_nl[['Config']].drop_duplicates()#grouped_nl[['Repetition', 'Config']].drop_duplicates()
    configs = grouped_nl['Config'].unique()#combi.to_records(index=False).tolist()
else:
    # Find the global min and max values for both the x and y axes across all configurations
    x_min = 10#grouped_nl['Time'].min()
    x_max = grouped_nl['Time'].max()

    y_min = 1e-4 #min(grouped_nl['Incumbent_mean'].min(), grouped_nl['BestBd_mean'].min())
    y_max = max(grouped_nl['Incumbent_mean'].max(), grouped_nl['BestBd_mean'].max())


# Initialize the figure size
colors = plt.get_cmap('tab10', len(grouped_nl['Config'].unique()))
config_names = list(combinations.keys())
    
# Plot each configuration using the same x and y axis limits
for i, config in enumerate(configs):
    print(config)
    plt.figure(i, figsize=(4, 3))
    color = colors(i)

    # Plot Incumbent mean
    if not individ:
        config_data = grouped_nl[grouped_nl['Config'] == config]
        plt.plot(config_data['Time'], config_data['Incumbent_mean'], '-', label=f'{config_names[i]} - Incumbent', color=color)
        plt.plot(config_data['Time'], config_data['BestBd_mean'], linestyle='--', label=f'{config_names[i]} - BestBd', color='k')
    else:
        config_data = grouped_nl[(grouped_nl['Config'] == config)] # & (grouped_nl['Repetition'] == config[0])]
        plt.plot(config_data['Time'], config_data['Incumbent'], linestyle='-', label=f'{config}_{i}')
        plt.plot(config_data['Time'], config_data['BestBd'], linestyle='--', label=f'{config}_{i}')

    
    # Set to log scale
    plt.yscale('log')
    plt.xscale('log')

    # Set the same x and y limits for all plots
    if not individ:   
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)

    # Add labels
    plt.grid(True)
    plt.xlabel('Time (seconds)')
    plt.ylabel('Objective Value')
    plt.legend()
    

    # PATH = r"C:\Users\sian_\OneDrive\Documents\Thesis"
    # plt.savefig(PATH+f'\images\{config_names[i]}_time_conv.png')  
    # #plt.show()
     
