In [21]:
import os
import json
import pandas as pd

def collect_data(main_folder):
    """Gather the lowest test loss and selected hyperparameters for each trial folder.

    Every entry of ``main_folder`` is treated as a candidate trial directory.
    A trial contributes one record only if it contains both ``progress.csv``
    and ``params.json`` and the CSV has a ``test_loss`` column; other entries
    are skipped without a message.  Any exception raised while reading a
    trial is printed and that trial is skipped.

    Args:
        main_folder: Path of the directory whose entries are scanned.

    Returns:
        A list of dicts with the keys ``learning_rate``, ``context_size``,
        ``samples`` (taken from ``params.json``, ``None`` when absent) and
        ``best_test_loss`` (minimum of the ``test_loss`` column).
    """
    wanted_params = ('learning_rate', 'context_size', 'samples')
    collected = []

    for entry in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, entry)
        csv_path = os.path.join(subfolder_path, 'progress.csv')
        json_path = os.path.join(subfolder_path, 'params.json')

        # Both files are required; anything else is not a usable trial.
        if not (os.path.exists(csv_path) and os.path.exists(json_path)):
            continue

        try:
            progress = pd.read_csv(csv_path)
            # params.json is only opened once the loss column is known to exist.
            if 'test_loss' not in progress.columns:
                continue
            lowest_loss = progress['test_loss'].min()

            with open(json_path, 'r') as handle:
                trial_params = json.load(handle)

            record = {key: trial_params.get(key) for key in wanted_params}
            record['best_test_loss'] = lowest_loss
            collected.append(record)
        except Exception as e:
            # Best effort: report the broken trial and keep scanning the rest.
            print(f"Error processing files in {subfolder_path}: {e}")

    return collected

def create_results_table(main_folder):
    """Print the best test loss per (learning_rate, context_size, samples) combination.

    Calls ``collect_data(main_folder)``, keeps the minimum ``best_test_loss``
    for each hyperparameter combination and prints the resulting table sorted
    from lowest to highest loss.  When nothing was collected, a hint is
    printed instead.

    Args:
        main_folder: Directory handed through to ``collect_data``.

    Returns:
        None.  The table (or the "no data" message) is written to stdout.
    """
    records = collect_data(main_folder)
    if not records:
        print("No data collected. Check the contents of your directories.")
        return

    group_keys = ['learning_rate', 'context_size', 'samples']
    # dropna=False: collect_data stores None for a hyperparameter missing from
    # params.json, and groupby's default would silently drop those trials.
    best_per_config = (
        pd.DataFrame(records)
        .groupby(group_keys, dropna=False)
        .agg({'best_test_loss': 'min'})
        .reset_index()
    )
    print(best_per_config.sort_values(by='best_test_loss', ascending=True))


In [22]:
# Summarize run_2024-07-11_05-11-10 under <cwd>/ray_results; os.path.join
# builds the path instead of concatenating a hand-written separator.
create_results_table(os.path.join(os.getcwd(), 'ray_results', 'run_2024-07-11_05-11-10'))

   learning_rate  context_size  samples  best_test_loss
5         0.0010             8        2        5.604943
4         0.0010             8        1        7.172198
1         0.0005             8        1        7.583963
7         0.0100             8        1        7.768374
0         0.0005             8        0        8.799016
2         0.0005             8        2        9.354083
6         0.0100             8        0       11.646391
3         0.0010             8        0       12.060120
8         0.0100             8        2       36.306425


In [23]:
# Summarize run_2024-07-11_11-39-40 (recorded output below: context_size = 4 in every row).
# NOTE(review): absolute path is tied to one machine/user — consider deriving it from a configurable base directory.
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_11-39-40')

    learning_rate  context_size  samples  best_test_loss
0          0.0005             4        0        5.605410
4          0.0010             4        1        6.441049
5          0.0010             4        2        7.344255
1          0.0005             4        1        7.762958
3          0.0010             4        0        8.088158
10         0.0100             4        3       11.489364
7          0.0100             4        0       11.512672
6          0.0010             4        3       13.766644
9          0.0100             4        2       14.642271
8          0.0100             4        1       16.623400
2          0.0005             4        2       30.744198


In [24]:
# Summarize run_2024-07-11_17-01-38 (recorded output below: context_size = 2 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_17-01-38')

    learning_rate  context_size  samples  best_test_loss
4          0.0005             2        4        6.900608
5          0.0010             2        0        7.325198
1          0.0005             2        1        7.735133
6          0.0010             2        1        7.857705
9          0.0010             2        4        8.699912
11         0.0100             2        1       10.317407
12         0.0100             2        2       11.460215
0          0.0005             2        0       12.818644
13         0.0100             2        3       13.487335
2          0.0005             2        2       16.386970
14         0.0100             2        4       16.590300
7          0.0010             2        2       16.957243
8          0.0010             2        3       18.019910
3          0.0005             2        3       18.183999
10         0.0100             2        0       25.067751


# The results below are for n_stores = 50

In [25]:
# Summarize run_2024-07-11_05-27-18 (recorded output below: context_size = 128 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_05-27-18')

   learning_rate  context_size  samples  best_test_loss
0         0.0005           128        0        5.412806
3         0.0010           128        1        5.415018
1         0.0005           128        1        5.506853
2         0.0010           128        0        5.781743
4         0.0100           128        0        7.152602
5         0.0100           128        1       16.189754


In [26]:
# Summarize run_2024-07-11_07-52-50 (recorded output below: context_size = 64 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_07-52-50')

   learning_rate  context_size  samples  best_test_loss
2         0.0010            64        0        5.409906
0         0.0005            64        0        7.076955
3         0.0010            64        1        7.972043
1         0.0005            64        1        8.897128
4         0.0100            64        0       11.613703
5         0.0100            64        1       17.898005


In [27]:
# Summarize run_2024-07-11_09-41-43 (recorded output below: context_size = 32 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_09-41-43')

   learning_rate  context_size  samples  best_test_loss
0         0.0005            32        0        5.411628
1         0.0010            32        0        7.393025
2         0.0100            32        0       16.262605
3         0.0100            32        1       16.342239


In [28]:
# Summarize run_2024-07-11_11-52-10 (recorded output below: context_size = 16 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_11-52-10')

    learning_rate  context_size  samples  best_test_loss
7          0.0010            16        2        5.412956
4          0.0005            16        4        6.980137
3          0.0005            16        3        7.714144
0          0.0005            16        0        8.068696
5          0.0010            16        0        8.379202
8          0.0010            16        3        8.604063
2          0.0005            16        2       11.359692
6          0.0010            16        1       13.784768
13         0.0100            16        3       14.719614
11         0.0100            16        1       15.860225
14         0.0100            16        4       16.307592
1          0.0005            16        1       16.750998
12         0.0100            16        2       19.036855
10         0.0100            16        0       21.494105
9          0.0010            16        4       22.327871


In [29]:
# Summarize run_2024-07-11_19-32-22 (recorded output below: context_size = 8 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-11_19-32-22')

    learning_rate  context_size  samples  best_test_loss
4          0.0010             8        1        5.413410
6          0.0010             8        3        7.537731
1          0.0005             8        1        7.735478
2          0.0005             8        2        8.919952
0          0.0005             8        0       10.710806
7          0.0100             8        0       11.243040
3          0.0010             8        0       13.570966
8          0.0100             8        1       13.993180
10         0.0100             8        3       16.343363
9          0.0100             8        2       22.620466
5          0.0010             8        2       36.146848


In [30]:
# Summarize run_2024-07-12_01-44-35 (recorded output below: context_size = 4 in every row).
create_results_table('/user/ml4723/Prj/NIC/ray_results/run_2024-07-12_01-44-35')

   learning_rate  context_size  samples  best_test_loss
3         0.0010             4        0        6.828720
6         0.0100             4        0        8.461158
0         0.0005             4        0        8.570793
1         0.0005             4        1       10.900044
4         0.0010             4        1       11.170525
5         0.0010             4        2       15.930729
8         0.0100             4        2       20.107402
2         0.0005             4        2       23.392702
7         0.0100             4        1       27.114183
