In [16]:
import pandas as pd

def parse_train_log(file_path, return_type='df'):
    """
    Parses the train_log.txt file and returns the data as either a dictionary or a pandas DataFrame.
    Data is nested under 'val_seen' and 'val_unseen' keys, excluding these keys from the nested dictionaries.

    Parameters:
    - file_path (str): The path to the train_log.txt file.
    - return_type (str): The type of data structure to return. Can be 'dict' or 'df'.

    Returns:
    - data (dict or DataFrame): The parsed data.
    """
    # Read the file
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Initialize the data structure
    data = {'val_seen': [], 'val_unseen': []}

    # Parse each line
    for line in lines:
        if line.startswith('val_seen:'):
            # Split the line into key-value pairs
            pairs = line.split(', ')
            # Create a dictionary for each line, excluding 'val_seen' and 'val_unseen' as keys
            row_dict = {}
            for pair in pairs:
                key, value = pair.split(':')
                if key == 'val_seen' or key == 'val_unseen':
                    continue
                # Convert numeric values to float
                if value.replace('.', '', 1).isdigit():
                    value = float(value)
                # Remove newline character from 'trajectory steps' value
                if key == 'trajectory steps':
                    value = value.strip()
                row_dict[key] = value
            data['val_seen'].append(row_dict)
        elif line.startswith('val_unseen:'):
            # Similar parsing logic for 'val_unseen', excluding 'val_unseen' as a key
            pairs = line.split(', ')
            row_dict = {}
            for pair in pairs:
                key, value = pair.split(':')
                if key == 'val_seen' or key == 'val_unseen':
                    continue
                if value.replace('.', '', 1).isdigit():
                    value = float(value)
                # Remove newline character from 'trajectory steps' value
                if key == 'trajectory steps':
                    value = value.strip()
                row_dict[key] = value
            data['val_unseen'].append(row_dict)

    # Convert to DataFrame if requested
    if return_type == 'df':
        # Convert nested dictionaries to DataFrame
        df_seen = pd.DataFrame(data['val_seen'])
        df_unseen = pd.DataFrame(data['val_unseen'])
        # Combine DataFrames if needed
        return df_seen, df_unseen
    elif return_type == 'dict':
        return data
    else:
        raise ValueError("Invalid return_type. Must be either 'dict' or 'df'.")

# Example usage
file_path = '/home/qid/minghanli/HC3D_simulator/tasks/DT_miniGPT/models/miniGPT_random_teacher_reward_strategy_3/train_log.txt'
data_dict = parse_train_log(file_path, return_type='dict')
data_df = parse_train_log(file_path, return_type='df')

print("Data as dictionary:")
print(data_dict)

print("\nData as DataFrame:")
print(data_df)

Data as dictionary:
{'val_seen': [{'length': ' 6.726', 'nav_error': ' 9.511', 'oracle success_rate': ' 0.154', 'success_rate': ' 0.131', 'spl': ' nan', 'total_hits_rate': ' 0.497', 'hits_rate': ' 0.191', 'politeness_rate': ' 0.126', 'successes_nohits_rate': ' 0.106', 'hits_weighted_success_rate': ' 0.108', 'trajectory steps': '9.508'}], 'val_unseen': [{'length': ' 6.621', 'nav_error': ' 9.536', 'oracle success_rate': ' 0.107', 'success_rate': ' 0.085', 'spl': ' nan', 'total_hits_rate': ' 0.499', 'hits_rate': ' 0.132', 'politeness_rate': ' 0.082', 'successes_nohits_rate': ' 0.063', 'hits_weighted_success_rate': ' 0.086', 'trajectory steps': '10.022'}]}

Data as DataFrame:
(   length nav_error oracle success_rate success_rate   spl total_hits_rate  \
0   6.726     9.511               0.154        0.131   nan           0.497   

  hits_rate politeness_rate successes_nohits_rate hits_weighted_success_rate  \
0     0.191           0.126                 0.106                      0.108   

 

In [17]:
data_df[0]

Unnamed: 0,length,nav_error,oracle success_rate,success_rate,spl,total_hits_rate,hits_rate,politeness_rate,successes_nohits_rate,hits_weighted_success_rate,trajectory steps
0,6.726,9.511,0.154,0.131,,0.497,0.191,0.126,0.106,0.108,9.508


In [18]:
data_df[1]

Unnamed: 0,length,nav_error,oracle success_rate,success_rate,spl,total_hits_rate,hits_rate,politeness_rate,successes_nohits_rate,hits_weighted_success_rate,trajectory steps
0,6.621,9.536,0.107,0.085,,0.499,0.132,0.082,0.063,0.086,10.022
