In [None]:
# CSV written by the GPT-4V run
data_file = "../keyword_correctness/runs/prasann_experiment_step_1_variant_0_27204515.csv"

# Execution log of the GPT-4V run (it is delivered as a .txt file; rename it to .log)
log_file = "../keyword_correctness/executionlogs.log"

In [None]:
import pandas as pd
# Read the run's result CSV (path configured in `data_file`) into a DataFrame.
results_df = pd.read_csv(data_file)

In [None]:
# List the column names present in the results CSV.
print(results_df.columns)

In [None]:
# Total number of rows in the results CSV (displayed as the cell output).
results_df.shape[0]

In [None]:
# Select the rows whose 'Status' is 'Failed', then order them by their input row index.
failed_rows = results_df.loc[results_df['Status'] == 'Failed']
sorted_failed_rows = failed_rows.sort_values('inputs.row_index')

# Number of failed rows (displayed as the cell output).
sorted_failed_rows.shape[0]

### Load the log file into a DataFrame

In [None]:
import pandas as pd
import re
# Show every column and never truncate cell text when a DataFrame is displayed.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None


def parse_log_file(log_file_path):
    """Parse an execution log file into a DataFrame with one row per log record.

    A record is a line that starts with the header
    ``YYYY-MM-DD HH:MM:SS ±ZZZZ <process id> <module> <level> <message>``.
    Lines that do not start with such a header (for example traceback
    continuation lines) are skipped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A DataFrame with the columns ``timestamp`` (timezone-naive
        ``datetime64[ns]`` holding the wall-clock time exactly as written in
        the log), ``process_id`` (``int32``), ``module`` (``string``),
        ``log_level`` (``category``) and ``message`` (``string``).
    """
    # The UTC offset is matched but not captured: only the wall-clock part is
    # kept, which equals parsing the offset and then dropping the timezone.
    # Both '+' and '-' offsets are accepted, and module names may be dotted
    # (e.g. "execution.flow"), so such records are no longer silently dropped.
    record_pattern = re.compile(
        r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) [+-]\d{4}\s+(\d+)\s+([\w.]+)\s+(\w+)\s+(.*)'
    )

    data = []
    # Stream the file line by line instead of loading it whole. An explicit
    # encoding keeps the result independent of the platform default, and
    # undecodable bytes are replaced rather than aborting the whole parse.
    with open(log_file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            match = record_pattern.match(line)
            if match:
                data.append(match.groups())

    df = pd.DataFrame(data, columns=['timestamp', 'process_id', 'module', 'log_level', 'message'])

    # An explicit format avoids per-value format inference and cannot be
    # tripped up by records written with different UTC offsets.
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')

    # Set data types for each column
    return df.astype({
        'timestamp': 'datetime64[ns]',
        'process_id': 'int32',
        'module': 'string',
        'log_level': 'category',
        'message': 'string',
    })

# Parse the configured execution log into a DataFrame of log records.
log_df = parse_log_file(log_file)

In [None]:
import re

# Keep only ERROR-level records; work on a copy so adding a column leaves log_df untouched.
error_rows = log_df.loc[log_df['log_level'] == 'ERROR'].copy()

# Pull the failed line number out of messages shaped like "line <N> failed".
error_rows['failed_line_num'] = error_rows['message'].str.extract(r'line (\d+) failed', expand=False)

# Discard records with any missing field (this includes messages that carry no
# line number), then store the line number as an integer.
error_rows = error_rows.dropna(
    subset=['timestamp', 'process_id', 'module', 'log_level', 'message', 'failed_line_num']
)
error_rows['failed_line_num'] = error_rows['failed_line_num'].astype('int64')

# Split the errors by whether the message mentions BadRequestError.
is_bad_request = error_rows['message'].str.contains('BadRequestError', regex=False)
bad_request_rows = error_rows[is_bad_request]
errors_not_bad_requests = error_rows[~is_bad_request]

print(f"Total errors: {len(error_rows)}")
print(f"Total bad requests: {len(bad_request_rows)}")
print(f"Not bad requests: {len(errors_not_bad_requests)}")

In [None]:
# Left-join every failed result row to the ERROR log records reporting the same
# line number; failed rows without a matching log record keep NaN in the log columns.
combine_logs_and_error_dfs = pd.merge(
    sorted_failed_rows,
    error_rows,
    how='left',
    left_on='Line number',
    right_on='failed_line_num',
)

In [None]:
# Inspect the column names of the joined results/log frame.
combine_logs_and_error_dfs.columns

In [None]:
# Errored FSNs that are not in the logs: failed rows for which the left merge
# found no ERROR log record. failed_line_num is numeric (NaN for unmatched
# rows), so a missing value is the only marker that needs to be checked.
rows_not_in_logs = combine_logs_and_error_dfs[combine_logs_and_error_dfs['failed_line_num'].isna()]

# Print the line number and FSN of each such row, without the index column.
print(rows_not_in_logs[['Line number', 'inputs.fsn']].to_string(index=False))


In [None]:
# Show the log message of every ERROR record that mentions BadRequestError.
print(bad_request_rows['message'])