In [1]:
from pathlib import Path
import re
import pandas as pd

# Base directory for logs (change if needed)
BASE_LOG_DIR = Path('/code/Logic-LLM-main/baselines/logs')

print('Using log base dir:', BASE_LOG_DIR.resolve())

Using log base dir: /code/Logic-LLM-main/baselines/logs


In [2]:
test_path = next(BASE_LOG_DIR.rglob('*.log'), None)
print('Example log file:' , test_path)

Example log file: /code/Logic-LLM-main/baselines/logs/qwen14/RAG_embedding/FOLIO__ProofWriter/shot4.log


In [3]:
# Regex patterns for the two kinds of summary lines
PAT_GSM8K = re.compile(r"GSM8K Exact Match Accuracy:\s*([0-9.]+)%\s*\((\d+)/(\d+)\)")
PAT_EM = re.compile(r"EM:\s*([0-9.]+)")

def parse_log_file(path: Path):
    """Parse a single log file and return a dict with metadata + metrics.
    Returns None if no EM line is found.
    """
    text = path.read_text(encoding='utf-8', errors='ignore')

    em = None
    em_type = None
    correct = None
    total = None

    # Try GSM8K-style line first
    m = PAT_GSM8K.search(text)
    if m:
        em = float(m.group(1)) / 100.0
        correct = int(m.group(2))
        total = int(m.group(3))
        em_type = 'gsm8k_exact_match'
    else:
        # Try generic EM line
        m2 = PAT_EM.search(text)
        if m2:
            em = float(m2.group(1))
            em_type = 'em_generic'

    if em is None:
        return None

    # Parse metadata from path: logs/MODEL/MODE_DBTYPE/SRC__TGT/shotX.log
    # Example: logs/qwen3/RAG_bm25/gsm8k__ProntoQA/shot2.log
    parts = path.parts
    # find 'logs' in parts to be robust
    try:
        idx = parts.index('logs')
    except ValueError:
        # fallback: assume direct structure
        idx = 0

    model_name = parts[idx+1] if len(parts) > idx+1 else None
    mode_dbtype = parts[idx+2] if len(parts) > idx+2 else None
    src_tgt = parts[idx+3] if len(parts) > idx+3 else None
    shot_file = parts[idx+4] if len(parts) > idx+4 else parts[-1]

    src, tgt = None, None
    if src_tgt and '__' in src_tgt:
        src, tgt = src_tgt.split('__', 1)
    
    mode, db_type = None, None
    if mode_dbtype and '_' in mode_dbtype:
        mode, db_type = mode_dbtype.split('_')
    
    shot = None
    m_shot = re.match(r"shot(\d+)\.log", shot_file)
    if m_shot:
        shot = int(m_shot.group(1))

    return {
        'log_path': str(path),
        'model_name': model_name,
        'mode': mode,
        'db_type': db_type,
        'source_domain': src,
        'target_domain': tgt,
        'shot': shot,
        'em': em,
        'em_type': em_type,
        'correct': correct,
        'total': total,
    }

test_path = next(BASE_LOG_DIR.rglob('*.log'), None)
print('Example log file:' , test_path)

Example log file: /code/Logic-LLM-main/baselines/logs/qwen14/RAG_embedding/FOLIO__ProofWriter/shot4.log


In [4]:
# Walk through all .log files and parse results
records = []
for log_file in BASE_LOG_DIR.rglob('*.log'):
    rec = parse_log_file(log_file)
    if rec is not None:
        records.append(rec)

print(f'Parsed {len(records)} log files with EM info.')
df = pd.DataFrame(records)
df

Parsed 162 log files with EM info.


Unnamed: 0,log_path,model_name,mode,db_type,source_domain,target_domain,shot,em,em_type,correct,total
0,/code/Logic-LLM-main/baselines/logs/qwen14/RAG...,qwen14,RAG,embedding,FOLIO,ProofWriter,4.0,0.708333,em_generic,,
1,/code/Logic-LLM-main/baselines/logs/qwen14/RAG...,qwen14,RAG,embedding,FOLIO,ProofWriter,2.0,0.681667,em_generic,,
2,/code/Logic-LLM-main/baselines/logs/qwen14/RAG...,qwen14,RAG,embedding,FOLIO,ProofWriter,3.0,0.691667,em_generic,,
3,/code/Logic-LLM-main/baselines/logs/qwen14/RAG...,qwen14,RAG,embedding,FOLIO,ProofWriter,1.0,0.676667,em_generic,,
4,/code/Logic-LLM-main/baselines/logs/qwen14/RAG...,qwen14,RAG,embedding,gsm8k,FOLIO,4.0,0.710784,em_generic,,
...,...,...,...,...,...,...,...,...,...,...,...
157,/code/Logic-LLM-main/baselines/logs/qwen7/RAG_...,qwen7,RAG,embedding,ProofWriter,AR-LSAT,1.0,0.239130,em_generic,,
158,/code/Logic-LLM-main/baselines/logs/qwen7/RAG_...,qwen7,RAG,embedding,LogicalDeduction,FOLIO,4.0,0.681373,em_generic,,
159,/code/Logic-LLM-main/baselines/logs/qwen7/RAG_...,qwen7,RAG,embedding,LogicalDeduction,FOLIO,2.0,0.681373,em_generic,,
160,/code/Logic-LLM-main/baselines/logs/qwen7/RAG_...,qwen7,RAG,embedding,LogicalDeduction,FOLIO,3.0,0.676471,em_generic,,


In [5]:
df.to_csv('./all_results.csv', index=False)