In [1]:
import re
import os
import pandas as pd
from typing import List, Tuple, Dict, Optional
from glob import glob

In [14]:
log_file_loc = "/group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM"

In [15]:
def extract_metrics_from_log(file_path: str) -> List[Dict[str, any]]:
    """
    Extract class dsc and iou metrics from a log file.
    
    Args:
        file_path (str): Path to the log file
        
    Returns:
        List[Dict]: List of dictionaries containing extracted metrics
    """
    # Regular expressions to match the patterns
    dsc_pattern = r'class dsc: tensor\(\[([\d\.\-e,\s]+)\]\)'
    iou_pattern = r'class iou: tensor\(\[([\d\.\-e,\s]+)\]\)'
    
    results = []
    
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
            
        i = 0
        while i < len(lines):
            line = lines[i].strip()
            
            # Look for dsc pattern
            dsc_match = re.search(dsc_pattern, line)
            if dsc_match:
                dsc_values_str = dsc_match.group(1)
                dsc_values = [float(x.strip()) for x in dsc_values_str.split(',')]
                
                # Look for corresponding iou pattern in the next few lines
                iou_values = None
                for j in range(i + 1, min(i + 5, len(lines))):  # Check next 4 lines
                    next_line = lines[j].strip()
                    iou_match = re.search(iou_pattern, next_line)
                    if iou_match:
                        iou_values_str = iou_match.group(1)
                        iou_values = [float(x.strip()) for x in iou_values_str.split(',')]
                        break
                
                # Store the results
                result = {
                    'line_number_dsc': i + 1,
                    'dsc_values': dsc_values,
                    'dsc_class_0': dsc_values[0] if len(dsc_values) > 0 else None,
                    'dsc_class_1': dsc_values[1] if len(dsc_values) > 1 else None,
                    'line_number_iou': j + 1 if iou_values else None,
                    'iou_values': iou_values,
                    'iou_class_0': iou_values[0] if iou_values and len(iou_values) > 0 else None,
                    'iou_class_1': iou_values[1] if iou_values and len(iou_values) > 1 else None
                }
                results.append(result)
            
            i += 1
                
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
    
    return results

def process_multiple_logs(log_directory: str, pattern: str = "*.log") -> pd.DataFrame:
    """
    Process multiple log files in a directory.
    
    Args:
        log_directory (str): Directory containing log files
        pattern (str): File pattern to match (default: "*.log")
        
    Returns:
        pd.DataFrame: Combined results from all log files
    """
    import glob
    
    all_results = []
    log_files = glob.glob(os.path.join(log_directory, pattern))
    
    for log_file in log_files:
        print(f"Processing: {log_file}")
        results = extract_metrics_from_log(log_file)
        
        # Add filename to each result
        for result in results:
            result['filename'] = os.path.basename(log_file)
            
        all_results.extend(results)
    
    return pd.DataFrame(all_results)

def extract_metrics_simple(file_path: str) -> List[Tuple[str, str]]:
    """
    Simple extraction that returns the raw lines containing the metrics.
    
    Args:
        file_path (str): Path to the log file
        
    Returns:
        List[Tuple[str, str]]: List of (dsc_line, iou_line) pairs
    """
    dsc_pattern = r'class dsc: tensor\(\[.*?\]\)'
    iou_pattern = r'class iou: tensor\(\[.*?\]\)'
    
    results = []
    
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
            
        i = 0
        while i < len(lines):
            line = lines[i].strip()
            
            if re.search(dsc_pattern, line):
                dsc_line = line
                iou_line = None
                
                # Look for corresponding iou line
                for j in range(i + 1, min(i + 5, len(lines))):
                    next_line = lines[j].strip()
                    if re.search(iou_pattern, next_line):
                        iou_line = next_line
                        break
                
                results.append((dsc_line, iou_line))
            
            i += 1
                
    except Exception as e:
        print(f"Error processing file: {e}")
    
    return results

In [17]:
logs = glob(os.path.join(log_file_loc, "*.log"))
dfs = []
for log_file in logs:
    results = extract_metrics_from_log(log_file)
    if not results:
        print(f"No results found in {log_file}")
        continue
    if len(results) > 1:
        # Choose the 2nd entry (index 1) - first one is presumed to be the prefinetune
        selected_result = results[1]
    else:
        selected_result = results[0]
    df = pd.DataFrame([selected_result])
    df['filename'] = os.path.basename(log_file)
    dfs.append(df)
df = pd.concat(dfs, ignore_index=True)
df = df.drop(columns=['line_number_dsc', 'dsc_values', 'dsc_class_0', 'line_number_iou', 'iou_values', 'iou_class_0'])
df

No results found in /group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM/ftSAM_instRealBBox_adp-pathosam-Tiles-Biggest_with_coords-l1-bndT-bgT-vdscF-all-instT-bbox-pdist0.0_22515866.log
No results found in /group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM/ftSAM_instRealBBox_lora-pathosam-Tiles-Biggest_with_coords-l2-bndT-bgT-vdscF-all-instT-bbox-pdist0.0_22515898.log
No results found in /group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM/ftSAM_instRealBBox_lora-pathosam-Tiles-Biggest_with_coords-l2-bndT-bgT-vdscF-all-instT-nobbox-pdist0.0_22506908.log
No results found in /group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM/ftSAM_instRealBBox_adp-sam-Tiles-Biggest_with_coords-l2-bndF-bgF-vdscF-all-instT-nobbox-pdist0.0_22515936.log
No results found in /group/glastonbury/alex/yolov8_workspace/SAM_FT/SLURM/ftSAM_instRealBBox_adp-pathosam-Tiles-Biggest_with_coords-l1-bndT-bgT-vdscF-all-instT-bbox-pdist0.0_22515971.log
No results found in /group/glastonbury/alex/yolov8_workspace/SAM

Unnamed: 0,dsc_class_1,iou_class_1,filename
0,6.890000e-02,0.038400,ftSAM_instRealBBox_adp-pathosam-Tiles-Biggest_...
1,5.604000e-01,0.406000,ftSAM_V2adp-sam-Tiles-Biggest-l-1-bndT-bgF-vds...
2,7.709500e-05,0.000039,ftSAM_BBox_adp-pathosam-Tiles-Biggest-l0-bndT-...
3,1.356000e-08,0.000000,ftSAM_instBBox_adp-sam-Tiles-Biggest-l0-bndT-b...
4,5.677000e-01,0.415200,ftSAM_V2lora-sam-Tiles-Biggest-l2-bndT-bgF-vds...
...,...,...,...
203,1.356000e-08,0.000000,ftSAM_instBBox_adp-sam-Tiles-Biggest-l0-bndF-b...
204,5.556000e-01,0.401700,ftSAM_V2adp-sam-Tiles-Biggest-l-1-bndF-bgT-vds...
205,5.603000e-01,0.406400,ftSAM_V2lora-sam-Tiles-Biggest-l-1-bndT-bgT-vd...
206,4.807000e-01,0.336600,ftSAM_V2lora-sam-Tiles-Biggest-l-1-bndF-bgF-vd...
