In [11]:
import re
import pandas as pd

# Unsigned number as printed in the logs: integer, decimal, or with exponent.
_NUMBER = r'(\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)'


def _dataset_name(path):
    """Return the file name of *path* without directories or a '.csv' suffix."""
    name = re.split(r'[\\/]', path.strip())[-1]
    if name.endswith('.csv'):
        name = name[:-len('.csv')]
    return name


def _find_number(label, session, suffix=''):
    """Return the number that follows ``<label>: `` in *session* as a float.

    Raises:
        ValueError: if the label (followed by a number and *suffix*) is absent.
    """
    match = re.search(re.escape(label) + ': ' + _NUMBER + suffix, session)
    if match is None:
        raise ValueError(
            f'{label!r} not found in training session: {session[:60]!r}'
        )
    return float(match.group(1))


def extract_metrics(text):
    """Parse training log text and extract relevant metrics.

    The log is split on the literal ``Train:``; every non-empty chunk is one
    session whose first line looks like ``<train path> Test: <test path>`` and
    whose body contains ``train_time:``, ``test_time:``, ``macro F1-Score:``
    and ``Test Accuracy: <n>%`` lines.

    Args:
        text: Full contents of the training log.

    Returns:
        A list with one dict per session. ``Dataset`` is
        ``<train name>@<test name>`` where each name is the file name without
        directories or the ``.csv`` suffix; ``Acc`` is the accuracy as a
        fraction (percentage / 100). Metrics that are not present in the log
        (``b_Acc``, ``Prec``, ``Rec``, ``kap``, ``ROC``) are NaN.

    Raises:
        ValueError: if a session header has no ``Test:`` part or a metric
            line is missing.
    """
    results = []

    for session in text.strip().split("Train:"):
        session = session.strip()
        if not session:
            continue

        # The first line of a session carries both dataset paths.
        header = session.split('\n', 1)[0]
        train_part, sep, test_part = header.partition('Test:')
        train_tokens = train_part.split()
        if not sep or not train_tokens or not test_part.strip():
            raise ValueError(f'Malformed session header: {header!r}')

        # Use the file's base name rather than a fixed-width slice so that
        # dataset names of any length are kept intact.
        train_path = _dataset_name(train_tokens[0])
        print(train_path)
        test_path = _dataset_name(test_part)
        print(test_path)

        # Accuracy is logged as a percentage; store it as a fraction.
        accuracy = _find_number('Test Accuracy', session, suffix='%') / 100
        f1_score = _find_number('macro F1-Score', session)
        train_time = _find_number('train_time', session)
        test_time = _find_number('test_time', session)

        row = {
            'Dataset': f'{train_path}@{test_path}',
            'T': 0,
            'CV': 1,
            'ML': 'BERT',
            'Acc': round(accuracy, 3),
            'b_Acc': float('nan'),
            'Prec': float('nan'),
            'Rec': float('nan'),
            'F1': round(f1_score, 4),
            'kap': float('nan'),
            'ROC': float('nan'),
            'tra-T': round(train_time, 8),
            'test-T': round(test_time, 7),
            'Al-Time': 0,
        }
        results.append(row)

    return results

def create_csv_files(input_text):
    """Write one single-row CSV file per session parsed from the log text.

    Each file is named ``BERT@<Dataset>.csv`` after the row's ``Dataset``
    value and is written to the current working directory.
    """
    for row in extract_metrics(input_text):
        # The dataset label doubles as the distinguishing part of the name.
        out_name = f"BERT@{row['Dataset']}.csv"

        # One-row frame, written without the pandas index column.
        pd.DataFrame([row]).to_csv(out_name, index=False)
        print(f"Created file: {out_name}")

# Example usage: read the whole training log, then emit one CSV per session.
with open("input.txt") as file:
    log_text = file.read()

create_csv_files(log_text)

AD-S1
DI-S1
DI-S1
AD-S1
AD-S1
DI-S2
DI-S2
AD-S1
AD-S2
DI-S2
DI-S2
AD-S2
AD-S2
DI-S1
DI-S1
AD-S2
Created file: BERT@AD-S1@DI-S1.csv
Created file: BERT@DI-S1@AD-S1.csv
Created file: BERT@AD-S1@DI-S2.csv
Created file: BERT@DI-S2@AD-S1.csv
Created file: BERT@AD-S2@DI-S2.csv
Created file: BERT@DI-S2@AD-S2.csv
Created file: BERT@AD-S2@DI-S1.csv
Created file: BERT@DI-S1@AD-S2.csv


['',
 ' ./small/AD-S1.csv Test: ./small/DI-S1.csv\nEpoch [1/7], Loss: 1.6034\nEpoch [2/7], Loss: 0.8258\nEpoch [3/7], Loss: 0.6879\nEpoch [4/7], Loss: 0.6282\nEpoch [5/7], Loss: 0.5902\nEpoch [6/7], Loss: 0.5551\nEpoch [7/7], Loss: 0.5299\ntrain_time: 13550.62489247322\ntest_time: 946.7961132526398\nmacro F1-Score: 0.5366\nTest Accuracy: 54.30%\n',
 ' ./small/DI-S1.csv Test: ./small/AD-S1.csv\nEpoch [1/7], Loss: 1.7350\nEpoch [2/7], Loss: 0.9516\nEpoch [3/7], Loss: 0.8347\nEpoch [4/7], Loss: 0.7852\nEpoch [5/7], Loss: 0.7533\nEpoch [6/7], Loss: 0.7191\nEpoch [7/7], Loss: 0.6929\ntrain_time: 13841.40567278862\ntest_time: 255.47177910804749\nmacro F1-Score: 0.6452\nTest Accuracy: 64.25%']