In [1]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'
import os
import re
from glob import glob
import numpy as np

In [6]:
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
folder = 'D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Easy_Multidigit_Addition_Decimal/'
folder_specific = f'{folder}v4-Balanced_Frequencies/'

epsilons = [0.1, 0.2, 0.3, 0.5, 0.7,
            1, 1.5,
            2, 2.5,
            3,
            5,
            7.5,
            10
           ]

# Everything below is loop-invariant, so it is defined once instead of once per epsilon.
output_dir = f'{folder_specific}STIMULI-Analyzed_data'

model_pattern = r'\d{4}_\d{2}_\d{2}_\d{2}_\d{2}_\d{2}'
epoch_pattern = r'Epoch (\d+),'
# Accepts everything the plain "digits and dots" form accepted, plus an optional
# sign and exponent, so values such as "1.5e-05" are no longer silently skipped.
loss_pattern = r'Loss: ([-+]?[\d\.]+(?:[eE][-+]?\d+)?),'
correct_pattern = r'CP: (\d+),'
correct_pattern_test = r'CPT: (\d+),'
correct_pattern_small_no_carry_test = r'CPSNCT: (\d+),'
correct_pattern_small_carry_test = r'CPSCT: (\d+),'
correct_pattern_large_no_carry_test = r'CPLNCT: (\d+),'
correct_pattern_large_carry_test = r'CPLCT: (\d+)'

# Output column -> regex whose first group holds the integer count for that column.
count_patterns = {
    'correct_predictions': correct_pattern,
    'correct_predictions_test': correct_pattern_test,
    'correct_predictions_small_no_carry_test': correct_pattern_small_no_carry_test,
    'correct_predictions_small_carry_test': correct_pattern_small_carry_test,
    'correct_predictions_large_no_carry_test': correct_pattern_large_no_carry_test,
    'correct_predictions_large_carry_test': correct_pattern_large_carry_test,
}
result_columns = ['model', 'epoch', 'loss', *count_patterns]


def parse_log_lines(lines, model):
    """Turn the lines of one log file into a list of per-epoch records.

    Each line is searched for the loss and for every count in ``count_patterns``;
    a match updates the running value of that field. Whenever a line matches
    ``epoch_pattern`` a record is emitted with the running values at that point.

    The running values start as ``None`` for every file, so a field that has not
    been seen yet in this file is reported as missing instead of inheriting the
    value parsed from a previously processed file.

    Parameters
    ----------
    lines : iterable of str
        Lines of a single log file.
    model : str or None
        Identifier stored in the ``'model'`` field of every record.

    Returns
    -------
    list of dict
        One dict per epoch line, with the keys listed in ``result_columns``.
    """
    state = {'loss': None, **dict.fromkeys(count_patterns)}
    records = []

    for line in lines:
        loss_match = re.search(loss_pattern, line)
        if loss_match:
            state['loss'] = float(loss_match.group(1))

        for column, pattern in count_patterns.items():
            count_match = re.search(pattern, line)
            if count_match:
                state[column] = int(count_match.group(1))

        # The epoch is checked last so that values found on the same line are
        # already part of the running state when the record is emitted.
        epoch_match = re.search(epoch_pattern, line)
        if epoch_match:
            records.append({
                'model': model,
                'epoch': float(epoch_match.group(1)),
                **state,
            })

    return records


os.makedirs(output_dir, exist_ok=True)

for epsilon in epsilons:
    root_folder = f'{folder_specific}Results_models/AP_{epsilon}'

    all_data = []

    # Sorted so that the row order of the CSV is reproducible between runs.
    for file_path in sorted(glob(os.path.join(root_folder, '**', '*.txt'), recursive=True)):
        with open(file_path, 'r') as file:
            lines = file.readlines()

        # The model identifier is the timestamp embedded in the file name.
        model_match = re.search(model_pattern, os.path.basename(file_path))
        current_model = model_match.group(0) if model_match else None

        file_records = parse_log_lines(lines, current_model)
        if file_records and current_model is None:
            print(f'Warning: no model timestamp found in file name: {file_path}')
        all_data.extend(file_records)

    # Explicit columns keep the header present even when nothing was parsed.
    df_all_stages = pd.DataFrame(all_data, columns=result_columns)
    output_file = f'{output_dir}/STIMULI-all_results-AP_{epsilon}.csv'
    df_all_stages.to_csv(output_file, sep=';', decimal=',', index=False)

    if df_all_stages.empty:
        print(f'Warning: no epoch records found under {root_folder}; wrote a header-only file.')
    # Reported only after the file has actually been written.
    print(f'File STIMULI-all_results-AP_{epsilon}.csv saved.')

File STIMULI-all_results-AP_0.1.csv saved.
File STIMULI-all_results-AP_0.2.csv saved.
File STIMULI-all_results-AP_0.3.csv saved.
File STIMULI-all_results-AP_0.5.csv saved.
File STIMULI-all_results-AP_0.7.csv saved.
File STIMULI-all_results-AP_1.csv saved.
File STIMULI-all_results-AP_1.5.csv saved.
File STIMULI-all_results-AP_2.csv saved.
File STIMULI-all_results-AP_2.5.csv saved.
File STIMULI-all_results-AP_3.csv saved.
File STIMULI-all_results-AP_5.csv saved.
File STIMULI-all_results-AP_7.5.csv saved.
File STIMULI-all_results-AP_10.csv saved.


In [7]:
# Average every numeric metric per epoch for each epsilon and stack the
# per-epsilon tables into a single Excel sheet.

# Rebuilt here from folder_specific so this cell does not depend on a variable
# that only exists as a leftover of the loop in the parsing cell.
output_dir = f'{folder_specific}STIMULI-Analyzed_data'

consolidated_data = []

for epsilon in epsilons:
    input_file = f'{output_dir}/STIMULI-all_results-AP_{epsilon}.csv'

    if not os.path.exists(input_file):
        print(f'Archivo no encontrado: {input_file}')
        continue

    try:
        df = pd.read_csv(input_file, sep=';', decimal=',')
    except pd.errors.EmptyDataError:
        # A completely empty file has no header to parse; treat it as "no rows".
        df = pd.DataFrame()

    numerical_df = df.select_dtypes(include=[np.number])
    if df.empty or 'epoch' not in numerical_df.columns:
        # Nothing to average: the file has no rows or no numeric epoch column.
        print(f'Archivo sin datos por época, se omite: {input_file}')
        continue

    # Grouping by column name with as_index=False keeps 'epoch' as a regular
    # column, so it is still present after concat(..., ignore_index=True).
    grouped = numerical_df.groupby('epoch', as_index=False).mean()

    grouped.insert(0, 'epsilon', epsilon)  # Tag every row with its epsilon
    consolidated_data.append(grouped)

if not consolidated_data:
    # pd.concat would raise a ValueError here anyway; say what is actually missing.
    raise ValueError(f'No per-epsilon result file with data was found in {output_dir}')

# Build the final consolidated DataFrame
final_df = pd.concat(consolidated_data, ignore_index=True)

# Save it as an Excel file
output_file = os.path.join(folder_specific, 'STIMULI_Analyzed_Data.xlsx')
final_df.to_excel(output_file, index=False)
print(f'Archivo guardado en: {output_file}')

Archivo guardado en: D:/OneDrive - Universidad Complutense de Madrid (UCM)/Doctorado/Curriculum_Learning/Easy_Multidigit_Addition_Decimal/v4-Balanced_Frequencies/STIMULI_Analyzed_Data.xlsx
