In [11]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from colorama import Fore, Style

def _line_or_empty(lines, index):
    """Return ``lines[index]``, or an empty string when the file has fewer lines."""
    return lines[index] if index < len(lines) else ''


def _float_or_none(match):
    """Convert capture group 1 of a regex match to ``float``; ``None`` if there was no match."""
    return float(match.group(1)) if match else None


def _ratio_or_none(numerator_match, denominator_match):
    """Divide the two captured numbers; ``None`` if either is missing or the denominator is 0."""
    if not (numerator_match and denominator_match):
        return None
    denominator = float(denominator_match.group(1))
    if denominator == 0:
        return None
    return float(numerator_match.group(1)) / denominator


def extract_data_from_txt(path):
    """Parse one results text file into a flat dictionary of metrics.

    Some values are searched in the whole file (first occurrence wins), others
    are only looked up on one fixed line because the same label text
    (e.g. ``rel. Abweichung zu emp. std``) is searched for several estimators.

    Parameters
    ----------
    path : str
        Path of the text file to read (opened with the platform default encoding).

    Returns
    -------
    dict
        Keys: ``censored_proportion``, ``events_proportion`` (percent values
        divided by 100 and rounded to 1 resp. 2 decimals), the ``ijk_*``,
        ``ijk_biased_*`` and ``jkab_*`` values, ``True_Y``, ``RF_Y_pred``,
        ``WB_Y_pred``, ``wb_emp_std``, ``rf_emp_std``, ``wb_mse``, ``rf_mse``.
        A value is ``None`` when its pattern is not found, when the expected
        line does not exist, or (for the ``*_cv`` ratios) when the STD it is
        divided by is zero.

    Notes
    -----
    Most numeric patterns are ``\\d+\\.\\d+``: they only match unsigned decimals
    with a fractional part (no sign, no exponent, no plain integers).
    """
    # Read everything first so the file handle is released before parsing.
    with open(path, 'r') as file:
        content = file.read()
    lines = content.splitlines()

    rel_error_pattern = r"rel\. Abweichung zu emp\. std ([\-\d\.]+) %"

    # Empirical standard deviations, read from fixed 0-based line indices.
    # NOTE(review): WB is read from index 3 and RF from index 4 - confirm
    # against the layout of the results file.
    rf_emp_std = re.search(r"RF EMP-STD:\s+(\d+\.\d+)", _line_or_empty(lines, 4))
    wb_emp_std = re.search(r"WB EMP-STD:\s+(\d+\.\d+)", _line_or_empty(lines, 3))

    # IPCW mean squared errors, read from fixed line indices 40 and 41.
    wb_mse = re.search(r"WB MSE IPCW:\s+(\d+\.\d+)", _line_or_empty(lines, 40))
    rf_mse = re.search(r"RF MSE IPCW:\s+(\d+\.\d+)", _line_or_empty(lines, 41))

    # Events and censored percentages (searched in the whole file).
    events_prop = re.search(r"Events:\s+(\d+\.\d+)\s+%", content)
    censored_prop = re.search(r"Censored:\s+(\d+\.\d+)\s+%", content)

    # IJK: first occurrence of each label in the whole file.
    ijk_std = re.search(r"IJK STD \(for RF\) Mean-est\s+:\s+(\d+\.\d+)", content)
    ijk_std_rel_error = re.search(rel_error_pattern, content)
    ijk_std_cv = re.search(r"std\. des schätzers (\d+\.\d+)", content)

    # IJK biased: line indices 10, 11 and 12.
    ijk_biased_std = re.search(
        r"IJK STD - biased \(for RF\) Mean-est\s*:\s*(\d+\.\d+)", _line_or_empty(lines, 10))
    ijk_biased_std_rel_error = re.search(rel_error_pattern, _line_or_empty(lines, 11))
    ijk_biased_std_cv = re.search(r"std\. des schätzers\s+(\d+\.\d+)", _line_or_empty(lines, 12))

    # JK-AB: STD from the whole file, relative error and spread from line indices 15 and 16.
    jkab_std = re.search(r"JK-AB\(un-weighted\) STD \(for RF\) Mean-est:\s+(\d+\.\d+)", content)
    jkab_std_rel_error = re.search(rel_error_pattern, _line_or_empty(lines, 15))
    jkab_std_cv = re.search(r"std\. des schätzers (\d+\.\d+)", _line_or_empty(lines, 16))

    # Prediction results (searched in the whole file).
    true_y = re.search(r"True Y:\s+(\d+\.\d+)", content)
    rf_y_pred = re.search(r"RF Y_pred:\s+(\d+\.\d+)", content)
    wb_y_pred = re.search(r"WB Y_pred:\s+(\d+\.\d+)", content)

    # Build the result dictionary; the key order defines the CSV column order downstream.
    data = {
        'censored_proportion': round(float(censored_prop.group(1)) / 100, 1) if censored_prop else None,
        'events_proportion': round(float(events_prop.group(1)) / 100, 2) if events_prop else None,
        'ijk_std': _float_or_none(ijk_std),
        'ijk_std_rel_error(%)': _float_or_none(ijk_std_rel_error),
        'ijk_std_cv': _ratio_or_none(ijk_std_cv, ijk_std),
        'jkab_std': _float_or_none(jkab_std),
        'jkab_std_rel_error(%)': _float_or_none(jkab_std_rel_error),
        'jkab_std_cv': _ratio_or_none(jkab_std_cv, jkab_std),
        'True_Y': _float_or_none(true_y),
        'RF_Y_pred': _float_or_none(rf_y_pred),
        'WB_Y_pred': _float_or_none(wb_y_pred),
        'wb_emp_std': _float_or_none(wb_emp_std),
        'rf_emp_std': _float_or_none(rf_emp_std),
        'ijk_biased_std': _float_or_none(ijk_biased_std),
        'ijk_biased_std_rel_error(%)': _float_or_none(ijk_biased_std_rel_error),
        'ijk_biased_std_cv': _ratio_or_none(ijk_biased_std_cv, ijk_biased_std),
        'wb_mse': _float_or_none(wb_mse),
        'rf_mse': _float_or_none(rf_mse)
    }
    return data


def process_folders(main_folder, save_path_for_csv):
    """Collect the ``results.txt`` of every sub-folder into one sorted table.

    Each direct sub-folder of ``main_folder`` that contains a ``results.txt``
    is parsed with ``extract_data_from_txt``. The rows are sorted by
    ``censored_proportion`` and then ``events_proportion`` (both ascending),
    labelled with an ``events_bin`` column and written to
    ``results_summary.csv`` inside ``save_path_for_csv``.

    Parameters
    ----------
    main_folder : str
        Folder whose direct sub-folders are scanned.
    save_path_for_csv : str
        Existing folder that receives ``results_summary.csv``.

    Returns
    -------
    pandas.DataFrame
        The sorted table including the ``events_bin`` column.

    Raises
    ------
    FileNotFoundError
        If no sub-folder of ``main_folder`` contains a ``results.txt``.
    """
    records = []

    for folder_name in os.listdir(main_folder):
        folder_path = os.path.join(main_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        result_file_path = os.path.join(folder_path, 'results.txt')
        if os.path.exists(result_file_path):
            records.append(extract_data_from_txt(result_file_path))

    if not records:
        # Fail with a clear message instead of an opaque KeyError from sort_values.
        raise FileNotFoundError(
            f"No 'results.txt' found in any sub-folder of {main_folder!r}")

    df_sorted = pd.DataFrame(records).sort_values(
        by=['censored_proportion', 'events_proportion'], ascending=[True, True])

    # Label each row with its rank inside its censoring level ('EE1', 'EE2', ...).
    # Rows are already sorted by events_proportion within a level, so a running
    # count per level is the rank. For a complete 4 x 4 design this yields
    # EE1..EE4 repeated four times, without requiring exactly 16 rows.
    seen_per_level = {}
    events_bin = []
    for level in df_sorted['censored_proportion']:
        key = None if pd.isna(level) else level  # NaN never equals itself; bucket all missing values together
        seen_per_level[key] = seen_per_level.get(key, 0) + 1
        events_bin.append(f'EE{seen_per_level[key]}')
    df_sorted['events_bin'] = events_bin

    df_sorted.to_csv(os.path.join(save_path_for_csv, 'results_summary.csv'), index=False)

    return df_sorted


def save_plot(save_path, data, n, B, shape):
    """Draw one panel per (censored_proportion, events_bin) pair and save the figure as PNG.

    Every row of ``data`` is drawn into the subplot whose grid row is the rank
    of its ``censored_proportion`` and whose grid column is the rank of its
    ``events_bin`` (both ranks taken from the sorted unique values). A panel
    shows ``RF_Y_pred`` and ``WB_Y_pred`` with error bars of
    ``1.96 * rf_emp_std`` resp. ``1.96 * wb_emp_std``, a horizontal line at
    ``True_Y`` and the ``wb_mse`` / ``rf_mse`` values as text.

    Parameters
    ----------
    save_path : str
        Existing folder that receives the PNG.
    data : pandas.DataFrame
        Table with the columns ``censored_proportion``, ``events_bin``,
        ``events_proportion``, ``True_Y``, ``RF_Y_pred``, ``WB_Y_pred``,
        ``rf_emp_std``, ``wb_emp_std``, ``wb_mse`` and ``rf_mse``.
    n, B, shape
        Only used to build the file name ``n_train{n}_B_{B}_shape_{shape}.png``.
    """
    rf_label = 'DTBC prediction'
    wb_label = 'W-AFT prediction'
    truth_label = 'True Survival Probability'

    censored_values = sorted(data['censored_proportion'].unique())
    events_bins = sorted(data['events_bin'].unique())

    row_of = {value: position for position, value in enumerate(censored_values)}
    col_of = {label: position for position, label in enumerate(events_bins)}

    # Keep the 4 x 4 layout as the minimum, but grow the grid when the data has
    # more levels so that indexing `axes` cannot run out of bounds.
    n_rows = max(4, len(censored_values))
    n_cols = max(4, len(events_bins))
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(4 * n_cols, 3 * n_rows),
                             sharex=True, sharey=True)

    try:
        legend_handles = []  # artists of the first drawn panel; reused for the figure-wide legend
        font_size = 12
        text_y_shift = 0.05

        for _, row in data.iterrows():
            censored = row['censored_proportion']
            ax = axes[row_of[censored], col_of[row['events_bin']]]

            error_rf = 1.96 * row['rf_emp_std']
            error_wb = 1.96 * row['wb_emp_std']

            rf_artist = ax.errorbar(0, row['RF_Y_pred'], yerr=error_rf, fmt='o', color='black',
                                    ecolor='black', capsize=5, label=rf_label)
            wb_artist = ax.errorbar(1., row['WB_Y_pred'], yerr=error_wb, fmt='o', color='darkgreen',
                                    ecolor='black', capsize=5, label=wb_label)
            truth_artist = ax.axhline(y=row['True_Y'], color='red', linestyle='--', linewidth=2,
                                      label=truth_label)

            ax.set_ylim(0.5, 1)
            ax.set_title(f'[ Event Prop: {row["events_proportion"]} ]', fontsize=10)

            # Only the first column carries the y-axis label.
            if col_of[row['events_bin']] == 0:
                ax.set_ylabel(f'[ Cens Prop: {censored} ] \n Survival Probability', fontsize=10)

            # MSE values as text in axes coordinates.
            ax.text(0.12, 0.2 - text_y_shift, 'W-AFT IPCW MSE:', fontsize=font_size, transform=ax.transAxes)
            ax.text(0.55, 0.2 - text_y_shift, row['wb_mse'], fontsize=font_size, transform=ax.transAxes)

            ax.text(0.12, 0.1 - text_y_shift, 'DTBC IPCW MSE:', fontsize=font_size, transform=ax.transAxes)
            ax.text(0.55, 0.1 - text_y_shift, row['rf_mse'], fontsize=font_size, transform=ax.transAxes)

            ax.grid(True, linestyle='--', alpha=1.)
            ax.set_xticklabels([])

            # Collect the legend handles once.
            if not legend_handles:
                legend_handles = [rf_artist, wb_artist, truth_artist]

        # Hide subplots that received no data.
        for ax in axes.flat:
            if not (ax.lines or ax.collections):
                ax.axis('off')

        # Figure-wide legend without title and with a frame.
        fig.legend(legend_handles, [rf_label, wb_label, truth_label], loc='upper center',
                   bbox_to_anchor=(0.5, 0.98), ncol=3, frameon=True, edgecolor='black', fontsize=12)

        plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room at the top for the legend
        fig.savefig(os.path.join(save_path, f'n_train{n}_B_{B}_shape_{shape}.png'), dpi=300)
    finally:
        plt.close(fig)  # release the figure even if plotting or saving fails


In [12]:
# Values of B and n that appear in the simulation folder names and in the plot file names.
B = [500, 1000, 2000, 4000]
n = [499, 999, 1999, 3999]

# Single definition of the project root. One spelling of the repo folder is used
# everywhere (the spelling of the simulation input tree, which must already exist),
# so input and output paths also agree on case-sensitive file systems.
BASE_DIR = 'C:\\Users\\rehan\\meine_Repos\\Masterarbeit\\Chapter 3'
SIMS_DIR = f'{BASE_DIR}\\1_sims'

save_ordner_name = '2_plot_sims_model'   ######

save_path_shape_1 = f'{BASE_DIR}\\{save_ordner_name}\\shape1'
save_path_shape_1_5 = f'{BASE_DIR}\\{save_ordner_name}\\shape1_5'

# Create the output folders if they do not exist yet.
os.makedirs(save_path_shape_1, exist_ok=True)
os.makedirs(save_path_shape_1_5, exist_ok=True)

for n_i in n:
    for b in B:
        path_1 = f'{SIMS_DIR}\\sim_shape_1___B{b}\\{n_i}'
        path_1_5 = f'{SIMS_DIR}\\sim_shape_1_5_B{b}\\{n_i}'

        # The summary CSV is written next to the simulation results it summarises.
        results_df = process_folders(path_1, path_1)
        results_df_1_5 = process_folders(path_1_5, path_1_5)

        save_plot(save_path_shape_1, results_df, n_i, b, '1')
        save_plot(save_path_shape_1_5, results_df_1_5, n_i, b, '1_5')
