In [1]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from colorama import Fore, Style

# Extract the required information from a results.txt file.
# Relative deviations can be negative, so every "rel. Abweichung" lookup accepts a leading sign.
_REL_ERROR_PATTERN = r"rel\. Abweichung zu emp\. std\s+(-?\d+(?:\.\d+)?)\s*%"
_ESTIMATOR_STD_PATTERN = r"std\. des schätzers\s+(\d+\.\d+)"


def _search_line(pattern, lines, index):
    """Search ``pattern`` in ``lines[index]``; return None when the file has no such line."""
    if index < len(lines):
        return re.search(pattern, lines[index])
    return None


def _match_to_float(match):
    """Convert the first capture group of ``match`` to float; None when nothing matched."""
    return float(match.group(1)) if match else None


def _match_ratio(numerator, denominator, offset=0.0):
    """Return numerator / (denominator + offset) for two regex matches.

    Returns None when either match is missing or the divisor is zero, instead of
    raising AttributeError / ZeroDivisionError.
    """
    if not numerator or not denominator:
        return None
    divisor = float(denominator.group(1)) + offset
    if divisor == 0:
        return None
    return float(numerator.group(1)) / divisor


def extract_data_from_txt(path):
    """Parse one results.txt file into a flat dictionary of floats.

    Values are located in two ways: some are searched in the whole text (the first
    occurrence wins), others are read from a fixed, 0-based line position because the
    same label ("rel. Abweichung ...", "std. des schätzers ...") is repeated for each
    estimator. Any value that cannot be found, or whose line does not exist, is
    returned as None.

    Parameters
    ----------
    path : str
        Path of the results.txt file to read.

    Returns
    -------
    dict
        Keys: 'censored_proportion', 'events_proportion', 'ijk_std',
        'ijk_std_rel_error(%)', 'ijk_std_cv', 'jkab_std', 'jkab_std_rel_error(%)',
        'jkab_std_cv', 'True_Y', 'RF_Y_pred', 'rf_emp_std', 'ijk_biased_std',
        'ijk_biased_std_rel_error(%)', 'ijk_biased_std_cv', 'boot_std',
        'boot_std_rel_error(%)', 'boot_std_cv'. Each ``*_cv`` entry is the
        "std. des schätzers" value divided by the matching std estimate.
    """
    # Read everything first so the file handle is released before parsing.
    with open(path, 'r') as file:
        content = file.read()
    lines = content.splitlines()

    # --- censored / events proportion (searched in the whole text) ---
    events_prop = re.search(r"Events:\s+(\d+\.\d+)\s+%", content)
    censored_prop = re.search(r"Censored:\s+(\d+\.\d+)\s+%", content)

    # --- IJK: first occurrence of each label in the whole text ---
    ijk_std = re.search(r"IJK STD \(for RF\) Mean-est\s+:\s+(\d+\.\d+)", content)
    ijk_std_rel_error = re.search(_REL_ERROR_PATTERN, content)
    ijk_std_cv = re.search(_ESTIMATOR_STD_PATTERN, content)

    # --- JK-AB: std searched globally, rel. error / estimator std on lines 16 and 17 ---
    jkab_std = re.search(r"JK-AB\(un-weighted\) STD \(for RF\) Mean-est:\s+(\d+\.\d+)", content)
    jkab_std_rel_error = _search_line(_REL_ERROR_PATTERN, lines, 15)
    jkab_std_cv = _search_line(_ESTIMATOR_STD_PATTERN, lines, 16)

    # --- IJK biased: lines 11 to 13 ---
    ijk_biased_std = _search_line(
        r"IJK STD - biased \(for RF\) Mean-est\s*:\s*(\d+\.\d+)", lines, 10)
    ijk_biased_std_rel_error = _search_line(_REL_ERROR_PATTERN, lines, 11)
    ijk_biased_std_cv = _search_line(_ESTIMATOR_STD_PATTERN, lines, 12)

    # --- Bootstrap: lines 19 to 21 ---
    boot_std = _search_line(r"Boot STD \(for RF\) Mean-est\s*:\s*(\d+\.\d+)", lines, 18)
    boot_std_rel_error = _search_line(_REL_ERROR_PATTERN, lines, 19)
    boot_std_cv = _search_line(_ESTIMATOR_STD_PATTERN, lines, 20)

    # --- empirical RF std: line 5 ---
    rf_emp_std = _search_line(r"RF EMP-STD:\s+(\d+\.\d+)", lines, 4)

    # --- prediction results (searched in the whole text) ---
    true_y = re.search(r"True Y:\s+(\d+\.\d+)", content)
    rf_y_pred = re.search(r"RF Y_pred:\s+(\d+\.\d+)", content)

    # Proportions are reported in percent; convert to fractions. The censored share
    # is rounded to one decimal, the event share to two.
    data = {
        'censored_proportion': round(float(censored_prop.group(1))/100,1) if censored_prop else None,
        'events_proportion': round(float(events_prop.group(1))/100,2) if events_prop else None,
        'ijk_std': _match_to_float(ijk_std),
        'ijk_std_rel_error(%)': _match_to_float(ijk_std_rel_error),
        'ijk_std_cv': _match_ratio(ijk_std_cv, ijk_std),
        'jkab_std': _match_to_float(jkab_std),
        'jkab_std_rel_error(%)': _match_to_float(jkab_std_rel_error),
        'jkab_std_cv': _match_ratio(jkab_std_cv, jkab_std),
        'True_Y': _match_to_float(true_y),
        'RF_Y_pred': _match_to_float(rf_y_pred),
        'rf_emp_std': _match_to_float(rf_emp_std),
        'ijk_biased_std': _match_to_float(ijk_biased_std),
        'ijk_biased_std_rel_error(%)': _match_to_float(ijk_biased_std_rel_error),
        'ijk_biased_std_cv': _match_ratio(ijk_biased_std_cv, ijk_biased_std),
        'boot_std': _match_to_float(boot_std),
        'boot_std_rel_error(%)': _match_to_float(boot_std_rel_error),
        # The 0.001 offset is kept from the original code, where the author flagged
        # it as "to be adjusted". TODO: confirm whether it is still wanted.
        'boot_std_cv': _match_ratio(boot_std_cv, boot_std, offset=0.001),
    }

    return data

# Main routine: walk all sub-folders of a directory and collect the extracted data
def process_folders(main_folder,save_path_for_csv):
    """Collect the results.txt of every sub-folder into one sorted summary table.

    Each direct sub-folder of ``main_folder`` that contains a ``results.txt`` is parsed
    with ``extract_data_from_txt``. The rows are sorted by 'censored_proportion' and then
    'events_proportion', and every row gets an 'events_bin' label 'EE1', 'EE2', ... that
    counts its position within its censored-proportion group. For four groups of four
    rows this gives the same labels as the previous fixed 16-entry list. The table is
    written to ``results_summary.csv`` inside ``save_path_for_csv``.

    Parameters
    ----------
    main_folder : str
        Directory whose sub-folders hold the results.txt files.
    save_path_for_csv : str
        Directory that receives results_summary.csv.

    Returns
    -------
    pandas.DataFrame
        The sorted table including the 'events_bin' column.

    Raises
    ------
    FileNotFoundError
        If no sub-folder of ``main_folder`` contains a results.txt.
    """
    data_list = []

    # Sorted listing keeps the run order independent of the file system.
    for folder_name in sorted(os.listdir(main_folder)):
        result_file_path = os.path.join(main_folder, folder_name, 'results.txt')

        # Only sub-folders that actually contain a results.txt contribute a row.
        if os.path.isfile(result_file_path):
            data_list.append(extract_data_from_txt(result_file_path))

    if not data_list:
        # Without this check the sort below fails with an unhelpful KeyError.
        raise FileNotFoundError(f"No results.txt found in any sub-folder of {main_folder!r}")

    df = pd.DataFrame(data_list)
    df_sorted = df.sort_values(by=['censored_proportion', 'events_proportion'], ascending=[True, True])

    # Number the rows within each censored-proportion group in sorted order.
    # Rows with a missing censored proportion share one group.
    seen_per_group = {}
    labels = []
    for censored in df_sorted['censored_proportion']:
        key = None if pd.isna(censored) else censored
        seen_per_group[key] = seen_per_group.get(key, 0) + 1
        labels.append(f'EE{seen_per_group[key]}')
    df_sorted['events_bin'] = labels

    df_sorted.to_csv(os.path.join(save_path_for_csv, 'results_summary.csv'), index=False)

    return df_sorted

def _mrb_color(rel_error):
    """Text colour for a relative error: green up to 10 %, orange up to 20 %, red beyond.

    The comparison uses the absolute value rounded to a whole percent.
    """
    magnitude = round(abs(rel_error), 0)
    if magnitude <= 10:
        return 'darkgreen'
    if magnitude <= 20:
        return '#FF8C00'
    return 'red'


def save_plot(save_path, data, n, B, shape):
    """Draw one panel per (censored proportion, events bin) combination and save it as PNG.

    Every panel shows the RF prediction with +/- 1.96 * std error bars for the four
    variance estimators, the true value as a red dashed line, and a small text table
    with the empirical std, the relative error (colour coded) and the cv of each estimator.
    Values that are missing (None or NaN) are skipped instead of being drawn as "nan".

    Parameters
    ----------
    save_path : str
        Directory that receives the image.
    data : pandas.DataFrame
        One row per panel. Reads the columns 'censored_proportion', 'events_bin',
        'events_proportion', 'True_Y', 'RF_Y_pred', 'rf_emp_std' and, for each of
        ijk / ijk_biased / jkab / boot, '<name>_std', '<name>_std_rel_error(%)' and
        '<name>_std_cv'.
    n, B, shape
        Only used to build the file name 'n_train{n}_B_{B}_shape_{shape}.png'.
    """
    censored_values = sorted(data['censored_proportion'].unique())
    events_bins = sorted(data['events_bin'].unique())

    # Map each distinct value to its grid row / column.
    censored_indices = {value: idx for idx, value in enumerate(censored_values)}
    events_indices = {label: idx for idx, label in enumerate(events_bins)}

    # The grid is at least 4x4 (the original layout) and grows if the data needs more cells.
    n_rows = max(4, len(censored_values))
    n_cols = max(4, len(events_bins))
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(4 * n_cols, 3 * n_rows),
                             sharex=True, sharey=True, squeeze=False)

    # Empty proxy artists on the first panel; they only provide handles and labels for
    # the figure-level legend. No per-axes legend is created.
    ax_dummy = axes[0,0]
    ax_dummy.errorbar([], [], yerr=[], fmt='o', color='black', ecolor='black', capsize=5, label='IJK-AWB-U (IJK-U)')
    ax_dummy.errorbar([], [], yerr=[], fmt='o', color='gray', ecolor='black', capsize=5, label='IJK-AWB (IJK)')
    ax_dummy.errorbar([], [], yerr=[], fmt='o', color='violet', ecolor='black', capsize=5, label='JK-AB-U (JKA)')
    ax_dummy.errorbar([], [], yerr=[], fmt='o', color='cyan', ecolor='black', capsize=5, label="Boot     (all markers are DTBC's prediction)")
    ax_dummy.axhline(y=0, color='red', linestyle='--', linewidth=2, label='True survival probability')

    # Font size and horizontal positions (axes fraction) of the four table columns.
    fz = 12
    a = 0.35
    b = 0.5
    c_pos = 0.65
    d = 0.8

    for _, row in data.iterrows():
        censored = row['censored_proportion']
        RF_Y_pred = row['RF_Y_pred']
        rf_emp_std = row['rf_emp_std']

        row_idx = censored_indices[censored]
        col_idx = events_indices[row['events_bin']]
        ax = axes[row_idx, col_idx]

        # Per estimator: marker x position, table column, marker colour, std,
        # relative error, cv, and the cv text format (kept exactly as before).
        estimators = (
            (0, a, 'black', row['ijk_std'], row['ijk_std_rel_error(%)'], row['ijk_std_cv'], '{:.2f}'),
            (0.26, b, 'gray', row['ijk_biased_std'], row['ijk_biased_std_rel_error(%)'], row['ijk_biased_std_cv'], '{:.2f}  '),
            (0.8, c_pos, 'violet', row['jkab_std'], row['jkab_std_rel_error(%)'], row['jkab_std_cv'], '{:.2f}  '),
            (1., d, 'cyan', row['boot_std'], row['boot_std_rel_error(%)'], row['boot_std_cv'], '{:.2f}  '),
        )

        # Error bars of half-width 1.96 * std. As before, a std of exactly 0 draws
        # nothing; a missing std (None / NaN) is now skipped as well.
        for x_pos, _, marker_color, std, _, _, _ in estimators:
            if pd.notna(std) and std:
                ax.errorbar(x_pos, RF_Y_pred, yerr=1.96 * std, fmt='o', color=marker_color, ecolor='black', capsize=5)

        # Red horizontal line for the true value.
        ax.axhline(y=row['True_Y'], color='red', linestyle='--', linewidth=2)

        # Restrict the y axis to [0.5, 1].
        ax.set_ylim(0.5, 1)

        # Title: event proportion above the panel.
        ax.set_title(f'[ Event Prop: {row["events_proportion"]} ]', fontsize=10)

        # Y label only on the first column.
        if col_idx == 0:
            ax.set_ylabel(f'[ Cens Prop: {censored} ] \n predicted probability', fontsize=10)

        # Row labels of the text table.
        ax.text(0.1, 0.4, 'DTBC emp. std:', fontsize=fz, transform=ax.transAxes)
        if pd.notna(rf_emp_std):
            ax.text(b, 0.4, rf_emp_std, fontsize=fz, transform=ax.transAxes)
        ax.text(0.1, 0.3, 'est.:', fontsize=fz, transform=ax.transAxes)
        ax.text(0.1, 0.2, 'mrb(%):', fontsize=fz, transform=ax.transAxes)
        ax.text(0.1, 0.1, 'cv:', fontsize=fz, transform=ax.transAxes)

        # Column headers of the text table.
        ax.text(a, 0.3, 'IJK-U', fontsize=fz, transform=ax.transAxes)
        ax.text(b, 0.3, 'IJK', fontsize=fz, transform=ax.transAxes)
        ax.text(c_pos, 0.3, 'JKA', fontsize=fz, transform=ax.transAxes)
        ax.text(d, 0.3, 'Boot', fontsize=fz, transform=ax.transAxes)

        # cv row.
        for _, text_x, _, _, _, cv, cv_format in estimators:
            if pd.notna(cv):
                ax.text(text_x, 0.1, cv_format.format(cv), fontsize=fz, transform=ax.transAxes)

        # Relative-error row, colour coded by magnitude.
        for _, text_x, _, _, rel_error, _, _ in estimators:
            if pd.notna(rel_error):
                ax.text(text_x, 0.2, f'{rel_error:.0f}', fontsize=fz, transform=ax.transAxes,
                        color=_mrb_color(rel_error))

        ax.grid(True, linestyle='--', alpha=1.)

        # Remove the x tick labels; the x positions carry no numeric meaning.
        ax.set_xticklabels([])

    # Switch off panels that received no data.
    for ax in axes.flat:
        if not ax.has_data():
            ax.axis('off')

    # Handles and labels of the proxy artists feed the figure-level legend.
    handles, labels = ax_dummy.get_legend_handles_labels()

    fig.legend(
        handles,
        labels,
        loc='upper center',
        ncol=5,
        fontsize='large',
        frameon=True,          # draw a frame
        edgecolor='black',     # frame colour
        facecolor='white',     # legend background
        framealpha=1,          # opaque frame
        shadow=True            # drop shadow
    )

    # Leave the top 5 % of the figure free for the legend. Called on the figure itself
    # so the result does not depend on pyplot's current figure.
    fig.tight_layout(rect=[0, 0, 1, 0.95])

    fig.savefig(os.path.join(save_path, f'n_train{n}_B_{B}_shape_{shape}.png'), dpi=300)
    plt.close(fig)  # release the figure's memory

In [2]:

# Output folder for the figure. normpath collapses the mixed / doubled separators
# contained in the literal below.
save_path_shape = os.path.normpath(r'C:\Users\rehan\meine_Repos\Masterarbeit\\Chapter 3//2_plot_sims_variance\sim_with_boot_std__shape_1_5__B_1000_N_2000')

# Create the output folder if needed; no error if it already exists.
os.makedirs(save_path_shape, exist_ok=True)

# Folder whose sub-folders contain the raw results.txt files.
origin_path =   r'C:\Users\rehan\meine_Repos\Masterarbeit\Chapter 3\0_raw_sims\b_sim_shape_1_5'

# The summary CSV is written next to the raw simulation folders.
results_df = process_folders(origin_path,origin_path)

# n, B and shape only end up in the file name of the saved figure.
save_plot(save_path_shape, results_df, n=2000, B=1000, shape='1_5')
        
