In [1]:
import os
import glob
import pandas as pd

In [None]:
# Folder holding one accuracy CSV per dataset, and the file the summary is saved to
csv_folder = "../../acc_rate_csvs"
output_file = "acc_summary_table.csv"

# Columns every input CSV must provide ('fold' is required but not used in the aggregation)
REQUIRED_COLUMNS = {'method', 'fold', 'acc'}


def summarize_method_accuracy(df, dataset_name):
    """Build one summary row with each method's accuracy as "mean ± std".

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'method' column and a numeric 'acc' column.
    dataset_name : str
        Value stored under the 'Dataset' key of the returned row.

    Returns
    -------
    dict
        {'Dataset': dataset_name, <method>: "<mean> ± <std>", ...} with both
        numbers rounded to two decimals. The std is pandas' groupby default
        (sample std), so a method with a single row is rendered as "± nan".
    """
    stats = df.groupby('method')['acc'].agg(['mean', 'std'])

    # 'Dataset' is inserted first so it ends up as the first column of the table.
    row = {'Dataset': dataset_name}
    # itertuples keeps each method label as-is; iterrows would build a float
    # Series per row and turn an integer label such as 1 into 1.0.
    for method, mean, std_dev in stats.itertuples(name=None):
        row[method] = f"{mean:.2f} ± {std_dev:.2f}"
    return row


def build_accuracy_summary(csv_folder):
    """Summarize every '*.csv' in `csv_folder` into one table (one row per file).

    Files that cannot be read, lack the required columns, or hold non-numeric
    'acc' values are reported on stdout and skipped.

    Parameters
    ----------
    csv_folder : str
        Folder that is searched (non-recursively) for '*.csv' files.

    Returns
    -------
    pd.DataFrame
        Rows sorted by 'Dataset' (the file name without extension), one column
        per method. Empty (only a 'Dataset' column) when nothing could be
        summarized.
    """
    empty_table = pd.DataFrame(columns=['Dataset'])

    # glob returns files in filesystem-dependent order; sorting makes the order
    # in which method columns first appear reproducible across machines.
    csv_files = sorted(glob.glob(os.path.join(csv_folder, "*.csv")))
    if not csv_files:
        print(f"No CSV files found in the folder '{csv_folder}'. Please check the path.")
        return empty_table

    results = []
    for csv_file in csv_files:
        # The file name without extension is used as the dataset name.
        dataset_name = os.path.splitext(os.path.basename(csv_file))[0]

        try:
            df = pd.read_csv(csv_file)
        except Exception as e:
            print(f"Error reading {csv_file}: {e}")
            continue  # Skip this file and continue with the next

        if not REQUIRED_COLUMNS.issubset(df.columns):
            print(f"File {csv_file} does not contain the required columns: 'method', 'fold', 'acc'. Skipping.")
            continue

        # A non-numeric 'acc' column would make the aggregation raise and abort
        # the whole run; treat it like any other invalid file and skip it.
        try:
            df = df.assign(acc=pd.to_numeric(df['acc']))
        except (ValueError, TypeError) as e:
            print(f"File {csv_file} has non-numeric values in 'acc' ({e}). Skipping.")
            continue

        results.append(summarize_method_accuracy(df, dataset_name))

    if not results:
        print("No valid data to display.")
        return empty_table

    return pd.DataFrame(results).sort_values(by='Dataset').reset_index(drop=True)


# result_df is always (re)assigned, so a stale table from an earlier run is never reused.
result_df = build_accuracy_summary(csv_folder)

if not result_df.empty:
    # Display the final table
    print(result_df)

    # Export the table to a CSV file
    try:
        result_df.to_csv(output_file, index=False)
        print(f"\nSummary table has been saved to '{output_file}'.")
    except Exception as e:
        print(f"Error saving the summary table: {e}")

In [3]:
# Folder holding one accuracy CSV per dataset, and the file the summary is saved to
csv_folder = "../acc_rate_csvs_systematic_detailed_cancer"
output_file = "acc_summary_table_cancer_system_detailed.csv"

# Columns every input CSV must provide ('fold' is required but not used in the aggregation)
REQUIRED_COLUMNS = {'method', 'fold', 'acc'}


def summarize_method_accuracy(df, dataset_name):
    """Build one summary row with each method's accuracy as "mean ± std".

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'method' column and a numeric 'acc' column.
    dataset_name : str
        Value stored under the 'Dataset' key of the returned row.

    Returns
    -------
    dict
        {'Dataset': dataset_name, <method>: "<mean> ± <std>", ...} with both
        numbers rounded to two decimals. The std is pandas' groupby default
        (sample std), so a method with a single row is rendered as "± nan".
    """
    stats = df.groupby('method')['acc'].agg(['mean', 'std'])

    # 'Dataset' is inserted first so it ends up as the first column of the table.
    row = {'Dataset': dataset_name}
    # itertuples keeps each method label as-is; iterrows would build a float
    # Series per row and turn an integer label such as 1 into 1.0.
    for method, mean, std_dev in stats.itertuples(name=None):
        row[method] = f"{mean:.2f} ± {std_dev:.2f}"
    return row


def build_accuracy_summary(csv_folder):
    """Summarize every '*.csv' in `csv_folder` into one table (one row per file).

    Files that cannot be read, lack the required columns, or hold non-numeric
    'acc' values are reported on stdout and skipped.

    Parameters
    ----------
    csv_folder : str
        Folder that is searched (non-recursively) for '*.csv' files.

    Returns
    -------
    pd.DataFrame
        Rows sorted by 'Dataset' (the file name without extension), one column
        per method. Empty (only a 'Dataset' column) when nothing could be
        summarized.
    """
    empty_table = pd.DataFrame(columns=['Dataset'])

    # glob returns files in filesystem-dependent order; sorting makes the order
    # in which method columns first appear reproducible across machines.
    csv_files = sorted(glob.glob(os.path.join(csv_folder, "*.csv")))
    if not csv_files:
        print(f"No CSV files found in the folder '{csv_folder}'. Please check the path.")
        return empty_table

    results = []
    for csv_file in csv_files:
        # The file name without extension is used as the dataset name.
        dataset_name = os.path.splitext(os.path.basename(csv_file))[0]

        try:
            df = pd.read_csv(csv_file)
        except Exception as e:
            print(f"Error reading {csv_file}: {e}")
            continue  # Skip this file and continue with the next

        if not REQUIRED_COLUMNS.issubset(df.columns):
            print(f"File {csv_file} does not contain the required columns: 'method', 'fold', 'acc'. Skipping.")
            continue

        # A non-numeric 'acc' column would make the aggregation raise and abort
        # the whole run; treat it like any other invalid file and skip it.
        try:
            df = df.assign(acc=pd.to_numeric(df['acc']))
        except (ValueError, TypeError) as e:
            print(f"File {csv_file} has non-numeric values in 'acc' ({e}). Skipping.")
            continue

        results.append(summarize_method_accuracy(df, dataset_name))

    if not results:
        print("No valid data to display.")
        return empty_table

    return pd.DataFrame(results).sort_values(by='Dataset').reset_index(drop=True)


# result_df is always (re)assigned, so a stale table from an earlier run is never reused.
result_df = build_accuracy_summary(csv_folder)

if not result_df.empty:
    # Display the final table
    print(result_df)

    # Export the table to a CSV file
    try:
        result_df.to_csv(output_file, index=False)
        print(f"\nSummary table has been saved to '{output_file}'.")
    except Exception as e:
        print(f"Error saving the summary table: {e}")

      Dataset   AFT_XGboost       constant           gru        linear  \
0      cancer  71.70 ± 4.10  63.37 ± 14.93  79.58 ± 1.96  79.55 ± 0.70   
1    detailed  93.91 ± 1.20   67.45 ± 6.42  95.32 ± 0.53  93.60 ± 1.07   
2  systematic  97.37 ± 0.75   56.87 ± 9.87  98.19 ± 0.47  96.81 ± 0.46   

           lstm           mlp          mmit            rnn  
0  77.29 ± 0.52  78.62 ± 0.04  75.97 ± 2.90   75.42 ± 2.12  
1  93.26 ± 0.91  94.95 ± 0.49  93.80 ± 1.47   86.26 ± 4.64  
2  96.64 ± 0.32  97.92 ± 0.80  97.16 ± 0.72  72.87 ± 29.01  

Summary table has been saved to 'acc_summary_table_cancer_system_detailed.csv'.
