In [49]:
import pandas as pd
import numpy as np
import os

# Root folder that holds both the input and the output directory trees.
root_dir = "/your/path/counterfactuals"

# Class labels used to build the <original>_to_<desired> file names.
# Both lists carry the same labels but are independent list objects.
_CLASS_LABELS = ("MB", "EP", "PA", "BG")
original_class = list(_CLASS_LABELS)
desired_class = list(_CLASS_LABELS)

# Input folder (merged counterfactual spreadsheets) and output folder (statistics).
src = "/".join((root_dir, "formatted_cfe_from_json"))
dest = "/".join((root_dir, "statistics_of_cfes"))

In [50]:
def create_dir(write_path, write_file):
    """Make sure both output directories exist.

    Missing parent directories are created as well, and directories that
    already exist are left untouched, so the call is safe to repeat.

    Args:
        write_path: First directory to create.
        write_file: Second directory to create. Despite its name, this is
            created as a directory, not a file.

    Raises:
        OSError: If a directory cannot be created for a reason other than
            already existing (for example insufficient permissions, or the
            path exists but is a regular file). Earlier versions silently
            ignored these failures, which only surfaced later as confusing
            write errors.
    """
    for directory in (write_path, write_file):
        # exist_ok=True tolerates "already exists" only; real failures propagate.
        os.makedirs(directory, exist_ok=True)

In [51]:
# Feature columns whose changes are counted and ranked in the summary.
FEATURE_COLUMNS = [
    'T2_Tumor', 'T2_Parenchyma', 'T2_Ratio',
    'FLAIR_Tumor', 'FLAIR_Parenchyma', 'FLAIR_Ratio',
    'DWI_Tumor', 'DWI_Parenchyma', 'DWI_Ratio',
    'ADC_Tumor', 'ADC_Parenchyma', 'ADC_Ratio',
    'T1_Tumor', 'T1_Parenchyma', 'T1_Ratio',
    'T1CE_Tumor', 'T1CE_Parenchyma', 'T1CE_Ratio',
]

# Name of the boolean column added to flag baseline rows.
BASELINE_FLAG = 'Baseline'

# Rows are grouped in blocks of this size; the first row of each block is
# the baseline and the remaining rows are compared against it.
ROWS_PER_PATIENT = 6

# Placeholder written into cells that are considered unchanged.
UNCHANGED_MARK = '-'

# (baseline lower bound, absolute tolerance), checked in order; the first band
# whose lower bound the baseline exceeds applies.
TOLERANCE_BANDS = ((200, 2), (50, 1))
# Absolute tolerance used when the baseline value is at most 50.
DEFAULT_TOLERANCE = 0.01


def _is_unchanged(value, baseline):
    """Tell whether ``value`` counts as unchanged relative to ``baseline``.

    A cell is unchanged when it equals the baseline, or when the absolute
    difference is below the tolerance selected by the baseline's magnitude
    (see TOLERANCE_BANDS / DEFAULT_TOLERANCE). Values that cannot be
    subtracted (e.g. text) are compared by equality only. A missing value
    (NaN) is never considered unchanged, because every comparison with NaN
    is false.
    """
    try:
        if value == baseline:
            return True
        diff = abs(value - baseline)
        for lower_bound, tolerance in TOLERANCE_BANDS:
            if baseline > lower_bound:
                return bool(diff < tolerance)
        return bool(diff < DEFAULT_TOLERANCE)
    except TypeError:
        # Non-numeric cells: not equal and not subtractable -> treated as changed.
        return False


def _mark_unchanged_cells(raw):
    """Mark unchanged cells and count changes per feature column.

    Args:
        raw: DataFrame as read from the merged spreadsheet. It is not modified.

    Returns:
        A tuple ``(marked, change_counts, n_baselines, n_counterfactuals)``:
        ``marked`` is an object-dtype copy of ``raw`` with a boolean
        ``Baseline`` column added and unchanged cells of non-baseline rows
        replaced by ``'-'``; ``change_counts`` maps every name in
        FEATURE_COLUMNS to the number of non-baseline cells that differ from
        their baseline; the last two items are the number of baseline rows
        and of non-baseline rows.
    """
    # Work on an object-dtype copy so that writing the '-' marker into a
    # numeric column does not rely on pandas silently upcasting the column.
    marked = raw.astype(object)
    marked[BASELINE_FLAG] = False

    # The flag column itself is bookkeeping and must not be compared/masked.
    compared_columns = [column for column in raw.columns if column != BASELINE_FLAG]
    change_counts = {column: 0 for column in FEATURE_COLUMNS}
    n_baselines = 0
    n_counterfactuals = 0
    baseline_row = None

    # Group membership is decided by row position, so it does not depend on
    # the index labels of the frame.
    for position, (label, row) in enumerate(raw.iterrows()):
        if position % ROWS_PER_PATIENT == 0:
            baseline_row = row
            marked.at[label, BASELINE_FLAG] = True
            n_baselines += 1
            continue

        n_counterfactuals += 1
        for column in compared_columns:
            if _is_unchanged(row[column], baseline_row[column]):
                marked.at[label, column] = UNCHANGED_MARK
            elif column in change_counts:
                # Only feature columns are counted; other columns (e.g. a
                # class label) are masked when unchanged but never ranked.
                change_counts[column] += 1

    return marked, change_counts, n_baselines, n_counterfactuals


for original in original_class:
    create_dir(dest, f'{dest}/{original}')
    for desired in desired_class:
        read_file = f'{src}/{original}/{original}_to_{desired}_merged_data.xlsx'
        write_file = f'{dest}/{original}/{original}_to_{desired}.xlsx'

        # Read the Excel file
        raw_df = pd.read_excel(read_file, header=0)

        # Mark unchanged cells and count the changed ones per feature
        df, change_counts, pattern_count, counterfactual_count = _mark_unchanged_cells(raw_df)

        # Save the updated data to a new Excel file
        df.to_excel(write_file, index=False)

        # Rank features by change count, descending; ties keep the order of
        # FEATURE_COLUMNS because the sort is stable.
        change_counts_list = sorted(change_counts.items(), key=lambda item: item[1], reverse=True)

        # Print the change counts for each column, labelled with the class pair
        print(f"{original} -> {desired}")
        for column, count in change_counts_list:
            print(f"{column}: {count} changes")

        # Define the file path
        file_path = f'{dest}/{original}/{original}_to_{desired}_summary.txt'

        # Open the file in write mode
        with open(file_path, 'w') as file:
            # Write the number of baseline rows and of compared rows
            file.write(f"Number of patients: {pattern_count}\n")
            file.write(f"Number of counterfactuals: {counterfactual_count}\n\n")

            # Write the change counts for each column
            for column, count in change_counts_list:
                file.write(f"{column}: {count} changes\n")

            # Write the max counted 3 features
            file.write("\nMax counted 3 features:\n")
            for column, count in change_counts_list[:3]:
                file.write(f"{column}: {count} changes\n")



DWI_Tumor: 22 changes
T1CE_Ratio: 21 changes
T1_Ratio: 18 changes
T1_Tumor: 17 changes
FLAIR_Ratio: 15 changes
DWI_Ratio: 15 changes
T2_Tumor: 14 changes
ADC_Ratio: 14 changes
T2_Ratio: 13 changes
ADC_Tumor: 11 changes
T1CE_Tumor: 10 changes
FLAIR_Tumor: 4 changes
T2_Parenchyma: 0 changes
FLAIR_Parenchyma: 0 changes
DWI_Parenchyma: 0 changes
ADC_Parenchyma: 0 changes
T1_Parenchyma: 0 changes
T1CE_Parenchyma: 0 changes
FLAIR_Tumor: 71 changes
ADC_Tumor: 33 changes
ADC_Ratio: 29 changes
DWI_Ratio: 18 changes
FLAIR_Ratio: 17 changes
DWI_Tumor: 12 changes
T1_Tumor: 10 changes
T1CE_Ratio: 7 changes
T2_Ratio: 6 changes
T1_Ratio: 6 changes
T1CE_Tumor: 6 changes
T2_Tumor: 3 changes
T2_Parenchyma: 0 changes
FLAIR_Parenchyma: 0 changes
DWI_Parenchyma: 0 changes
ADC_Parenchyma: 0 changes
T1_Parenchyma: 0 changes
T1CE_Parenchyma: 0 changes
T2_Ratio: 87 changes
T2_Tumor: 55 changes
ADC_Tumor: 43 changes
ADC_Ratio: 33 changes
FLAIR_Ratio: 18 changes
T1CE_Ratio: 12 changes
FLAIR_Tumor: 9 changes
T1CE

____

In [None]:
# # Print the pattern count
# print(f"Number of 5-row patterns: {pattern_count}")
# print(f"Number of counterfactuals: {pattern_count * 5}")

# # Create a list of tuples with column name and change count
# change_counts_list = [(column, count) for column, count in change_counts.items()]

# # Remove the last 2 tuples from the list
# change_counts_list = change_counts_list[:-2]

# # Sort the list based on change count in descending order
# change_counts_list.sort(key=lambda x: x[1], reverse=True)

# # Print the change counts for each column
# for column, count in change_counts_list:
#     print(f"{column}: {count} changes")

# # Define the file path
# file_path = f'{root_dir}data/updated_data/{original_class}/{original_class}_to_{desired_class}_summary.txt'

# # Open the file in write mode
# with open(file_path, 'w') as file:
#     # Write the pattern count
#     file.write(f"Number of patients: {pattern_count}\n")
#     file.write(f"Number of counterfactuals: {pattern_count * 5}\n\n")

#     # Write the change counts for each column
#     for column, count in change_counts_list:
#         file.write(f"{column}: {count} changes\n")

#     # Write the max counted 3 features
#     file.write("\nMax counted 3 features:\n")
#     for column, count in change_counts_list[:3]:
#         file.write(f"{column}: {count} changes\n")


Number of 5-row patterns: 25
Number of counterfactuals: 125
T2_Ratio: 95 changes
T1CE_Tumor: 66 changes
T1CE_Ratio: 54 changes
T2_Tumor: 15 changes
ADC_Tumor: 13 changes
T1_Ratio: 13 changes
FLAIR_Tumor: 12 changes
DWI_Ratio: 12 changes
DWI_Tumor: 11 changes
T1_Tumor: 9 changes
FLAIR_Ratio: 8 changes
ADC_Ratio: 8 changes
T2_Parenchyma: 3 changes
FLAIR_Parenchyma: 3 changes
DWI_Parenchyma: 3 changes
ADC_Parenchyma: 3 changes
T1_Parenchyma: 3 changes
T1CE_Parenchyma: 3 changes
