In [None]:
import os

# import DiCE
import dice_ml
from dice_ml import Dice
from dice_ml.utils import helpers  # helper functions
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
root_dir = "/your/path"

# Earlier variant of the loading step, kept for reference:
#entire_parameters = pd.read_excel(f"{root_dir}/data/MB-EP-PA-BG-mpMRI_basic.xlsx")
#dataset = entire_parameters.drop(["T2_Parenchyma", "FLAIR_Parenchyma","T1_Parenchyma", "T1CE_Parenchyma", "DWI_Parenchyma", "ADC_Parenchyma"], axis=1)
dataset = pd.read_excel(f"{root_dir}/data/file.xlsx")

# Integer class id for each tumor type name found in the TUMOR_TYPE column.
tumor_type_codes = {
    "MEDULLOBLASTOMA": 0,
    "EPENDYMOMA": 1,
    "PILOCYTIC ASTROCYTOMA": 2,
    "GLIOMA": 3,
}

# Encode the names in a single pass; values without an entry above are kept as they are.
dataset["TUMOR_TYPE"] = dataset["TUMOR_TYPE"].apply(
    lambda name: tumor_type_codes.get(name, name)
)

In [None]:
# Build the working subset: classes 0, 2 and 3 are capped at their first 25 rows,
# while every row of class 1 is kept.
tumor_codes = dataset["TUMOR_TYPE"]

filtered_mb = dataset.loc[tumor_codes == 0].head(25)
filtered_ep = dataset.loc[tumor_codes == 1]
filtered_pa = dataset.loc[tumor_codes == 2].head(25)
filtered_bg = dataset.loc[tumor_codes == 3].head(25)

# Stack the per-class frames in class-id order; the original row labels are preserved.
per_class_frames = [filtered_mb, filtered_ep, filtered_pa, filtered_bg]
filtered_dataset = pd.concat(per_class_frames)

In [None]:
# Display how many rows of each encoded tumor class ended up in the filtered subset.
filtered_dataset["TUMOR_TYPE"].value_counts()

In [None]:
# The filtered subset feeds both the classifier and, later, the DiCE data wrapper.
selected_data = filtered_dataset
target = filtered_dataset["TUMOR_TYPE"]

# Feature matrix: every column except the outcome.
datasetX = filtered_dataset.drop(columns="TUMOR_TYPE")

# No hold-out split is made here; the whole subset is used for fitting.
# The disabled split is kept for reference:
# x_train, x_test, y_train, y_test = train_test_split(datasetX,
#                                                     target,
#                                                     test_size=0.25,
#                                                     random_state=42,
#                                                     stratify=target)
x_train = datasetX
y_train = target

# Full prediction pipeline: feature scaling followed by the classifier.
pipeline_steps = [
    ('preprocessor', StandardScaler()),
    ('classifier', LogisticRegression(random_state=42)),
]
clf = Pipeline(steps=pipeline_steps)
model = clf.fit(x_train, y_train)

In [None]:
# Print the number of training rows per encoded tumor class.
print(y_train.value_counts())

In [None]:
# Every column after the first one is declared as a continuous feature.
# NOTE(review): this assumes TUMOR_TYPE is the first column of selected_data - confirm.
column_list = selected_data.columns[1:]

# DiCE wrappers around the data and the fitted scikit-learn pipeline.
d = dice_ml.Data(
    dataframe=selected_data,
    continuous_features=column_list.tolist(),
    outcome_name='TUMOR_TYPE',
)
m = dice_ml.Model(model=model, backend="sklearn")

In [None]:
# Integer class ids, matching the encoding applied to the TUMOR_TYPE column.
MB, EP, PA, BG = 0, 1, 2, 3

In [None]:
# Row labels of the feature matrix, grouped by the class each row belongs to.
# `target` and `datasetX` come from the same frame, so a plain boolean mask built
# from `target` lines up with the rows of `datasetX`.
MB_query_indices = datasetX.index[(target == MB).to_numpy()]
EP_query_indices = datasetX.index[(target == EP).to_numpy()]
PA_query_indices = datasetX.index[(target == PA).to_numpy()]
BG_query_indices = datasetX.index[(target == BG).to_numpy()]

In [None]:
# Per-class query frames: the selected rows of `dataset` without its first column.
# The *_query_indices hold index labels, so rows are selected by label with .loc;
# passing labels to the positional .iloc only works while the frame has a default
# 0..n-1 index. The column slice stays positional.
# NOTE(review): dropping column 0 assumes it is the TUMOR_TYPE outcome - confirm.
MB_queries = dataset.loc[MB_query_indices].iloc[:, 1:]
EP_queries = dataset.loc[EP_query_indices].iloc[:, 1:]
PA_queries = dataset.loc[PA_query_indices].iloc[:, 1:]
BG_queries = dataset.loc[BG_query_indices].iloc[:, 1:]

In [None]:
# Generate counterfactuals for every query row towards every tumor class and save
# each explanation as JSON under {root_dir}/data/explanations.
exp = Dice(d, m, method="random")
tumors = [0, 1, 2, 3]
tumor_query_list = [MB_queries, EP_queries, PA_queries, BG_queries]

# Short class names used in the output file names, keyed by integer class id.
# A lookup table replaces the repeated in-place relabelling of dataset["TUMOR_TYPE"]:
# `dataset` keeps its integer encoding, so earlier cells remain re-runnable.
tumor_labels = {0: "MB", 1: "EP", 2: "PA", 3: "BG"}

# Features the counterfactual search is allowed to change (loop-invariant).
features_to_vary = ['T2_Tumor', 'T2_Ratio', 'FLAIR_Tumor', 'FLAIR_Ratio', 'DWI_Tumor',
                    'DWI_Ratio', 'ADC_Tumor', 'ADC_Ratio', 'T1_Tumor', 'T1_Ratio',
                    'T1CE_Tumor', 'T1CE_Ratio']

# Make sure the output directory exists before the first file is written.
explanations_dir = f"{root_dir}/data/explanations"
os.makedirs(explanations_dir, exist_ok=True)

# tumor_query_list is ordered by class id, so pairing it with `tumors` yields the
# source class of each query group.
for source_tumor, tumor_queries in zip(tumors, tumor_query_list):
    source_label = tumor_labels[source_tumor]
    for idx, row in tumor_queries.iterrows():
        # Single-row frame that keeps the original row label as its index.
        query = pd.DataFrame(columns=row.index, data=[row.values], index=[idx])
        for tumor in tumors:
            e1 = exp.generate_counterfactuals(query, total_CFs=5, desired_class=tumor,
                                              features_to_vary=features_to_vary,
                                              random_seed=42)

            e1.visualize_as_dataframe(show_only_changes=True)

            target_label = tumor_labels[tumor]
            output_path = f"{explanations_dir}/patient_{idx}_{source_label}_to_{target_label}.json"
            # The context manager closes the file even if the write fails.
            with open(output_path, "w") as json_file:
                json_file.write(e1.to_json())


In [None]:
# Reload one saved explanation from the directory the generation cell writes to
# (built from root_dir rather than a machine-specific absolute path).
# from_json is reached through `e1`, the explanation object left over from the
# generation loop, so that cell must have run first.
with open(f"{root_dir}/data/explanations/patient_0_MB_to_EP.json", "r") as f:
    cfe = e1.from_json(f.read())

# visualize_as_dataframe is called for its display side effect, as in the generation
# loop, instead of printing a list of its return values.
for o in cfe.cf_examples_list:
    o.visualize_as_dataframe(show_only_changes=True)