In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# User Study for multi-cohort MSI in CRC transformer paper

This notebook analyzes the pathologists' assessment of the high-attention patches of the multi-cohort model presented in the paper.

Data properties:
* 40 patients from the YCR-BCIP cohort
* 20 patients with MSI-H ground truth, 20 patients with MSS ground truth
* for each group 10 patients with the lowest classification scores and 10 patients with the highest classification scores were selected
* 4 patches per patient, selected from the 100 patches with the highest attention scores
* 2 with the highest classification scores, 2 with the lowest classification scores

In [None]:
# Excel workbook holding the expert evaluations; each expert has a separate sheet
path = Path('evaluations_user_study.xlsx')
expert_sheets = pd.read_excel(
    path, sheet_name=['evaluation_expert1', 'evaluation_expert2']
)
df = expert_sheets['evaluation_expert1']
df2 = expert_sheets['evaluation_expert2']

In [None]:
# Collect every distinct label that either expert assigned to any of the four
# tile columns; missing entries are ignored and the result is sorted.
tile_columns = [f'tile {i}' for i in range(4)]
labels_seen = set()
for ratings in (df, df2):
    for column in tile_columns:
        labels_seen.update(ratings[column].dropna().unique())
categories = sorted(labels_seen)

In [None]:
# ignore categories with max 2 occurrences
# Filtering (instead of pop(index(...))) keeps this cell idempotent: re-running
# it, or running it on data that lacks one of these labels, no longer raises
# ValueError. The order of the remaining categories is unchanged.
ignored_categories = {'goblet cells', 'signet ring cells'}
categories = [c for c in categories if c not in ignored_categories]

In [None]:
# Attach the model's classification scores to each expert's evaluations,
# joining on the shared FILENAME column.
# NOTE: this cell overwrites df/df2 and is therefore not idempotent; reload the
# evaluation sheets before running it a second time.
scores = pd.read_excel(path, sheet_name='selected patients_10_low_high')
df = df.merge(scores, on='FILENAME')
df2 = df2.merge(scores, on='FILENAME')

### Check the frequency of patterns for the different cases

In [None]:
# tile 0 and 1 with high classification scores
# Count how often each category was assigned, separately for every
# (ground_truth, high_low) combination. Position 2 * ground_truth + high_low
# of each count list is labelled TN, FP, FN, TP in that order.
outcomes = ['TN', 'FP', 'FN', 'TP']
eval_df = {c: [0] * len(outcomes) for c in categories}
eval_df2 = {c: [0] * len(outcomes) for c in categories}
for gt in range(2):
    for hl in range(2):
        # One combined mask per expert. Chaining df[mask_a][mask_b] applies a
        # mask built on the full frame to an already filtered frame, which makes
        # pandas emit a "Boolean Series key will be reindexed" warning; with
        # .loc and a single mask nothing needs to be hidden via %%capture.
        selected = (df['ground_truth'] == gt) & (df['high_low'] == hl)
        selected2 = (df2['ground_truth'] == gt) & (df2['high_low'] == hl)
        for tile in range(2):
            column = f'tile {tile}'
            for frame, mask, tally in ((df, selected, eval_df), (df2, selected2, eval_df2)):
                for label, count in frame.loc[mask, column].value_counts().items():
                    # labels that were removed from `categories` are skipped on purpose
                    if label in tally:
                        tally[label][2 * gt + hl] += count
eval_df = pd.DataFrame(eval_df, index=outcomes).T
eval_df2 = pd.DataFrame(eval_df2, index=outcomes).T

In [None]:
# list the column names available after merging in the scores
df.columns

In [None]:
# display() renders each table with pandas' rich repr; a bare tuple would be
# printed as plain text (expert 1 first, then expert 2)
display(eval_df, eval_df2)

In [None]:
# tile 2 and 3 with low classification scores
# Count how often each category was assigned, separately for every
# (ground_truth, high_low) combination. Position 2 * ground_truth + high_low
# of each count list is labelled TN, FP, FN, TP in that order.
outcomes = ['TN', 'FP', 'FN', 'TP']
eval_df = {c: [0] * len(outcomes) for c in categories}
eval_df2 = {c: [0] * len(outcomes) for c in categories}
for gt in range(2):
    for hl in range(2):
        # One combined mask per expert. Chaining df[mask_a][mask_b] applies a
        # mask built on the full frame to an already filtered frame, which makes
        # pandas emit a "Boolean Series key will be reindexed" warning; with
        # .loc and a single mask nothing needs to be hidden via %%capture.
        selected = (df['ground_truth'] == gt) & (df['high_low'] == hl)
        selected2 = (df2['ground_truth'] == gt) & (df2['high_low'] == hl)
        for tile in range(2):
            column = f'tile {tile + 2}'
            # A tile column missing from either expert's sheet is skipped for
            # both experts, so the two tables always cover the same tiles.
            if column not in df.columns or column not in df2.columns:
                continue
            for frame, mask, tally in ((df, selected, eval_df), (df2, selected2, eval_df2)):
                for label, count in frame.loc[mask, column].value_counts().items():
                    # labels that were removed from `categories` are skipped on purpose
                    if label in tally:
                        tally[label][2 * gt + hl] += count
eval_df = pd.DataFrame(eval_df, index=outcomes).T
eval_df2 = pd.DataFrame(eval_df2, index=outcomes).T

In [None]:
# display() renders each table with pandas' rich repr; a bare tuple would be
# printed as plain text (expert 1 first, then expert 2)
display(eval_df, eval_df2)

In [None]:
# all four tiles (high and low classification scores together)
# Count how often each category was assigned, separately for every
# (ground_truth, high_low) combination. Position 2 * ground_truth + high_low
# of each count list is labelled TN, FP, FN, TP in that order.
outcomes = ['TN', 'FP', 'FN', 'TP']
eval_df = {c: [0] * len(outcomes) for c in categories}
eval_df2 = {c: [0] * len(outcomes) for c in categories}
for gt in range(2):
    for hl in range(2):
        # One combined mask per expert. Chaining df[mask_a][mask_b] applies a
        # mask built on the full frame to an already filtered frame, which makes
        # pandas emit a "Boolean Series key will be reindexed" warning; with
        # .loc and a single mask nothing needs to be hidden via %%capture.
        selected = (df['ground_truth'] == gt) & (df['high_low'] == hl)
        selected2 = (df2['ground_truth'] == gt) & (df2['high_low'] == hl)
        for tile in range(4):
            column = f'tile {tile}'
            # A tile column missing from either expert's sheet is skipped for
            # both experts, so the two tables always cover the same tiles.
            if column not in df.columns or column not in df2.columns:
                continue
            for frame, mask, tally in ((df, selected, eval_df), (df2, selected2, eval_df2)):
                for label, count in frame.loc[mask, column].value_counts().items():
                    # labels that were removed from `categories` are skipped on purpose
                    if label in tally:
                        tally[label][2 * gt + hl] += count
eval_df = pd.DataFrame(eval_df, index=outcomes).T
eval_df2 = pd.DataFrame(eval_df2, index=outcomes).T

In [None]:
# display() renders each table with pandas' rich repr; a bare tuple would be
# printed as plain text (expert 1 first, then expert 2)
display(eval_df, eval_df2)

In [None]:
%%capture
# all tiles with high attention scores
eval_df, eval_df2 = {}, {}
for c in categories:
    eval_df[c] = [0] * 2
    eval_df2[c] = [0] * 2
for tile in range(4):
    counts = df[f'tile {tile}'].value_counts()
    counts2 = df2[f'tile {tile}'].value_counts()
    for c in counts.keys():
        try:
            eval_df[c][tile // 2] += counts[c]
        except KeyError:
            pass    
    for c in counts2.keys():
        try:
            eval_df2[c][tile // 2] += counts2[c]
        except KeyError:
            pass
eval_df = pd.DataFrame(eval_df, index=['high', 'low']).T
eval_df2 = pd.DataFrame(eval_df2, index=['high', 'low']).T

In [None]:
# display() renders each table with pandas' rich repr; a bare tuple would be
# printed as plain text (expert 1 first, then expert 2)
display(eval_df, eval_df2)

In [None]:
# Mean and (population) standard deviation of the two experts' counts per
# category, for the high- and the low-score tiles.
results = {}
for hl in ['high', 'low']:
    # Rows are selected by label in `categories` order rather than by position:
    # a later cell re-sorts `eval_df`, so positional .values would pair
    # different categories of the two experts if this cell were run again, and
    # the category printout further down maps positions back via `categories`.
    per_expert = np.array([
        eval_df.loc[categories, hl].to_numpy(),
        eval_df2.loc[categories, hl].to_numpy(),
    ])
    results[hl] = {
        'mean': per_expert.mean(axis=0),
        'std': per_expert.std(axis=0),
    }

In [None]:
# show the per-category mean and standard deviation across the two experts
results

### Test statistically whether patches with high and low classification scores follow the same distribution

In [None]:
# Chi-squared test on the expert-averaged category counts of the high- versus
# the low-score tiles (chi2_contingency is imported in the notebook's import cell).

# Create a contingency table: one row per score group, one column per category
observed = np.array([results['high']['mean'], results['low']['mean']])

# Calculate chi-square test and obtain the p-value; the remaining two return
# values get real names so that IPython's `_` (last output) is not overwritten
chi2, p_value, dof, expected = chi2_contingency(observed)

print("Chi-squared statistic:", chi2)
print("P-value:", p_value)


### Plot the frequency per tissue category

In [None]:
# order expert 1's table by its 'high' counts, largest first (this only
# reorders `eval_df`; `results` was computed before and keeps its order)
eval_df = eval_df.sort_values('high', ascending=False)

In [None]:
# inspect expert 1's count table
eval_df

In [None]:
# bar order for the plots: category positions sorted by descending mean count
# on the high-score tiles
ind = np.argsort(results['high']['mean'])[::-1]

In [None]:
# Bar chart of the fraction of high-score patches per category (mean over the
# two experts, error bars = standard deviation), in the order given by `ind`.
case = 'high'
# 40 patients x 2 patches per score group = 80 patches rated by each expert
n_patches = 40 * 2
n = len(results[case]['mean'])
# NOTE(review): `colors` is not defined in any visible cell of this notebook,
# so the plot failed with NameError on a fresh kernel. A qualitative palette
# indexed by bar position is used here; replace it with the paper's colour
# scheme if one exists.
colors = plt.cm.tab20(np.arange(n) % 20)
fractions = results[case]['mean'][ind] / n_patches
errors = results[case]['std'][ind] / n_patches
fig, ax = plt.subplots(figsize=(16, 2))
bars = ax.bar(np.arange(n), fractions, yerr=errors, width=0.6, color=colors)
labels = [f"{value:.2f}" for value in fractions]
ax.bar_label(bars, labels=labels, label_type='edge')
# hide axes and ticks; returns None, so the cell prints no stray output
ax.set_axis_off()
# plt.savefig(figure_path / f'bar_{case}_error_bars.svg',  format='svg', bbox_inches='tight', pad_inches=0)

In [None]:
# Mirrored bar chart (bars point downwards) of the fraction of low-score
# patches per category, using the same category order `ind` as the high plot.
case = 'low'
# 40 patients x 2 patches per score group = 80 patches rated by each expert
n_patches = 40 * 2
n = len(results[case]['mean'])
# NOTE(review): `colors` is not defined in any visible cell of this notebook,
# so the plot failed with NameError on a fresh kernel. A qualitative palette
# indexed by bar position is used here; replace it with the paper's colour
# scheme if one exists.
colors = plt.cm.tab20(np.arange(n) % 20)
fractions = results[case]['mean'][ind] / n_patches
errors = results[case]['std'][ind] / n_patches
fig, ax = plt.subplots(figsize=(16, 2))
# heights are negated for the mirrored layout; the labels stay positive
bars = ax.bar(np.arange(n), -fractions, yerr=errors, width=0.6, color=colors)
labels = [f"{value:.2f}" for value in fractions]
ax.bar_label(bars, labels=labels, label_type='edge')
# hide axes and ticks; returns None, so the cell prints no stray output
ax.set_axis_off()
# plt.savefig(figure_path / f'bar_{case}_error_bars.svg',  format='svg', bbox_inches='tight', pad_inches=0)

In [None]:
# print each bar position together with the category plotted there
for position, category_index in zip(range(n), ind):
    print(f'{position}, {categories[category_index]}')