In [1]:
import pandas as pd

from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
# Biopsy collection method for each subject, keyed by subject ID as a string.
# Written as one list of subject IDs per method and then inverted into the
# flat {subject_id: method} lookup used by the rest of the notebook.
biopsy_method = {
    subject_id: method
    for method, subject_ids in {
        'gross': [
            '127', '39', '128', '81', '106', '115', '48', '91', '152',
            '104', '86', '83', '53', '110', '40', '55', '125', '124',
        ],
        'punch': ['66', '73', '31'],
        'fine': ['89', '70', '95', '96'],
    }.items()
    for subject_id in subject_ids
}

In [3]:
# Load the clinical metadata table.
meta = pd.read_csv(
    '/Users/greg/Dropbox (HMS)/Baker_QC_2021/nat_methods_submission/REVISION/figures_tables/'
    'online_figs/TOPACIOTiles/metadata.csv'
)

# Reduce each Subject value to the token after the first underscore, with
# leading zeros stripped, so it matches the string keys of `biopsy_method`.
meta['Subject'] = meta['Subject'].map(
    lambda raw_id: raw_id.split('_')[1].lstrip('0')
)

# Keep only subjects that have a recorded biopsy method.
has_method = meta['Subject'].isin(list(biopsy_method))
meta = meta[has_method].copy()

# Collapse the confirmed best overall response (BOR) codes into three groups;
# any value not listed here is passed through unchanged.
bor_category = {
    'Not Done': 'ND',
    'Not Evaluable': 'ND',
    'SD': 'progression',
    'PD': 'progression',
    'PR': 'response',
    'CR': 'response',
}
meta['BOR'] = [bor_category.get(code, code) for code in meta['Confirmed BOR']]

# Every remaining subject is a key of `biopsy_method` (filtered above).
meta['biopsy_method'] = meta['Subject'].map(biopsy_method)

meta = meta[['Subject', 'biopsy_method', 'BOR']]

In [4]:
# Load the per-sample tile counts, cast the sample ID to a string so it can be
# joined against the string subject IDs in `meta`, and keep only the two
# columns the analysis needs.
counts = (
    pd.read_csv(
        '/Users/greg/Dropbox (HMS)/Baker_QC_2021/nat_methods_submission/REVISION/figures_tables/'
        'online_figs/TOPACIOTiles/counts.csv'
    )
    .astype({'sample': 'str'})
    .rename(columns={'sample': 'Subject'})
    .loc[:, ['Subject', 'tile_count']]
)

In [5]:
# Attach biopsy method and response group to each sample's tile count;
# the inner join keeps only subjects present in both tables.
res = pd.merge(counts, meta, on='Subject', how='inner')

In [6]:
# One-way ANOVA followed by Tukey's HSD post-hoc test:
# does tile count differ between biopsy methods?

# Key each group's tile counts by the label that groupby actually yields, so
# values and labels cannot drift apart if the set of methods in `res` changes.
# (Positional unpacking into hard-coded names fails on a different number of
# groups and silently mislabels when the group names differ.)
tiles_by_method = {
    method: tiles.values
    for method, tiles in res.groupby('biopsy_method')['tile_count']
}
assert len(tiles_by_method) >= 2, 'need at least two biopsy methods to compare'

f_stat, p_value = f_oneway(*tiles_by_method.values())

print('Biopsy method results:')
print('F-statistic:', f_stat)
print('P-value:', p_value)
print()

# Flatten into the parallel value/label sequences passed to pairwise_tukeyhsd.
all_data = [
    count
    for tiles in tiles_by_method.values()
    for count in tiles.tolist()
]
labels = [
    method
    for method, tiles in tiles_by_method.items()
    for _ in range(len(tiles))
]

tukey_results = pairwise_tukeyhsd(all_data, labels)

print(tukey_results)
print()

Biopsy method results:
F-statistic: 10.267888428837896
P-value: 0.0007085188841202632

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
  fine  gross 204.8333 0.0145   37.9077  371.759   True
  fine  punch -78.1667 0.6758 -308.8076 152.4742  False
 gross  punch   -283.0 0.0029 -471.3175 -94.6825   True
-------------------------------------------------------



In [7]:
# One-way ANOVA followed by Tukey's HSD post-hoc test:
# does tile count differ between treatment response groups?

# Key each group's tile counts by the label that groupby actually yields, so
# values and labels cannot drift apart if the set of BOR groups in `res`
# changes. (Positional unpacking into hard-coded names fails on a different
# number of groups and silently mislabels when the group names differ.)
tiles_by_response = {
    response: tiles.values
    for response, tiles in res.groupby('BOR')['tile_count']
}
assert len(tiles_by_response) >= 2, 'need at least two response groups to compare'

f_stat, p_value = f_oneway(*tiles_by_response.values())

print('Treatment response results:')
print('F-statistic:', f_stat)
print('P-value:', p_value)
print()

# Flatten into the parallel value/label sequences passed to pairwise_tukeyhsd.
all_data = [
    count
    for tiles in tiles_by_response.values()
    for count in tiles.tolist()
]
labels = [
    response
    for response, tiles in tiles_by_response.items()
    for _ in range(len(tiles))
]

tukey_results = pairwise_tukeyhsd(all_data, labels)

print(tukey_results)

Treatment response results:
F-statistic: 0.4047792432572477
P-value: 0.6719915635694808

       Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1      group2   meandiff p-adj    lower    upper   reject
-----------------------------------------------------------------
         ND progression -76.2667 0.6462 -289.2183  136.685  False
         ND    response    -58.2 0.8422 -319.0115 202.6115  False
progression    response  18.0667 0.9753  -194.885 231.0183  False
-----------------------------------------------------------------
