### Imports

In [None]:
# Render inline figures at high resolution for high-DPI ("retina") displays.
%config InlineBackend.figure_format = 'retina'
# Reload imported modules automatically before running each cell, so edits to
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline, below the cell that creates them.
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import PercentFormatter
plt.style.use('ggplot')

### Data

In [None]:
from pathlib import Path

# pathlib never expands '~' by itself; resolve it once here so DATA is a real
# filesystem path for every consumer (DATA.exists(), open(DATA), ...), not only
# for readers that happen to expand the home directory themselves.
DATA = Path('~/Data/crush-rig-paper1/histo_scoring_data.csv').expanduser()

In [None]:
# Load the raw scoring table. The columns are renamed by position further down,
# so the column order of the CSV matters.
df = pd.read_csv(DATA)

In [None]:
# Preview the first rows with their original column names.
df.head()

In [None]:
# Short, attribute-friendly column names. They are assigned by position, so the
# list must contain exactly one name per column of the loaded table.
simple_names = [
    'id',
    'tissue',
    'crush',
    'weight',
    'pressure',
    'thickness',
]
df.columns = simple_names
df.head()

Renumber the patient identifiers so that they start at P1: each ID is shifted down by one (P2–P6 become P1–P5).

In [None]:
# Build the complete old -> new mapping up front (P2 -> P1, ..., P6 -> P5) and
# apply it in a single pass; identifiers outside the mapping are left unchanged.
id_shift = {f'P{number}': f'P{number - 1}' for number in range(2, 7)}
df['id'] = df['id'].map(lambda patient_id: id_shift.get(patient_id, patient_id))

In [None]:
# Split the measurements by the 'crush' flag: 'No' rows are the uncrushed
# controls, 'Yes' rows are the crushed samples.
is_control = df['crush'] == 'No'
is_crushed = df['crush'] == 'Yes'
control = df[is_control]
crush = df[is_crushed]

In [None]:
# The 'crush' flag is constant within each subset, so drop it, and give the
# thickness column a subset-specific name so both can sit side by side later.
control = (
    control
    .drop(columns='crush')
    .rename(columns={'thickness': 'control_thickness'})
)
crush = (
    crush
    .drop(columns='crush')
    .rename(columns={'thickness': 'crush_thickness'})
)

In [None]:
# Group both subsets by the same (patient, weight) key so their summaries line up.
group_keys = ['id', 'weight']
control_group = control.groupby(group_keys)
crush_group = crush.groupby(group_keys)

In [None]:
# Per-(id, weight) mean of every numeric column. `numeric_only=True` is spelled
# out because pandas >= 2.0 no longer silently drops non-numeric columns from
# group reductions and raises a TypeError instead.
control_mean = control_group.mean(numeric_only=True).reset_index()
crush_mean = crush_group.mean(numeric_only=True).reset_index()

# Sample standard deviation of the thickness only. Selecting the column before
# calling std() avoids reducing non-numeric columns. The groups come out in the
# same sorted key order as the means above, so the values are assigned by position.
control_mean['control_stdev'] = control_group['control_thickness'].std().values
crush_mean['crush_stdev'] = crush_group['crush_thickness'].std().values

In [None]:
# Inner-join the control and crush summaries. No `on=` is given, so pandas joins
# on every column name the two frames have in common.
# NOTE(review): if an averaged float column such as `pressure` is among the
# shared columns it is matched by exact equality - confirm no rows are lost.
df = pd.merge(control_mean, crush_mean)
df.head()

In [None]:
# Absolute thickness change (crush minus control) and the same change expressed
# as a percentage of the control thickness.
df['delta_thickness'] = df['crush_thickness'] - df['control_thickness']
df['percent_delta'] = (100 * df['delta_thickness']) / df['control_thickness']
df.head()

## Stats

In [None]:
from scipy.stats import ttest_ind_from_stats

In [None]:
# Observations assumed behind every group mean.
# NOTE(review): hard-coded - confirm each (id, weight) group really has 5 rows.
n = 5

# Welch's t-test (unequal variances), computed row-wise from the summary
# statistics: control is sample 1, crush is sample 2.
res = ttest_ind_from_stats(
    mean1=df.control_thickness, std1=df.control_stdev, nobs1=n,
    mean2=df.crush_thickness, std2=df.crush_stdev, nobs2=n,
    equal_var=False,
)

In [None]:
df['two_tailed_pvalue'] = res.pvalue

# One-tailed test of "crush thickness < control thickness". Control is the first
# sample in the t-test, so the hypothesised direction corresponds to a positive
# t statistic. Halving the two-tailed p-value is only valid in that direction;
# when the thickness actually increased, the one-tailed p-value is 1 - p/2.
df['one_tailed_pvalue'] = np.where(res.statistic > 0,
                                   res.pvalue / 2,
                                   1 - res.pvalue / 2)
df.head()

Note that a one-tailed p-value is more appropriate for this problem because compression is expected only to reduce the serosa thickness, not increase it.

In [None]:
# Pressure rounded to the nearest 10, used as the x-axis value / tick label.
# `np.int` was only an alias for the builtin `int` and was removed in
# NumPy 1.24, so the builtin is used directly.
df['load_rounded'] = df.pressure.values.round(-1).astype(int)
# Magnitude of the relative thickness change, ignoring its sign.
df['percent_delta_abs'] = df.percent_delta.abs()
df.head()

## Average deformation per weight

In [None]:
# Average every numeric column across patients for each pressure level.
# `numeric_only=True` keeps the string `id` column out of the reduction, which
# pandas >= 2.0 would otherwise reject with a TypeError.
load_df = df.groupby(by=['pressure']).mean(numeric_only=True).reset_index()
load_df

In [None]:
# Mean absolute deformation per pressure level, one bar per level.
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(load_df.load_rounded, load_df.percent_delta_abs, width=50)
ax.set_xticks(load_df.load_rounded.values)
ax.set_ylim(0, 65)
# Format ticks as whole percentages with a formatter instead of overwriting the
# labels of auto-located ticks, so the labels always match the tick positions.
ax.yaxis.set_major_formatter(PercentFormatter(decimals=0))
ax.set_xlabel('Pressure Load (kPa)')
ax.set_ylabel('Tissue Deformation');

## Box plot of tissue deformation

In [None]:
# Distribution of absolute deformation per pressure level. `whis=(0, 100)` makes
# the whiskers span the full data range; it replaces the string form
# `whis='range'`, which newer matplotlib releases no longer accept.
df.boxplot(by='load_rounded', column='percent_delta_abs', figsize=(12, 8), whis=(0, 100))
ax = plt.gca()
ax.set_ylim(-5, 75)
# Format ticks as whole percentages with a formatter instead of overwriting the
# labels of auto-located ticks, so the labels always match the tick positions.
ax.yaxis.set_major_formatter(PercentFormatter(decimals=0))
ax.set_xlabel('Pressure Load (kPa)')
ax.set_ylabel('Tissue Deformation')
# Clear the axes title and figure super-title left on the plot by `boxplot(by=...)`.
ax.set_title('')
plt.suptitle('');

## Patient deformation per weight

In [None]:
# Line / non-significant marker colour and significant marker colour.
c = 'mediumslateblue'
c_special = 'red'
# Threshold applied to the one-tailed p-value to call a change significant.
SIGNIFICANCE_LEVEL = 0.05
Y_TICKS = [0, 25, 50, 75]

# One panel per patient actually present in the data, rather than a hard-coded
# count that would raise an IndexError or silently drop patients if it differed.
ids = df.id.unique()
n_patients = len(ids)

# squeeze=False always returns a 2-D array of axes, even for a single patient.
fig, axes = plt.subplots(nrows=n_patients, ncols=1, sharex=True,
                         squeeze=False, figsize=(12, 10))
for i, (patient_id, ax) in enumerate(zip(ids, axes[:, 0])):
    specific_df = df[df.id == patient_id]
    x = specific_df.load_rounded
    y = specific_df.percent_delta_abs

    # Line plot
    ax.plot(x, y, c=c)
    ax.set_xticks(x.values)
    # Fix the tick positions first, then label exactly those positions.
    ax.set_yticks(Y_TICKS)
    ax.set_yticklabels(['{:.0f}%'.format(tick) for tick in Y_TICKS])
    ax.set_ylim(-5, 80)
    ax.set_ylabel(patient_id)
    if i == 0:
        ax.set_title('Patient Tissue Deformation')
    if i == n_patients - 1:
        ax.set_xlabel('Pressure Load (kPa)')

    # Both masks are computed explicitly so rows with a NaN p-value fall into
    # neither group and get no marker.
    p_value = specific_df.one_tailed_pvalue
    insignificant = p_value >= SIGNIFICANCE_LEVEL
    significant = p_value < SIGNIFICANCE_LEVEL

    # Marks for insignificant delta
    ax.scatter(x[insignificant], y[insignificant], s=30, marker='o', c=c)

    # Marks for significant delta (drawn on top of everything else)
    ax.scatter(x[significant], y[significant], s=90, marker="x", c=c_special, zorder=100)

## Patient average serosa thickness

In [None]:
# Average every numeric column over all loads for each patient.
# `numeric_only=True` keeps any non-numeric column out of the reduction, which
# pandas >= 2.0 would otherwise reject with a TypeError.
patient_df = df.groupby(by=['id']).mean(numeric_only=True).reset_index()
patient_df

In [None]:
# Average control (uncrushed) serosa thickness, one bar per patient.
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(patient_df.id, patient_df.control_thickness)
ax.set_ylim([0, 800])
ax.set_xlabel('Patient')
ax.set_ylabel('Average Control Serosa Thickness (\u03BCm)')