In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os.path as op
import numpy as np
import pandas as pd
from mriqc.viz.misc import (
    raters_variability_plot, plot_abide_stripplots, plot_corrmat, plot_histograms, figure1
)
from pkg_resources import resource_filename as pkgrf
from mriqc.classifier.data import read_dataset, zscore_dataset
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sn
# Apply seaborn's "whitegrid" style to the figures drawn in this notebook.
sn.set(style="whitegrid")

# Root folder that later cells join with relative paths to locate input files.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
data_path = '/home/oesteban/Google Drive/mriqc'

In [None]:
# CSV read into ``mit_df`` by later cells (absolute, machine-specific path).
mit_csv = '/home/oesteban/mriqc/mit-satra/T1-mit.csv'
# CSV passed to ``read_dataset`` for the ABIDE data; located under ``data_path``.
# NOTE(review): the folder name looks like a run date + mriqc version -- confirm.
abide_csv = op.join(data_path, 'runs/20170505_0.9.3-2017-04-23-2ba2c2e40c39/T1w.csv')


In [None]:
# Load the MIT table; ``subject_id`` is forced to ``object`` dtype so pandas
# does not convert the identifiers to numbers.
mit_df = pd.read_csv(mit_csv, index_col=False, dtype={'subject_id': object})
# ``read_dataset`` returns the ABIDE table and ``pp_cols``; later cells iterate
# over ``pp_cols`` as column names.
# NOTE(review): presumably this merges the measures in ``abide_csv`` with the
# ratings in 'ABIDE_QC_all.csv', keeping the 'rater_1' labels -- confirm
# against mriqc.classifier.data.read_dataset.
abide_df, pp_cols = read_dataset(abide_csv, op.join(data_path, 'ABIDE_QC_all.csv'), rate_label='rater_1')

In [None]:
# Tag every row with its dataset of origin. The 'rater' column is reused as a
# dataset flag here (0 = ABIDE, 1 = MIT); the plotting cell selects rows on it.
# Scalars are broadcast to every row, so no explicit list of repeated values
# is needed.
mit_df['rater'] = 1
mit_df['site'] = 'MIT'
abide_df['rater'] = 0

# Stack both datasets row-wise, leaving out the 'rater_1' column of the ABIDE
# frame. The column is dropped on a temporary copy instead of being deleted
# from ``abide_df`` in place, so this cell can be re-run without raising a
# KeyError on the second execution. ``abide_df`` itself keeps 'rater_1'.
mdata = pd.concat([abide_df.drop('rater_1', axis=1), mit_df], axis=0)


In [None]:

def _plot_dataset_distributions(frame, col, ax):
    """Overlay the ABIDE and MIT distributions of one column on ``ax``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding ``col`` and a ``rater`` column in which 0 selects the
        rows plotted as 'ABIDE' and 1 the rows plotted as 'MIT'.
    col : str
        Name of the column to plot.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    The x-axis is clipped to the 0.2-99.8 percentile range of the whole column
    (both datasets pooled) so that a few extreme values do not flatten the
    plot.
    """
    for rater, label, color in ((0, 'ABIDE', 'dodgerblue'),
                                (1, 'MIT', 'darkorange')):
        sn.distplot(frame.loc[(frame.rater == rater), col], norm_hist=False,
                    label=label, ax=ax, color=color)

    # nanpercentile: missing values must not turn the limits into NaN, which
    # matplotlib rejects as axis limits.
    values = frame[col].values.astype(float)
    minv = np.nanpercentile(values, 0.2)
    maxv = np.nanpercentile(values, 99.8)
    ax.set_xlim([minv, maxv])


# NOTE(review): zscore_dataset presumably standardizes every column except the
# ones listed in ``excl_columns`` -- confirm against mriqc.classifier.data.
zscored = zscore_dataset(
    mdata, excl_columns=['rater', 'size_x', 'size_y', 'size_z',
                         'spacing_x', 'spacing_y', 'spacing_z'])

# Columns to plot, in alphabetical order: everything in ``pp_cols`` except the
# spacing_*, summary_* and size_* columns. Later cells reuse ``colnames``.
colnames = [col for col in sorted(pp_cols)
            if not col.startswith(('spacing', 'summary', 'size'))]

nrows = len(colnames)

# One row per column; left panel = values as found in ``mdata``,
# right panel = the same column taken from ``zscored``.
fig = plt.figure(figsize=(18, 2 * nrows))
gs = GridSpec(nrows, 2, hspace=0.2)

for i, col in enumerate(colnames):
    ax_nzs = plt.subplot(gs[i, 0])
    ax_zsd = plt.subplot(gs[i, 1])

    _plot_dataset_distributions(mdata, col, ax_nzs)
    ax_nzs.legend()

    _plot_dataset_distributions(zscored, col, ax_zsd)
    # The column name is written on the right-hand panel of each row.
    ax_zsd.set_ylabel(col)

fig.savefig('/home/oesteban/tmp/mriqc-ml-tests-2/histograms-mit.svg', format='svg', pad_inches=0, dpi=100)


In [None]:
# Read the ABIDE table again to get a fresh frame that has the 'rater_1'
# column used by the next cell and none of the modifications made to
# ``abide_df`` by earlier cells.
abide_df, pp_cols = read_dataset(abide_csv, op.join(data_path, 'ABIDE_QC_all.csv'), rate_label='rater_1')

In [None]:
# Partition the ABIDE rows by their 'rater_1' value: rows equal to 0 go to
# ``accept`` and rows equal to 1 go to ``exclude``.
accept = abide_df.loc[abide_df['rater_1'] == 0]
exclude = abide_df.loc[abide_df['rater_1'] == 1]

In [None]:
# Re-read the MIT table so this cell starts from the unmodified CSV contents
# (an earlier cell added 'rater' and 'site' columns to ``mit_df`` in place).
mit_df = pd.read_csv(mit_csv, index_col=False, dtype={'subject_id': object})

# NOTE: despite its name, ``means`` stores the per-column *median* of the
# ``accept`` rows.
means = {}
for col in sorted(colnames):
    means[col] = np.median(accept[col].values)

    # Write one CSV per column: a copy of the MIT table in which only ``col``
    # is overwritten, on every row, with the median of the ``accept`` rows.
    # A scalar is broadcast to the whole column; this avoids assigning a flat
    # list through a list-of-columns key, which pandas handles differently
    # across versions.
    mit_copy = mit_df.copy()
    mit_copy[col] = means[col]

    mit_copy.to_csv('/home/oesteban/tmp/mriqc-ml-tests-2/mit_t1_%s.csv' % col, index=False)

    # Print median and standard deviation for the ``accept`` and ``exclude``
    # rows. ``Series.std()`` returns a plain scalar, so '%f' formats it
    # directly instead of relying on the implicit float conversion of a
    # one-element Series.
    bad_m = np.median(exclude[col].values)
    print('%s: %f +- %f :: %f +- %f' % (
        col, means[col], accept[col].std(), bad_m, exclude[col].std()))

In [None]:
# Load the base prediction table; later cells add one column per entry of
# ``colnames`` to it.
# NOTE(review): presumably the predictions obtained on the unmodified MIT
# table, produced outside this notebook -- confirm.
pred = pd.read_csv('/home/oesteban/tmp/mriqc-ml-tests-2/predicted_orig.csv', index_col=False)

In [None]:
# For every column name, read the matching per-column prediction file and store
# its 'prediction' values in ``pred`` under that column name. ``.values`` makes
# the assignment positional, ignoring the index of the file that was read.
for col in sorted(colnames):
    col_pred = pd.read_csv('/home/oesteban/tmp/mriqc-ml-tests-2/predicted_mit_t1_%s.csv' % col)
    pred[col] = col_pred.prediction.values

In [None]:
# Persist the combined prediction table, without writing the index.
pred.to_csv('/home/oesteban/tmp/mriqc-ml-tests-2/predictions_wrt_iqms.csv', index=False)

In [None]:
# Display summary statistics of the columns in ``pred`` as the cell output.
pred.describe()