# Dependencies

In [None]:
# For up-to-date mriqc performance:
# Re-executes the mriqc performance notebook before anything in this notebook runs.
# NOTE(review): the path is absolute and machine-specific; presumably this run is what
# refreshes the pickled plot data loaded further down — confirm.
print('running mriqc_performance.ipynb from plots.ipynb: ')
%run '/Users/rodgersleejg/data/hpc/NNDSP/anal/mriqc_files/analysis_notebooks/mriqc_performance.ipynb'


In [None]:
# For up to date euler:
# Re-executes the euler-number notebook before anything in this notebook runs.
# NOTE(review): the path is absolute and machine-specific; presumably this run is what
# refreshes the pickled euler plot data loaded further down — confirm.
%run '/Users/rodgersleejg/data/hpc/NNDSP/anal/mriqc_files/analysis_notebooks/euler.ipynb'

In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import subprocess
from datetime import date
import re
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('max_colwidth',500)
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
from importlib import reload



## EDIT THESE VARIABLES
analysis_version = "2017_10_19"
project_dir = Path('/Users/rodgersleejg/data/hpc/NNDSP') # needs to be pathlib.Path object

bids_dir = project_dir.joinpath('bids_2017_07_14')

# conf_script = mriqc_dir.joinpath('conf' + analysis_version + '.sh')

# Directories the analysis writes into.
mriqc_dir = project_dir.joinpath('anal/mriqc_files/other_files')
output_folder = project_dir / 'derivatives' / 'mriqc'
classifier_output = output_folder.joinpath('classifier')
base_work_dir = output_folder.joinpath('work')
log_dir = mriqc_dir.joinpath('swarm_output_' + analysis_version)

# parents=True creates missing intermediate folders (e.g. 'derivatives') instead of
# raising FileNotFoundError; exist_ok=True replaces the separate exists() check and
# makes re-running the cell safe.
for required_dir in (mriqc_dir, output_folder, classifier_output, base_work_dir, log_dir):
    required_dir.mkdir(parents=True, exist_ok=True)

manual_qc = output_folder.joinpath('manual_qc_round_2.tsv')
# swarm_path = mriqc_dir.joinpath('mriqc_' + analysis_version + '.cmd')


# Data files, given relative to project_dir (they are opened after the notebook
# changes its working directory to project_dir).
df_qc_full_pkl = Path('anal/mriqc_files/other_files/qc_pickle_for_v2_exploration.pklz')
mriqc_with_predictions = Path('derivatives/mriqc/with_mriqc_predictions.csv')
plottable_data = Path('derivatives/mriqc/classifer_plot_data.pklz')
plottable_data_euler = Path('derivatives/mriqc/classifer_plot_data_euler.pklz')


# Switch the working directory to project_dir: the relative data paths defined above
# (e.g. plottable_data) are resolved against the working directory when they are read.
%pwd
%cd {project_dir}
%pwd

# Mriqc

### Plot the ROC curves

The number of scans classified as bad for each manual metric:

In [None]:
# Can load data using
# Read the two pickled ROC tables (mriqc first, then euler); the paths are relative,
# so the working directory must already be project_dir.
df_roc_mriqc, df_roc_euler = (
    pd.read_pickle(pickle_path)
    for pickle_path in (plottable_data, plottable_data_euler)
)

## Scans that didn't make it through Freesurfer within 60 hours

+ 0150 r 2
+ 0414 r 1
+ 0936 r 1
+ 1998 r 2

In [None]:
 # Rows that appear in only one of the two tables (merge indicator is left_only / right_only).
 (
     df_roc_mriqc
     .merge(
         df_roc_euler,
         on=['MASKID', 'run', 'manual_qc_type'],
         how='outer',
         suffixes=["_mriqc", "_euler"],
         indicator=True,
     )
     .query('_merge.str.contains("only")')
 )

In [None]:
# Number of MPRAGE rows in the euler table before merging.
print(len(df_roc_euler.query('manual_qc_type == "MPRAGE"')))

# Outer merge keeps scans present in only one table; `_merge` records which side each
# row came from and the suffixes distinguish the two methods' columns.
df_roc_qc = df_roc_mriqc.merge(
    df_roc_euler,
    on=['MASKID', 'run', 'manual_qc_type'],
    how='outer',
    suffixes=["_mriqc", "_euler"],
    indicator=True,
)

# Number of MPRAGE rows after merging, for comparison with the count above.
print(len(df_roc_qc.query('manual_qc_type == "MPRAGE"')))

In [None]:
# Preview the merged table: shared keys plus _mriqc / _euler suffixed columns and the _merge indicator.
df_roc_qc.head()

In [None]:
qc_ver = '_mriqc'
# Pick the column before aggregating: summing the whole frame first also tries to sum
# the string and categorical columns (MASKID, run, _merge), which is wasted work and
# fails on pandas versions that no longer drop non-numeric columns silently.
df_roc_qc.groupby('manual_qc_type')['thresholded' + qc_ver].sum()

In [None]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve per manual QC type, scoring the predicted probability
# against the thresholded label for the method selected by qc_ver.
score_col, label_col = 'prob_y' + qc_ver, 'thresholded' + qc_ver
for m in df_roc_qc.manual_qc_type.unique():
    # The outer merge can leave rows without a prediction for this method; NaN scores
    # make roc_auc_score raise, so skip them (the euler AUC cell filters the same way).
    df = df_roc_qc[(df_roc_qc.manual_qc_type == m) & df_roc_qc[score_col].notnull()]
    area_under_curve = roc_auc_score(df[label_col], df[score_col], average=None)
    print(m, ': ', area_under_curve)

In [None]:
def wrap_plot(x, y, **kwargs):
    """Call ``plt.plot(x, y)`` with any extra keyword arguments and return its result."""
    drawn = plt.plot(x, y, **kwargs)
    return drawn


def plot_with_line(x, y, **kwargs):
    """Call ``plt.plot`` with the ``'-'`` (solid line) format string and return its result."""
    drawn = plt.plot(x, y, '-', **kwargs)
    return drawn


def plot_with_marker(x, y, **kwargs):
    """Call ``plt.plot`` with the ``'o'`` (circle marker) format string and return its result."""
    drawn = plt.plot(x, y, 'o', **kwargs)
    return drawn

### ROC curve (mriqc prediction vs. manual QC)

In [None]:
qc_ver = '_mriqc'
fpr_col, tpr_col = 'fpr' + qc_ver, 'tpr' + qc_ver
df = df_roc_qc.query("manual_qc_type == 'MPRAGE'")

# (fpr, tpr) of two individual scans that are highlighted on the curve:
# scan 1923 run-001 is drawn in green, scan 1700 run-001 in red.
good_scan = (df.MASKID == '1923') & (df.run == 'run-001')
bad_scan = (df.MASKID == '1700') & (df.run == 'run-001')
good_x, good_y = df.loc[good_scan, [fpr_col, tpr_col]].values[0]
bad_x, bad_y = df.loc[bad_scan, [fpr_col, tpr_col]].values[0]
print(good_x,good_y,bad_x,bad_y)

with sns.plotting_context('notebook',font_scale=1.5):
    # df is already restricted to MPRAGE, so the grid has a single facet.
    # NOTE(review): `size` was renamed `height` in seaborn 0.9 — switch when upgrading.
    grid = sns.FacetGrid(df.sort_values(fpr_col), col="manual_qc_type", size=5, margin_titles=False)
    grid.map(wrap_plot, x=[0,1], y=[0,1], linestyle="--")  # chance-level diagonal

    # The ROC curve itself, drawn in order of increasing false positive rate.
    grid.map(plot_with_line, fpr_col, tpr_col, ms=5)
    # The two highlighted scans.
    grid.map(plot_with_marker, x=[good_x], y=[good_y], marker="o", markerfacecolor='g', markersize=15)
    grid.map(plot_with_marker, x=[bad_x], y=[bad_y], marker="o", markerfacecolor='r', markersize=15)

    grid.set_axis_labels("False Positive Rate", "True Positive Rate");

    grid.set_titles('mriqc software',size = 30)

# `figsize` is not a documented savefig option (figure size is fixed when the grid is
# created), so the stray keyword has been dropped rather than passed through.
grid.savefig('/Users/rodgersleejg/Documents/nih/ohbm_2018_abstract/mriqc_roc.png')

# grid.fig.tight_layout(w_pad=1)

### Number of scans to assess vs Number of bad scans detected

In [None]:
# sns.set(style="ticks")
# # Initialize a grid of plots with an Axes for each walk
# grid = sns.FacetGrid(df_roc.sort_values('tp'), col="manual_qc_type",size = 5)
# grid.map(wrap_plot,x=  [0,1] , y=  [0,1], linestyle ="--")  # random predictions curve

# # Draw a line plot to show the trajectory of each random walk
# grid.map(plot_with_marker, "tp", "positive", ms=5)
# #grid.map(plt.plot, "fpr", "tpr", marker="o", ms=1)
# grid.set_axis_labels("Bad scans detected", "Number of scans to screen");

# # Adjust the arrangement of the plots
# grid.fig.tight_layout(w_pad=1)

# Euler

### Plot the ROC curves

The number of scans classified as bad for each manual metric:

In [None]:
qc_ver = '_euler'
# Pick the column before aggregating: summing the whole frame first also tries to sum
# the string and categorical columns (MASKID, run, _merge), which is wasted work and
# fails on pandas versions that no longer drop non-numeric columns silently.
df_roc_qc.groupby('manual_qc_type')['thresholded' + qc_ver].sum()

In [None]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve per manual QC type for the method selected by qc_ver.
# The rating types are taken from the merged table itself (as in the mriqc AUC cell)
# because `manual_metrics` is not defined anywhere in this notebook.
for m in df_roc_qc.manual_qc_type.unique():
    # Scans without an euler prediction cannot be scored, so they are excluded.
    df = df_roc_qc.query('(manual_qc_type == @m) & (prob_y_euler.notnull())')
    area_under_curve = roc_auc_score(df['thresholded' + qc_ver],df['prob_y' + qc_ver],average = None)
    print(m, ': ',area_under_curve)

In [None]:
# NOTE: these repeat the plotting helpers already defined in the Mriqc section.
def wrap_plot(x, y, **kwargs):
    """Call ``plt.plot(x, y)`` with any extra keyword arguments and return its result."""
    drawn = plt.plot(x, y, **kwargs)
    return drawn


def plot_with_line(x, y, **kwargs):
    """Call ``plt.plot`` with the ``'-'`` (solid line) format string and return its result."""
    drawn = plt.plot(x, y, '-', **kwargs)
    return drawn


def plot_with_marker(x, y, **kwargs):
    """Call ``plt.plot`` with the ``'o'`` (circle marker) format string and return its result."""
    drawn = plt.plot(x, y, 'o', **kwargs)
    return drawn

### ROC curve (euler number prediction vs. manual QC)

In [None]:
from matplotlib import pyplot as plt

In [None]:
qc_ver = '_euler'
fpr_col, tpr_col = 'fpr' + qc_ver, 'tpr' + qc_ver

# MPRAGE ratings only, restricted to scans that have an euler prediction.
df = df_roc_qc.query("(manual_qc_type == 'MPRAGE') & (prob_y_euler.notnull())")

# (fpr, tpr) of two individual scans that are highlighted on the curve:
# scan 1923 run-001 is drawn in green, scan 1700 run-001 in red.
good_scan = (df.MASKID == '1923') & (df.run == 'run-001')
bad_scan = (df.MASKID == '1700') & (df.run == 'run-001')
good_x, good_y = df.loc[good_scan, [fpr_col, tpr_col]].values[0]
bad_x, bad_y = df.loc[bad_scan, [fpr_col, tpr_col]].values[0]
print(good_x,good_y,bad_x,bad_y)

with sns.plotting_context('notebook',font_scale=1.5):
    # df is already restricted to MPRAGE, so the grid has a single facet.
    # NOTE(review): `size` was renamed `height` in seaborn 0.9 — switch when upgrading.
    grid = sns.FacetGrid(df.sort_values(fpr_col), col="manual_qc_type", size=5, margin_titles=False)
    grid.map(wrap_plot, x=[0,1], y=[0,1], linestyle="--")  # chance-level diagonal

    # The ROC curve itself, drawn in order of increasing false positive rate.
    grid.map(plot_with_line, fpr_col, tpr_col, ms=5)
    # The two highlighted scans.
    grid.map(plot_with_marker, x=[good_x], y=[good_y], marker="o", markerfacecolor='g', markersize=15)
    grid.map(plot_with_marker, x=[bad_x], y=[bad_y], marker="o", markerfacecolor='r', markersize=15)

    grid.set_axis_labels("False Positive Rate", "True Positive Rate");

    grid.set_titles('euler number',size = 30)

# `figsize` is not a documented savefig option (figure size is fixed when the grid is
# created), so the stray keyword has been dropped rather than passed through.
grid.savefig('/Users/rodgersleejg/Documents/nih/ohbm_2018_abstract/euler_roc.png')



In [None]:
# sns.set(style="ticks")
# # Initialize a grid of plots with an Axes for each walk
# grid = sns.FacetGrid(df_roc_euler.sort_values('tp'), col="manual_qc_type",size = 5)
# grid.map(wrap_plot,x=  [0,1] , y=  [0,1], linestyle ="--")  # random predictions curve

# # Draw a line plot to show the trajectory of each random walk
# grid.map(plot_with_marker, "tp", "positive", ms=5)
# #grid.map(plt.plot, "fpr", "tpr", marker="o", ms=1)
# grid.set_axis_labels("Bad scans detected", "Number of scans to screen");

# # Adjust the arrangement of the plots
# grid.fig.tight_layout(w_pad=1)