# Permutation Significance Testing


## Imports

In [1]:
import pandas as pd
import os
import sys
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import Image
import numpy as np
import datetime as dt

# Platform path separator, used below to assemble file-system paths.
sep = os.sep

## Read in and merge data

In [2]:
# Folder holding the result CSVs. The default is the original author's synced
# folder; set the SRP_PROJECT_PATH environment variable to run the notebook
# from any other machine without editing this cell.
default_project_path = f'C:{sep}Users{sep}kyle{sep}UWM{sep}SRP Paper 01-Group - Documents{sep}Analytics{sep}'
project_path = os.environ.get('SRP_PROJECT_PATH') or default_project_path
# File names are appended directly to project_path, so it must end with a separator.
if not project_path.endswith((sep, '/')):
  project_path += sep

# Results of the real (un-permuted) model.
validation = pd.read_csv(f'{project_path}FinalModelConsolidatedResults_renamed.csv')
# Results of the label-permuted runs that form the null distribution.
permutations = pd.read_csv(f'{project_path}PermutationTesting_Results_renamed.csv')
# Stacked view of both tables. Row labels are kept as-is, so the validation row
# and the first permutation row both carry index 0.
test_data = pd.concat([validation, permutations])


In [3]:

# Show overall accuracy and the macro-averaged precision / recall / F1 for
# every row of the combined validation + permutation table.
test_data.loc[
  :,
  [
    'test_accuracy',
    'macro avg|precision',
    'macro avg|recall',
    'macro avg|f1-score',
  ],
]

Unnamed: 0,test_accuracy,macro avg|precision,macro avg|recall,macro avg|f1-score
0,0.453571,,,
0,0.331081,0.327398,0.328402,0.327820
1,0.317568,0.315620,0.311792,0.312846
2,0.277027,0.257236,0.266111,0.257510
3,0.297297,0.291667,0.291200,0.290989
...,...,...,...,...
9995,0.344595,0.327778,0.325169,0.323939
9996,0.317568,0.304683,0.304005,0.302370
9997,0.418919,0.437063,0.423896,0.411694
9998,0.385135,0.388390,0.410509,0.383098


## Visualize and evaluate results

In [5]:
# Metrics to test. Each name must be a column of both `validation` and
# `permutations`. Only overall test accuracy is evaluated in this cell.
result_columns = [
  'test_accuracy'
]

for col in result_columns:
  # Observed score of the real model: first row of the validation table
  # (positional lookup, so it does not depend on the index labels).
  validation_value = validation[col].iloc[0]

  # Null distribution. Rows with no value are not usable permutations, so they
  # are excluded from both the count and the denominator.
  perm_values = permutations[col].dropna()

  # One-sided (upper-tail) permutation p-value. Ties count as "at least as
  # extreme", and the +1 in numerator and denominator counts the observed
  # labelling itself, so the estimate can never be exactly 0.
  n_as_extreme = int((perm_values >= validation_value).sum())
  p_value = (n_as_extreme + 1) / (len(perm_values) + 1)
  if p_value < .001:
    p_label = "< .001"
  else:
    p_label = "= {:0.3f}".format(p_value)
  print(col, p_label, validation_value)

  # A 1x1 subplot grid is used (rather than a bare Figure) so that x_title
  # can be supplied here.
  fig = make_subplots(
    rows=1,
    cols=1,
    row_titles=[""],
    column_titles=[""],
    x_title='Test Accuracy',
    y_title=''
  )

  # Histogram of the permuted scores.
  fig.add_trace(
    go.Histogram(
      x=perm_values,
      name=col,
      nbinsx=50
    ),
    row=1,
    col=1
  )

  # Red vertical line at the observed score.
  fig.add_vline(
    x=validation_value,
    line_color="red",
    line_width=2,
    row=1,
    col=1
  )

  # Observed score (as a percentage) and p-value label, right-aligned at x = .4.
  fig.add_annotation(
    dict(
      font=dict(
        color='black',
        size=15
      ),
      x=.4,
      showarrow=False,
      # "p" is italicised; the tag must be closed or the rest of the label is italic too.
      text=f'{validation_value * 100:2.2f}%, <i>p</i> {p_label}',
      textangle=0,
      xanchor='right',
      yanchor='top',
      align="right"
    ),
    row=1,
    col=1
  )

  fig.update_layout(
    title_text='',
    showlegend=False,
    font=dict(
      size=14,
      color="Black"
    ),
    template='simple_white',
    paper_bgcolor='White',
    plot_bgcolor='White'
  )
  fig.update_xaxes(range=[0.1, .6], dtick=.1)

  # Save an interactive and a static copy to the working directory, then display.
  fig.write_html(f'{col}_histogram_01.html')
  fig.write_image(f'{col}_histogram_01.png')
  fig.show()

test_accuracy = 0.002 0.45357135


In [6]:

# Array visuals: a 3x3 grid with one panel per (true diagnosis, predicted
# diagnosis) pair. Each panel shows the permutation distribution of that
# confusion-matrix percentage and a red line at the real model's value.
diag_list = ['SCZ', 'BD', 'ADHD']

arr_plot = make_subplots(
  rows=3,
  cols=3,
  row_titles=diag_list,
  column_titles=diag_list,
  x_title='Predicted',
  y_title='True'
)

# Rows are the true diagnosis (i), columns the predicted diagnosis (j).
for row_num, i in enumerate(diag_list, start=1):
  for col_num, j in enumerate(diag_list, start=1):
    outcome = f'{i}|Predicted{j}_percent'

    # Observed value of the real model: first row of the validation table.
    validation_value = validation[outcome].iloc[0]

    # Null distribution; rows with no value are not usable permutations.
    perm_values = permutations[outcome].dropna()

    # One-sided (upper-tail) permutation p-value. Ties count as "at least as
    # extreme", and the +1 terms count the observed labelling itself, so the
    # estimate can never be exactly 0.
    # NOTE(review): the upper tail is used for every panel, including the
    # off-diagonal (misclassification) panels - confirm that is intended.
    n_as_extreme = int((perm_values >= validation_value).sum())
    p_value = (n_as_extreme + 1) / (len(perm_values) + 1)
    if p_value < .001:
      p_label = "< .001"
    else:
      p_label = "= {:0.3f}".format(p_value)
    print(outcome, p_label)

    # Histogram of the permuted percentages.
    arr_plot.add_trace(
      go.Histogram(
        x=perm_values,
        name=outcome,
        nbinsx=50
      ),
      row=row_num,
      col=col_num
    )

    # Red vertical line at the observed value.
    arr_plot.add_vline(
      x=validation_value,
      line_color="red",
      line_width=2,
      row=row_num,
      col=col_num
    )

    # Observed value (as a percentage) and p-value label, right-aligned at x = 1.
    arr_plot.add_annotation(
      dict(
        font=dict(
          color='black',
          size=14
        ),
        x=1,
        showarrow=False,
        # "p" is italicised; the tag must be closed or the rest of the label is italic too.
        text=f'{validation_value * 100:2.2f}%, <i>p</i> {p_label}',
        textangle=0,
        xanchor='right',
        yanchor='top',
        align="right"
      ),
      row=row_num,
      col=col_num
    )

arr_plot.update_layout(
  showlegend=False,
  font=dict(
    size=14,
    color="Black"
  ),
  template='simple_white',
  paper_bgcolor='White',
  plot_bgcolor='White'
)
arr_plot.update_xaxes(range=[0.0, 1.0], dtick=.2)


def _reposition_titles(a):
  """Move the figure's title annotations; annotations matching no rule are left alone."""
  if a.x == .98:
    # Presumably the row titles that make_subplots places at the right edge
    # (TODO confirm): move them to the left edge and rotate them.
    a.update(x=0, textangle=-90, xanchor='right', xshift=-40)
  elif a.x == 0:
    # Presumably the shared y-axis title (TODO confirm): push it further left.
    a.update(xshift=-60)
  elif a.text == "Predicted":
    # Shared x-axis title: anchor it above the grid instead of below.
    a.update(yanchor='bottom', y=1, yshift=25)


arr_plot.for_each_annotation(_reposition_titles)
# Extra top/left margin so the shifted titles are not clipped.
arr_plot.update_layout(margin=dict(t=60, l=120))

# Save an interactive and a static copy to the working directory, then display.
arr_plot.write_html('Significance_Results_01.html')
arr_plot.write_image('Significance_Results_01.png')
arr_plot.show()

SCZ|PredictedSCZ_percent = 0.033
SCZ|PredictedBD_percent = 0.676
SCZ|PredictedADHD_percent = 0.961
BD|PredictedSCZ_percent = 0.425
BD|PredictedBD_percent = 0.202
BD|PredictedADHD_percent = 0.893
ADHD|PredictedSCZ_percent = 0.972
ADHD|PredictedBD_percent = 0.284
ADHD|PredictedADHD_percent = 0.069


In [9]:

# Re-display the 3x3 figure built in the "Array visuals" cell. This relies on
# `arr_plot` still being present in kernel state from that cell.
arr_plot.show()