# Import packages

In [111]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import shap
from pathlib import Path

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix
from scipy.stats import shapiro
from matplotlib import rcParams
from cycler import cycler
from sklearn.inspection import permutation_importance

# Enable matplotlib's 'figure.autolayout' setting for every figure.
rcParams['figure.autolayout'] = True

# Set each pandas display limit to None so printed frames are not cut off.
for option_name in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(option_name, None)

# Create the output directory; an already existing directory is not an error.
results_dir = Path('../results')
results_dir.mkdir(exist_ok=True)

# Default axes property cycle: six colours, each paired with a line style.
# The two cyclers are added together, so they must have the same length.
# NOTE(review): the last two line styles are the same tuple -- confirm
# that this repetition is intended.
linestyle_cycler = [
    'solid', 'dotted', 'dashed', 'dashdot',
    (0, (1, 1)), (0, (1, 1)),
]
color_cycle = cycler('color', ['r', 'g', 'b', 'y', 'c', 'k'])
linestyle_cycle = cycler('linestyle', linestyle_cycler)
plt.rc('axes', prop_cycle=color_cycle + linestyle_cycle)

## Load lipid data

In [112]:
# Read the lipid table from 'all_data.csv' (path relative to the working
# directory). The literal has no placeholders, so it is a plain string
# rather than an f-string.
lipid_data_df = pd.read_csv('all_data.csv')

In [113]:
# Show the loaded frame as the cell output.
lipid_data_df

Unnamed: 0,Date,Blood Type,Person,Cer d18:2_16:0,Cer d18:1_18:0,Cer d18:1_18:1,Cer d18:1_16:0,Cer d18:1_20:0,Cer d18:1_22:0,Cer d18:1_24:0,Cer d18:1_24:1,Cer d40:2,Cer d41:1,Cer d41:2,Cer d43:1,Cer d44:1,Cer d44:2,Hex2Cer 32:1;2O,Hex2Cer 34:1;2O,Hex2Cer 34:2;2O,Hex2Cer 42:2;2O,Hex2Cer 42:3;2O,Hex3Cer 34:1;2O,HexCer 14:2;2O/28:0,HexCer 41:1;2O,HexCer 42:1;2O,LPA 17:0,LPC 14:0 SN2,LPC 14:0 SN1,LPC 15:0 SN2,LPC 15:0 SN1,LPC 16:0 SN2,LPC 16:0 SN1,LPC 16:1 SN1,LPC 16:1 SN2,LPC 17:0 SN2,LPC 17:0 SN1,LPC 18:0 SN1,LPC 18:1 SN1,LPC 18:1 SN2,LPC 18:2 SN1,LPC 18:2 SN2,LPC 18:3,LPC 19:0 SN2,LPC 20:0 SN2,LPC 20:1 SN1,LPC 20:2 SN2,LPC 20:3 SN1,LPC 20:4 SN2,LPC 20:4 SN1,LPC 20:5 SN2,LPC 20:5 SN1,LPC 22:0,LPC 22:5 SN1,LPC 22:6 SN2,LPC 24:0,LPC O-18:0,LPC O-18:1,LPC O-20:0,LPC O-22:0,LPC O-24:1,LPE 16:0 SN1,LPE 16:0 SN2,LPE 18:0,LPE 18:0 SN2,LPE 18:1 SN2,LPE 18:1 SN1,LPE 18:2 SN2,LPE 18:2 SN1,LPE 20:4 SN2,LPE 20:4 SN1,LPE 20:5,LPE 22:4,LPE 22:6,LPE O-16:1,LPE O-18:1,LPI 18:0,LPI 20:4,PA 16:0_18:1,PA 16:0_18:2,PC 12:0_16:0,PC 14:0_16:1,PC 14:0_18:2,PC 14:0_20:4,PC 14:0_22:6,PC 15:0_16:0,PC 15:0_18:2,PC 15:0_20:3,PC 15:0_20:4,PC 16:0_14:0,PC 16:0_16:0,PC 16:0_17:0,PC 16:0_18:1,PC 16:0_18:2,PC 16:0_18:3,PC 16:0_20:1,PC 16:0_22:6,PC 16:1_20:4,PC 16:1_22:6,PC 17:0_18:1,PC 17:0_18:2,PC 17:0_20:4,PC 17:0_22:6,PC 17:1_20:4,PC 18:0_18:1,PC 18:0_18:2,PC 18:0_20:1,PC 18:0_20:4,PC 18:0_22:4,PC 18:0_22:6,PC 18:0_24:4,PC 18:1_14:0,PC 18:1_18:2,PC 18:1_20:4,PC 18:1_22:6,PC 18:2_18:2 Cis,PC 18:2_20:4,PC 18:3_20:4,PC 19:0_18:1,PC 19:0_18:2,PC 20:1_18:1,PC 20:1_18:2,PC 20:1_20:4,PC 20:2_20:4,PC 20:4_20:4 Cis,PC 22:6_20:4,PC O-16:0_16:0,PC O-16:0_20:3,PC O-16:0_20:4,PC O-16:0_20:5,PC O-16:1_16:0,PC O-16:1_18:2,PC O-16:1_20:4,PC O-16:1_22:6,PC O-18:0_18:1,PC O-18:0_18:2,PC O-18:0_20:4,PC O-18:0_22:6,PC O-18:1_16:0,PC O-18:1_20:4,PC O-18:1_20:5,PC O-18:1_22:6,PC O-18:2_22:4,PC O-20:0_20:4,PC O-20:1_20:4,PC O-22:0_20:4,PC O-22:1_20:4,PC O-22:1_22:6,PC O-22:2_20:4,PC O-24:1_20:4,PE 16:0_16:0,PE 
16:0_16:1,PE 16:0_18:1,PE 16:0_18:2,PE 16:0_20:4,PE 16:0_20:5,PE 16:0_22:6,PE 18:0_18:1,PE 18:0_20:4,PE 18:0_22:6,PE 18:1_18:1 Cis,PE 18:1_18:2,PE 18:2_18:2,PE 18:2_20:4,PE 18:3_18:3,PE O-16:1_20:4,PE O-16:1_22:6,PE O-17:1_20:4,PE O-17:1_22:6,PE O-18:1_18:1,PE O-18:1_20:4,PE O-18:2_22:6,PE O-20:0_18:2,PE O-20:1_18:2,PE O-20:1_20:4,PG 16:0_18:2,PG 18:0_20:4,PG 18:1_18:1 Cis,PI 16:0_18:1,PI 16:0_18:2,PI 16:0_20:4,PI 16:0_22:5,PI 18:0_18:2,PI 18:0_20:3,PI 18:0_20:4,PI 18:0_22:4,PI 18:1_18:2,PS 16:0_16:0,PS 16:0_18:1,PS 18:0_18:0,PS 18:0_18:1,PS 18:0_18:2,PS 18:0_20:4,PS 18:0_22:6,SM 34:0;3O,SM 34:1;3O,SM 34:2;3O,SM d18:1_16:0,SM d18:1_18:0,SM d18:1_18:1,SM d18:1_24:0,SM d18:1_24:1,SM d24:2_14:0,SM d32:0,SM d32:1,SM d32:2,SM d33:1,SM d33:2,SM d34:0,SM d34:2,SM d35:1,SM d35:2,SM d36:0,SM d36:3,SM d38:1,SM d38:3,SM d39:1,SM d40:1,SM d40:2,SM d40:3,SM d41:1,SM d41:2,SM d41:3,SM d44:2,SM d44:3,Cer d18:1_17:0,Cer d43:2,HexCer 18:1;2O/16:0,LPC 17:0,LPC 19:0 SN1,LPC 20:0 SN1,LPC 20:2 SN1,LPC 20:5,LPC 22:4,LPC O-16:0,PA 16:0_20:4,PC 15:0_18:1,PC 16:0_20:3,PC 16:0_20:4,PC 16:0_20:5,PC 16:0_22:4,PC 16:0_22:5,PC 18:0_20:2,PC 18:0_20:3,PC 18:1_18:1 Cis,PC 18:1_22:0,PC 20:0_20:0,PC 22:0_18:2,PC O-16:0_16:1,PC O-16:0_18:1,PC O-20:0_22:6,PC O-20:1_22:4,PC O-20:2_20:4,PC O-22:0_18:2,PC O-22:2_18:2,PE O-16:1_18:1,PE O-16:1_22:4,PS 16:0_20:4,PS 18:2_18:2,SM d38:2,LPC 18:0 SN2,PC 16:0_16:1,PC 18:1_22:4,PE O-18:1_20:3,PE O-18:1_22:6,PE O-20:0_22:6,PG 16:0_18:1,PI 16:0_16:1,PI 18:0_18:1,PI 18:1_20:4,LPC 20:3 SN2,LPC 22:6 SN1,LPC O-22:1,PC 18:0_16:0,PC 18:0_22:5,PC 19:0_20:4,PC O-18:0_22:4,PC O-18:2_20:4,PC O-24:2_20:4,PE O-18:3_20:4,PI 18:0_22:5
0,210625,Plasma,1,0.05,0.27,0.08,0.52,0.82,3.37,9.62,0.88,0.84,4.21,0.84,1.15,0.21,0.08,0.19,1.49,0.0,0.26,0.06,0.17,0.7,0.88,2.22,0.04,0.37,8.37,0.78,4.23,34.14,404.07,5.52,1.03,0.73,5.58,175.02,124.43,11.1,158.38,17.16,1.1,0.64,0.79,1.21,0.78,2.88,5.6,28.41,0.77,2.65,0.29,1.21,2.45,0.55,1.01,1.27,0.27,0.35,0.49,2.09,0.3,2.47,0.3,0.21,2.82,0.91,5.61,1.34,3.91,0.17,0.04,2.3,0.22,0.31,0.34,0.38,0.15,0.06,9.08,3.16,27.22,9.78,4.44,2.6,12.85,3.24,4.4,19.45,41.6,2.09,133.44,1258.59,25.49,8.86,233.84,95.23,1.11,9.84,24.26,13.39,6.11,2.0,160.13,835.0,1.62,368.38,4.74,95.31,0.72,51.52,412.39,113.59,12.86,51.45,22.49,2.4,0.75,2.74,10.28,171.95,20.55,3.9,1.91,0.57,9.7,9.66,16.91,1.09,3.85,16.8,18.91,4.76,1.22,4.11,13.52,2.54,11.9,22.88,1.25,8.97,2.26,2.26,3.38,1.43,2.42,0.52,1.08,3.8,0.0,0.06,0.8,1.3,0.07,0.08,1.8,1.01,7.38,1.35,4.68,0.79,1.73,0.08,0.02,4.55,2.41,0.21,0.1,0.57,8.27,1.19,0.11,0.3,0.52,0.04,0.0,0.1,1.44,1.43,2.27,0.64,3.83,2.13,29.32,0.09,0.55,0.1,1.74,46.68,0.12,1.25,4.07,1.59,0.16,0.41,0.1,119.2,20.48,10.58,28.85,60.11,7.21,0.25,21.85,1.2,7.18,0.13,3.07,25.68,2.86,0.32,0.37,0.44,25.34,0.13,10.22,51.7,37.12,0.75,20.62,14.5,0.55,0.19,0.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,210625,Serum,1,0.05,0.3,0.08,0.58,0.84,4.08,11.54,0.95,0.95,4.86,0.99,1.27,0.2,0.07,0.21,1.71,0.14,0.28,0.07,0.2,0.82,0.98,2.74,0.0,0.84,4.01,0.38,2.06,17.52,182.69,2.53,0.38,0.31,2.65,85.28,52.91,4.85,73.37,8.22,0.51,0.19,0.3,0.49,0.33,1.03,2.26,13.96,0.24,1.23,0.14,0.55,1.02,0.22,0.58,0.75,0.11,0.15,0.26,3.23,0.42,4.15,0.41,0.65,4.53,1.2,7.75,1.57,5.85,0.17,0.15,3.65,0.35,0.37,0.33,0.5,0.0,0.05,5.16,1.85,14.5,5.7,2.66,1.52,7.68,1.72,2.5,11.09,22.11,1.22,139.65,780.72,15.04,5.24,158.62,54.34,0.39,5.64,14.47,8.04,2.77,1.15,90.69,561.39,0.81,200.38,2.76,47.28,0.44,29.89,222.67,61.37,6.49,27.75,13.1,1.11,0.41,1.62,6.09,100.75,12.5,2.6,1.23,0.39,1.88,4.58,10.42,0.84,2.15,10.43,11.14,2.85,0.96,3.98,7.21,2.58,6.65,12.35,0.92,2.26,1.23,1.09,2.08,0.76,1.55,0.31,0.59,2.49,0.0,0.05,1.09,1.91,0.09,0.12,2.7,1.25,10.15,1.9,6.38,1.08,2.4,0.13,0.04,6.04,3.41,0.31,0.14,0.81,11.64,1.89,0.13,0.4,0.68,0.04,0.0,0.12,2.45,2.63,4.03,1.06,5.91,3.51,48.06,0.19,0.9,0.11,0.0,5.14,0.16,0.01,0.11,0.08,0.11,0.27,0.07,81.31,12.77,7.92,20.13,43.07,4.91,0.19,16.1,0.81,4.79,0.1,2.11,17.46,1.82,0.21,0.3,0.26,17.09,0.1,6.44,33.5,27.53,0.58,16.12,10.41,0.38,0.13,0.14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,210625,Blood EDTA - Serum,1,0.07,0.82,0.11,1.71,0.88,4.12,11.9,0.75,1.01,2.21,0.48,0.54,0.41,0.41,0.2,3.03,0.08,0.96,0.08,0.19,0.56,0.42,2.41,0.0,0.66,2.49,0.3,1.35,15.89,143.58,1.48,0.26,0.22,1.92,65.83,23.85,8.82,23.59,10.15,0.16,0.2,0.34,0.35,0.16,1.07,3.42,3.7,0.17,0.33,0.12,0.22,0.81,0.18,0.36,0.43,0.1,0.12,0.14,2.76,2.21,3.09,0.23,13.15,2.3,3.57,2.51,18.87,2.02,1.04,4.17,0.74,0.22,0.57,0.66,0.15,3.82,1.75,4.41,1.17,6.34,1.43,0.55,1.76,3.82,0.74,0.62,10.71,66.95,1.73,161.75,471.69,5.32,21.24,37.35,11.04,0.0,6.18,8.03,2.21,0.61,0.96,116.36,295.06,1.09,59.02,0.7,13.03,0.2,17.95,97.53,13.11,0.95,8.71,2.11,0.24,0.46,0.74,2.71,25.54,3.34,0.67,0.19,0.11,3.49,1.8,4.49,0.2,4.66,3.78,3.36,0.55,2.32,1.62,2.44,0.34,8.77,2.73,0.77,0.43,0.24,0.38,0.6,0.27,0.25,0.13,0.18,0.44,1.42,0.59,75.68,11.23,0.42,0.52,5.16,20.2,20.97,1.67,21.5,10.11,16.02,0.65,0.02,10.39,2.33,0.41,0.09,6.75,33.51,0.89,0.23,0.23,1.19,0.06,0.14,0.21,1.48,1.33,1.58,0.39,3.36,1.57,20.18,0.13,0.37,0.0,1.83,4.09,20.63,5.16,62.22,18.97,0.18,0.43,0.07,119.9,22.87,8.72,64.33,73.5,4.54,0.24,17.72,0.7,6.73,0.05,4.19,15.04,2.31,0.33,0.44,0.18,18.48,0.06,4.84,54.56,20.59,0.35,13.16,5.12,0.29,1.41,0.75,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,210625,DBS - Venous,1,0.07,0.42,0.06,1.08,0.43,3.09,5.17,1.12,0.55,1.18,0.27,0.31,0.22,0.17,0.11,1.66,0.07,0.54,0.06,0.11,0.62,0.2,0.48,0.0,0.29,1.04,0.23,0.7,6.66,61.82,0.74,0.35,0.28,0.95,25.12,12.25,1.7,15.88,2.43,0.17,0.12,0.17,0.2,0.14,0.57,1.09,3.0,0.2,0.3,0.0,0.16,0.17,0.11,0.21,0.31,0.09,0.12,0.12,1.5,0.3,2.06,0.23,0.82,1.49,0.62,1.85,2.82,1.73,0.27,0.66,0.98,0.64,1.07,0.57,0.33,5.75,4.03,1.19,0.31,3.92,1.31,0.42,0.56,2.21,0.57,0.57,3.53,17.81,0.6,64.09,298.49,4.53,7.23,38.0,10.95,0.26,2.64,4.71,1.9,0.59,0.49,45.81,177.8,0.46,54.9,0.66,11.11,0.29,7.91,74.01,14.37,1.59,6.58,2.6,0.26,0.19,0.41,1.59,18.06,3.19,0.9,0.36,0.09,1.1,0.61,5.53,0.4,0.97,3.18,3.26,0.65,0.65,0.88,1.99,0.37,3.37,3.36,0.09,0.54,0.7,0.36,0.51,0.12,0.28,0.16,0.18,0.44,0.39,0.26,27.21,6.18,0.42,0.64,5.06,7.2,20.37,1.63,11.63,8.4,14.25,1.03,0.03,12.75,3.66,0.46,0.1,4.96,51.47,2.48,0.15,0.18,2.04,0.08,0.07,0.06,0.77,0.66,0.59,0.44,1.8,1.15,13.0,0.14,0.3,0.1,0.87,2.39,7.51,2.92,56.7,19.51,0.07,0.28,0.07,103.88,13.28,7.95,49.52,69.99,3.93,0.2,12.45,0.57,4.98,0.14,2.98,11.58,1.37,0.17,0.31,0.18,9.19,0.14,2.7,35.03,12.92,0.45,7.21,4.11,0.36,0.94,0.63,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,210625,DBS - Finger,1,0.06,0.32,0.05,0.89,0.37,2.23,4.0,1.05,0.47,0.88,0.21,0.23,0.18,0.16,0.1,1.43,0.0,0.41,0.05,0.08,0.31,0.17,0.65,0.0,0.38,1.17,0.35,0.62,6.64,58.04,0.71,0.3,0.26,1.04,24.91,11.93,1.94,15.0,2.74,0.16,0.23,0.19,0.28,0.23,0.34,0.71,3.0,0.16,0.15,0.07,0.19,0.33,0.12,0.38,0.35,0.0,0.08,0.13,1.57,0.36,1.94,0.29,1.01,1.5,0.68,1.7,3.42,1.78,0.33,0.83,0.96,0.48,0.86,0.51,0.65,4.27,3.52,1.03,0.5,3.46,1.04,0.36,0.56,1.93,0.47,0.48,2.7,16.07,0.51,19.65,263.77,3.72,5.74,31.69,7.93,0.0,2.37,4.22,1.74,0.59,0.76,37.3,153.95,0.37,47.3,0.59,9.99,0.27,7.13,65.28,12.7,1.12,6.06,2.21,0.22,0.39,0.43,1.4,16.02,2.63,0.51,0.32,0.11,0.99,0.56,4.33,0.34,0.58,2.43,2.9,0.54,0.7,0.62,1.67,0.3,3.26,2.81,0.03,0.4,0.28,0.31,0.65,0.1,0.27,0.11,0.07,0.38,0.51,0.28,31.96,7.26,0.51,0.67,5.92,8.22,23.79,1.94,12.51,9.35,16.62,1.08,0.04,13.74,4.3,0.54,0.12,4.87,50.59,2.39,0.17,0.17,1.63,0.06,0.08,0.12,0.89,0.65,1.24,0.43,1.76,1.17,14.34,0.16,0.24,0.12,0.76,2.45,8.93,2.86,53.23,17.91,0.13,0.28,0.13,115.95,13.03,7.73,45.26,66.22,3.85,0.24,11.57,0.5,4.41,0.08,2.66,11.47,1.37,0.42,0.3,0.15,8.74,0.07,2.68,32.84,12.57,0.45,6.32,4.09,0.32,0.91,0.62,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,210702,Plasma,1,0.04,0.25,0.11,0.47,0.72,3.74,10.48,1.29,0.84,3.8,0.67,0.94,0.2,0.07,0.15,1.36,0.12,0.25,0.06,0.14,1.03,0.86,2.23,0.0,0.29,1.2,0.19,0.71,5.91,75.65,0.98,0.24,0.16,0.88,29.21,22.68,2.6,41.89,4.96,0.44,0.11,0.17,0.22,0.14,0.74,1.0,4.96,0.16,0.53,0.06,0.37,0.29,0.12,0.17,0.24,0.09,0.08,0.11,0.99,0.14,1.31,0.16,0.47,2.58,0.58,2.72,0.66,1.64,0.11,0.0,0.91,0.09,0.14,0.22,0.34,0.36,0.34,0.26,0.28,5.3,1.7,1.33,0.57,3.87,0.93,0.91,2.41,9.04,0.51,28.56,412.34,8.47,3.98,45.31,17.07,0.5,3.05,7.04,3.04,1.0,0.49,67.46,322.86,0.52,84.96,1.15,18.08,0.41,10.75,114.88,23.77,2.32,34.66,9.24,0.0,0.24,0.83,3.68,49.04,5.03,1.64,0.65,0.36,2.14,2.68,8.82,0.55,1.14,6.68,7.1,2.24,0.58,2.09,3.79,0.65,3.77,6.42,0.34,1.2,0.54,0.62,1.0,0.33,0.71,0.14,0.41,1.05,0.0,0.03,0.56,0.67,0.04,0.02,0.99,1.04,4.01,0.72,2.9,0.5,0.8,0.08,0.0,4.0,1.99,0.27,0.1,0.83,9.05,1.07,0.1,0.35,0.47,0.01,0.0,0.03,2.42,1.24,1.55,0.58,4.41,1.79,23.59,0.1,0.71,0.11,0.05,3.15,0.29,0.01,0.28,0.08,0.12,0.24,0.07,86.74,11.73,7.53,17.25,42.59,5.63,0.18,11.9,0.63,4.92,0.09,2.38,15.35,1.59,0.23,0.34,0.2,15.87,0.08,5.66,33.79,25.6,0.54,10.65,7.31,0.4,0.12,0.07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,210702,Serum,1,0.22,0.46,0.22,0.78,1.53,7.33,20.47,1.68,1.71,6.75,1.44,1.81,0.32,0.13,0.32,2.66,0.18,0.41,0.12,0.28,5.27,1.6,3.88,0.0,0.57,2.6,0.29,1.44,13.53,144.39,2.11,0.4,0.2,1.78,66.45,57.68,5.82,92.7,10.86,0.92,0.18,0.31,0.42,0.43,1.67,2.13,10.91,0.25,1.12,0.13,0.4,0.74,0.09,0.37,0.5,0.06,0.14,0.33,1.85,0.27,2.57,0.26,0.87,5.38,1.14,5.83,1.15,3.64,0.16,0.14,2.0,0.25,0.38,0.26,0.77,0.39,0.0,0.52,0.54,10.7,3.71,1.26,1.02,7.35,1.74,1.71,4.72,17.74,0.84,80.2,876.25,16.19,5.38,79.38,44.62,1.08,5.96,14.61,5.47,2.05,1.0,136.36,690.67,1.06,159.51,2.5,36.74,0.26,22.44,210.77,43.39,4.92,68.27,18.1,0.0,0.45,1.51,7.43,102.03,9.77,3.04,1.13,0.33,2.57,4.49,15.56,1.0,2.22,13.49,14.09,4.23,0.86,7.05,6.91,1.42,11.05,12.57,0.68,2.55,1.09,1.06,2.03,0.8,1.37,0.19,1.01,2.33,0.0,0.06,1.13,1.39,0.12,0.07,1.93,2.19,7.82,1.35,5.73,1.05,1.55,0.1,0.0,9.05,4.06,0.59,0.23,1.77,17.97,2.11,0.21,0.76,0.99,0.03,0.01,0.04,3.98,2.56,2.84,1.25,8.31,3.51,44.43,0.22,1.64,0.16,0.14,6.66,0.1,0.05,0.13,0.07,0.17,0.35,0.1,110.44,18.47,11.11,26.24,56.29,7.94,0.25,22.62,1.12,7.7,0.14,3.2,27.59,2.37,0.35,0.41,0.35,22.93,0.12,7.98,46.04,39.72,0.72,19.66,13.85,0.5,0.18,0.19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,210702,Blood EDTA - Serum,1,0.11,1.32,0.17,3.6,0.9,4.81,12.48,2.39,1.32,1.43,0.63,0.32,0.67,0.62,0.02,0.22,0.01,0.1,0.01,0.02,0.31,0.16,0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.06,3.47,2.49,0.0,14.98,2.04,5.05,2.54,22.99,0.0,3.26,4.47,0.0,0.0,3.84,0.0,0.0,2.14,1.07,0.52,0.44,2.78,0.51,0.85,1.03,2.76,0.83,3.88,3.31,30.28,2.75,0.0,418.08,4.08,16.78,18.4,4.88,0.24,4.93,6.71,2.57,0.33,1.39,73.61,212.32,0.78,37.96,0.7,5.13,1.54,8.66,60.17,8.45,0.19,9.96,2.57,0.17,0.0,0.53,4.82,18.27,3.11,0.09,0.53,0.35,3.24,1.96,3.89,0.22,1.55,4.24,3.29,0.59,1.18,2.31,0.67,0.14,4.79,2.85,0.32,0.0,0.08,0.33,0.53,0.56,0.27,0.0,0.0,0.37,2.09,0.78,149.91,21.59,0.83,0.86,10.34,48.45,54.07,4.22,60.48,28.54,30.83,1.35,0.09,25.79,6.63,1.1,0.24,28.31,113.33,2.65,0.68,0.71,6.85,0.18,0.11,0.62,3.63,2.8,1.66,0.57,4.05,2.29,22.88,0.13,1.02,0.11,1.54,5.49,20.32,5.41,66.31,22.98,0.15,1.25,0.0,205.2,26.99,6.71,141.18,164.34,4.91,0.45,11.45,0.72,5.29,0.0,4.5,10.35,2.66,0.46,0.47,0.37,13.35,0.0,3.85,67.54,18.1,1.44,12.05,5.73,0.6,3.57,1.99,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,210702,DBS - Venous,1,0.08,0.46,0.06,1.27,0.52,3.47,6.95,1.4,0.67,1.41,0.32,0.3,0.3,0.23,0.12,1.92,0.04,0.54,0.07,0.12,0.34,0.25,1.22,0.0,0.34,0.68,0.03,0.47,4.55,39.97,0.46,0.05,0.02,0.82,21.06,13.04,2.1,17.71,2.91,0.2,0.25,0.19,0.19,0.19,0.39,0.96,2.12,0.0,0.28,0.07,0.14,0.28,0.08,0.22,0.24,0.0,0.02,0.12,1.62,0.26,1.78,0.22,0.96,1.84,0.69,1.55,2.66,1.27,0.25,0.64,0.67,0.6,1.16,0.75,0.38,2.2,3.49,0.39,0.13,3.27,0.85,0.22,0.58,2.64,0.71,0.54,3.04,18.87,0.73,70.31,364.94,5.87,8.86,28.83,9.95,0.3,2.93,5.4,1.84,0.49,1.54,65.94,224.29,0.5,53.79,0.77,10.63,0.34,5.7,82.67,13.23,1.31,17.91,4.16,0.03,0.35,0.56,2.3,21.36,4.94,0.72,0.39,0.16,1.31,1.02,4.62,0.4,1.02,4.29,4.25,0.94,1.17,1.94,2.17,0.43,7.32,3.33,0.2,0.59,0.32,0.41,1.27,0.16,0.33,0.04,0.28,0.47,0.62,0.35,40.95,9.25,0.68,0.88,8.09,10.6,28.78,2.18,16.95,12.54,21.08,1.59,0.04,19.24,5.05,0.73,0.18,6.88,73.5,3.16,0.23,0.17,2.25,0.07,0.12,0.09,1.46,1.08,1.47,0.91,2.86,1.63,20.11,0.26,0.57,0.1,1.22,3.32,12.47,4.75,97.48,30.18,0.17,0.29,0.04,120.39,15.02,7.65,60.55,71.62,4.21,0.22,12.71,0.55,5.36,0.06,3.29,11.74,1.52,0.54,0.27,0.17,9.53,0.04,2.92,37.38,13.76,0.92,7.35,4.14,0.28,0.98,0.77,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,210702,DBS - Finger,1,0.04,0.33,0.06,0.8,0.33,2.21,4.14,0.83,0.4,0.71,0.17,0.18,0.18,0.17,0.08,1.41,0.05,0.4,0.05,0.07,0.22,0.14,0.38,0.0,0.1,0.37,0.0,0.17,2.65,23.81,0.44,0.11,0.13,0.26,12.48,6.83,1.02,9.66,1.73,0.12,0.15,0.12,0.11,0.09,0.21,0.7,1.27,0.0,0.15,0.1,0.11,0.15,0.05,0.14,0.15,0.0,0.06,0.06,0.53,0.13,0.94,0.1,0.5,0.95,0.27,0.82,1.55,0.7,0.15,0.39,0.35,0.29,0.48,0.58,0.14,1.33,1.17,0.16,0.13,1.86,0.5,0.19,0.36,1.54,0.37,0.25,1.7,12.4,0.41,13.51,198.52,2.98,5.51,15.46,5.44,0.14,1.78,3.12,0.98,0.32,0.9,39.83,121.63,0.35,32.62,0.43,5.74,0.23,3.58,47.59,7.48,1.03,9.5,2.32,0.0,0.14,0.27,1.2,11.22,1.83,0.4,0.2,0.05,0.78,0.84,4.17,0.26,0.94,2.46,2.3,0.46,0.45,1.16,1.27,0.24,2.63,2.3,0.07,0.29,0.29,0.2,0.4,0.08,0.12,0.08,0.12,0.26,0.3,0.17,18.86,4.47,0.33,0.44,3.44,5.35,13.84,1.03,7.56,5.81,10.61,0.71,0.02,10.34,2.45,0.35,0.08,3.26,32.83,1.51,0.08,0.12,1.1,0.04,0.04,0.1,0.86,0.54,0.85,0.42,1.49,0.94,10.91,0.09,0.18,0.11,0.79,1.75,8.55,2.99,55.67,18.34,0.11,0.22,0.05,83.17,10.75,5.17,42.97,55.41,3.09,0.16,8.5,0.39,4.22,0.06,2.62,8.57,1.11,0.33,0.23,0.13,7.12,0.04,1.91,28.79,9.49,0.55,5.1,2.79,0.22,0.7,0.54,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [114]:
# Flag each column that contains at least one missing value, then count how
# many columns are flagged (True) versus complete (False).
column_has_missing = lipid_data_df.isna().any()
column_has_missing.value_counts()

False    158
True     123
dtype: int64

In [115]:
# Print the per-column "has a missing value" flag, with the True entries
# listed before the False ones.
missing_flag_per_column = lipid_data_df.isna().any()
print(missing_flag_per_column.sort_values(ascending=False))

PI 18:0_22:5            True
PA 16:0_18:1            True
PC 17:1_20:4            True
SM d38:3                True
PC 16:1_22:6            True
PC 16:1_20:4            True
PC 16:0_20:1            True
PC 16:0_18:2            True
PC 16:0_18:1            True
Cer d18:1_17:0          True
HexCer 18:1;2O/16:0     True
LPC 17:0                True
PC 14:0_16:1            True
LPC 19:0 SN1            True
LPC 20:0 SN1            True
LPI 20:4                True
PC 16:0_22:4            True
LPI 18:0                True
LPC 20:2 SN1            True
LPE O-16:1              True
LPC 20:5                True
LPC 22:4                True
LPE 20:5                True
LPC O-16:0              True
PA 16:0_20:4            True
PC 15:0_18:1            True
PC 16:0_20:3            True
PC 16:0_20:4            True
LPE 18:1 SN2            True
LPE 18:0 SN2            True
PC 18:0_18:2            True
PC 18:0_20:4            True
PC 18:0_24:4            True
PC 18:1_14:0            True
PE O-20:1_18:2

In [116]:
# Summary statistics for the table after removing the 'Person' and 'Date'
# columns, so those two are left out of the summary.
without_person_and_date = lipid_data_df.drop(columns=['Person', 'Date'])
without_person_and_date.describe()

Unnamed: 0,Cer d18:2_16:0,Cer d18:1_18:0,Cer d18:1_18:1,Cer d18:1_16:0,Cer d18:1_20:0,Cer d18:1_22:0,Cer d18:1_24:0,Cer d18:1_24:1,Cer d40:2,Cer d41:1,Cer d41:2,Cer d43:1,Cer d44:1,Cer d44:2,Hex2Cer 32:1;2O,Hex2Cer 34:1;2O,Hex2Cer 34:2;2O,Hex2Cer 42:2;2O,Hex2Cer 42:3;2O,Hex3Cer 34:1;2O,HexCer 14:2;2O/28:0,HexCer 41:1;2O,HexCer 42:1;2O,LPA 17:0,LPC 14:0 SN2,LPC 14:0 SN1,LPC 15:0 SN2,LPC 15:0 SN1,LPC 16:0 SN2,LPC 16:0 SN1,LPC 16:1 SN1,LPC 16:1 SN2,LPC 17:0 SN2,LPC 17:0 SN1,LPC 18:0 SN1,LPC 18:1 SN1,LPC 18:1 SN2,LPC 18:2 SN1,LPC 18:2 SN2,LPC 18:3,LPC 19:0 SN2,LPC 20:0 SN2,LPC 20:1 SN1,LPC 20:2 SN2,LPC 20:3 SN1,LPC 20:4 SN2,LPC 20:4 SN1,LPC 20:5 SN2,LPC 20:5 SN1,LPC 22:0,LPC 22:5 SN1,LPC 22:6 SN2,LPC 24:0,LPC O-18:0,LPC O-18:1,LPC O-20:0,LPC O-22:0,LPC O-24:1,LPE 16:0 SN1,LPE 16:0 SN2,LPE 18:0,LPE 18:0 SN2,LPE 18:1 SN2,LPE 18:1 SN1,LPE 18:2 SN2,LPE 18:2 SN1,LPE 20:4 SN2,LPE 20:4 SN1,LPE 20:5,LPE 22:4,LPE 22:6,LPE O-16:1,LPE O-18:1,LPI 18:0,LPI 20:4,PA 16:0_18:1,PA 16:0_18:2,PC 12:0_16:0,PC 14:0_16:1,PC 14:0_18:2,PC 14:0_20:4,PC 14:0_22:6,PC 15:0_16:0,PC 15:0_18:2,PC 15:0_20:3,PC 15:0_20:4,PC 16:0_14:0,PC 16:0_16:0,PC 16:0_17:0,PC 16:0_18:1,PC 16:0_18:2,PC 16:0_18:3,PC 16:0_20:1,PC 16:0_22:6,PC 16:1_20:4,PC 16:1_22:6,PC 17:0_18:1,PC 17:0_18:2,PC 17:0_20:4,PC 17:0_22:6,PC 17:1_20:4,PC 18:0_18:1,PC 18:0_18:2,PC 18:0_20:1,PC 18:0_20:4,PC 18:0_22:4,PC 18:0_22:6,PC 18:0_24:4,PC 18:1_14:0,PC 18:1_18:2,PC 18:1_20:4,PC 18:1_22:6,PC 18:2_18:2 Cis,PC 18:2_20:4,PC 18:3_20:4,PC 19:0_18:1,PC 19:0_18:2,PC 20:1_18:1,PC 20:1_18:2,PC 20:1_20:4,PC 20:2_20:4,PC 20:4_20:4 Cis,PC 22:6_20:4,PC O-16:0_16:0,PC O-16:0_20:3,PC O-16:0_20:4,PC O-16:0_20:5,PC O-16:1_16:0,PC O-16:1_18:2,PC O-16:1_20:4,PC O-16:1_22:6,PC O-18:0_18:1,PC O-18:0_18:2,PC O-18:0_20:4,PC O-18:0_22:6,PC O-18:1_16:0,PC O-18:1_20:4,PC O-18:1_20:5,PC O-18:1_22:6,PC O-18:2_22:4,PC O-20:0_20:4,PC O-20:1_20:4,PC O-22:0_20:4,PC O-22:1_20:4,PC O-22:1_22:6,PC O-22:2_20:4,PC O-24:1_20:4,PE 16:0_16:0,PE 16:0_16:1,PE 16:0_18:1,PE 
16:0_18:2,PE 16:0_20:4,PE 16:0_20:5,PE 16:0_22:6,PE 18:0_18:1,PE 18:0_20:4,PE 18:0_22:6,PE 18:1_18:1 Cis,PE 18:1_18:2,PE 18:2_18:2,PE 18:2_20:4,PE 18:3_18:3,PE O-16:1_20:4,PE O-16:1_22:6,PE O-17:1_20:4,PE O-17:1_22:6,PE O-18:1_18:1,PE O-18:1_20:4,PE O-18:2_22:6,PE O-20:0_18:2,PE O-20:1_18:2,PE O-20:1_20:4,PG 16:0_18:2,PG 18:0_20:4,PG 18:1_18:1 Cis,PI 16:0_18:1,PI 16:0_18:2,PI 16:0_20:4,PI 16:0_22:5,PI 18:0_18:2,PI 18:0_20:3,PI 18:0_20:4,PI 18:0_22:4,PI 18:1_18:2,PS 16:0_16:0,PS 16:0_18:1,PS 18:0_18:0,PS 18:0_18:1,PS 18:0_18:2,PS 18:0_20:4,PS 18:0_22:6,SM 34:0;3O,SM 34:1;3O,SM 34:2;3O,SM d18:1_16:0,SM d18:1_18:0,SM d18:1_18:1,SM d18:1_24:0,SM d18:1_24:1,SM d24:2_14:0,SM d32:0,SM d32:1,SM d32:2,SM d33:1,SM d33:2,SM d34:0,SM d34:2,SM d35:1,SM d35:2,SM d36:0,SM d36:3,SM d38:1,SM d38:3,SM d39:1,SM d40:1,SM d40:2,SM d40:3,SM d41:1,SM d41:2,SM d41:3,SM d44:2,SM d44:3,Cer d18:1_17:0,Cer d43:2,HexCer 18:1;2O/16:0,LPC 17:0,LPC 19:0 SN1,LPC 20:0 SN1,LPC 20:2 SN1,LPC 20:5,LPC 22:4,LPC O-16:0,PA 16:0_20:4,PC 15:0_18:1,PC 16:0_20:3,PC 16:0_20:4,PC 16:0_20:5,PC 16:0_22:4,PC 16:0_22:5,PC 18:0_20:2,PC 18:0_20:3,PC 18:1_18:1 Cis,PC 18:1_22:0,PC 20:0_20:0,PC 22:0_18:2,PC O-16:0_16:1,PC O-16:0_18:1,PC O-20:0_22:6,PC O-20:1_22:4,PC O-20:2_20:4,PC O-22:0_18:2,PC O-22:2_18:2,PE O-16:1_18:1,PE O-16:1_22:4,PS 16:0_20:4,PS 18:2_18:2,SM d38:2,LPC 18:0 SN2,PC 16:0_16:1,PC 18:1_22:4,PE O-18:1_20:3,PE O-18:1_22:6,PE O-20:0_22:6,PG 16:0_18:1,PI 16:0_16:1,PI 18:0_18:1,PI 18:1_20:4,LPC 20:3 SN2,LPC 22:6 SN1,LPC O-22:1,PC 18:0_16:0,PC 18:0_22:5,PC 19:0_20:4,PC O-18:0_22:4,PC O-18:2_20:4,PC O-24:2_20:4,PE O-18:3_20:4,PI 18:0_22:5
count,60.0,30.0,30.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,45.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,45.0,30.0,60.0,15.0,60.0,60.0,60.0,60.0,45.0,15.0,45.0,60.0,60.0,60.0,60.0,45.0,60.0,30.0,30.0,60.0,30.0,30.0,60.0,60.0,15.0,15.0,30.0,30.0,45.0,60.0,60.0,60.0,15.0,45.0,60.0,60.0,60.0,60.0,45.0,45.0,60.0,60.0,60.0,60.0,60.0,30.0,60.0,60.0,30.0,60.0,45.0,15.0,45.0,60.0,60.0,45.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,15.0,45.0,60.0,45.0,60.0,30.0,30.0,60.0,60.0,60.0,60.0,45.0,60.0,15.0,60.0,30.0,60.0,60.0,15.0,45.0,30.0,30.0,60.0,60.0,60.0,30.0,15.0,60.0,15.0,15.0,30.0,60.0,60.0,30.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,45.0,15.0,60.0,60.0,60.0,15.0,15.0,60.0,60.0,30.0,60.0,60.0,60.0,30.0,60.0,30.0,60.0,45.0,60.0,60.0,60.0,60.0,60.0,30.0,60.0,60.0,60.0,30.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,15.0,60.0,60.0,30.0,30.0,60.0,60.0,60.0,45.0,60.0,60.0,60.0,30.0,60.0,45.0,60.0,45.0,60.0,60.0,45.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,30.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,45.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,15.0,30.0,30.0,30.0,30.0,30.0,30.0,45.0,15.0,45.0,30.0,45.0,45.0,45.0,30.0,30.0,30.0,45.0,45.0,30.0,45.0,30.0,30.0,45.0,45.0,30.0,30.0,45.0,30.0,30.0,45.0,30.0,45.0,30.0,30.0,30.0,15.0,15.0,15.0,15.0,15.0,30.0,15.0,30.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0
mean,0.068667,0.413333,0.091,1.032,0.491667,2.504667,6.264833,6.889833,0.628333,1.4625,0.399333,0.515333,0.229667,0.230889,0.141667,2.005833,0.077833,0.6365,0.1005,0.134,0.986333,0.311333,0.937333,0.011556,0.249667,0.901833,0.238,0.636,6.971333,72.573333,1.011333,0.284444,0.216,0.949333,29.697667,19.172667,2.635167,29.0375,4.538444,0.2585,0.164667,0.227667,0.298833,0.202667,1.003667,3.695,2.637,0.189333,0.638667,0.075,0.244333,0.447556,0.133167,0.208833,0.363167,0.073333,0.087556,0.155333,2.323333,0.962167,3.148167,0.188667,4.258889,4.119833,1.822167,5.167333,7.491,3.7555,0.432667,2.353833,1.693667,0.289333,1.036667,0.375556,0.303333,1.742889,1.514667,0.528833,0.342889,3.616167,1.076333,0.53,0.771667,2.928333,0.742667,1.183833,2.866,24.451333,0.7795,62.83,422.225778,6.788667,7.344222,48.8205,17.552333,0.374667,3.0655,6.429833,2.932667,0.8495,3.471778,54.847167,332.184,0.699,99.407333,1.450333,17.186,0.382667,9.475556,66.979333,25.514,2.553833,16.354167,5.3355,0.431,0.304,0.886833,3.705333,50.536,5.79,1.139167,0.471333,0.202,2.461833,2.119,7.1715,0.375167,1.865167,5.8185,5.920333,1.186167,0.859333,1.934167,3.4075,0.930444,6.067333,6.985,0.2885,1.069667,0.650667,0.648667,1.144833,0.374333,0.863333,0.1695,0.2795,1.337167,0.416667,0.195167,25.693667,5.851,0.398667,0.370833,4.022667,6.870667,18.612167,1.697333,11.217333,6.416,13.048,0.814,0.028333,12.374833,2.947333,0.469,0.1055,4.147667,37.654667,1.753833,0.2235,0.3,1.895333,0.067667,0.061667,0.116667,1.178333,1.443,1.766333,0.706444,4.237667,1.731833,24.5115,0.174,0.614833,0.104667,0.922167,3.538444,8.2515,2.589833,30.893556,11.611,0.154333,0.425333,0.078,124.937333,16.022167,7.846333,48.377667,87.176333,4.816667,0.2155,11.324333,0.544,5.695333,0.0875,3.588667,14.881167,1.787667,0.264833,0.538,0.245167,11.631667,0.124889,3.549167,38.2895,19.281333,0.921833,9.776,5.823333,0.4955,1.1965,0.847,0.38,0.287667,0.587,0.420667,0.151,0.172,0.176667,0.315111,0.090667,0.723333,0.914,0.663111,94.038444,141.578667,11.841667,88.9606
67,24.460333,3.044444,29.505333,241.160667,0.116222,0.056,0.146333,0.532889,6.654667,0.271333,0.989667,0.716444,0.252667,0.571667,5.742,4.335,0.443333,0.035,4.543,2.002333,9.380667,5.552667,1.632,0.326,0.019333,0.142333,0.148,0.864667,0.548667,0.588667,0.290667,0.083333,8.259333,4.486,0.362667,0.611333,2.736667,0.656667,0.412,0.319333
std,0.037256,0.305302,0.044206,0.912692,0.280956,1.222305,3.975405,6.902184,0.375627,1.276462,0.292128,0.473535,0.176769,0.210463,0.08753,1.01975,0.052435,0.466225,0.055827,0.063036,1.196751,0.275678,0.78523,0.033368,0.209342,1.211948,0.189932,0.620768,5.261703,60.77101,0.882271,0.196221,0.166896,0.906889,26.184001,19.122241,1.970022,27.968842,3.358606,0.221297,0.106536,0.134284,0.190121,0.149457,0.885253,3.432791,4.189154,0.191813,0.680041,0.064634,0.226437,0.404914,0.147608,0.149599,0.214448,0.068208,0.064391,0.08125,2.264177,2.301233,3.197066,0.082396,11.246324,6.751744,2.611118,7.772427,13.465462,5.586693,0.686239,4.891076,2.073293,0.178846,1.165589,0.233701,0.216124,1.574169,1.403502,1.410257,0.525701,4.010817,1.473897,0.689055,0.4345,2.04413,0.50901,1.60797,2.932071,16.725417,0.501609,52.85557,211.242047,4.620303,4.91145,39.576678,19.682102,0.322541,1.55361,3.829862,2.193942,0.917496,4.757156,29.848061,216.053417,0.357153,76.297761,1.039723,15.460586,0.351313,8.643258,91.395371,22.1509,2.400977,13.028268,4.591331,0.509004,0.187037,0.65621,2.636586,46.405394,3.904893,0.85221,0.329676,0.118041,1.56331,1.911067,4.458829,0.250366,1.446653,3.717131,3.977879,1.008194,0.455236,1.272611,2.479407,0.745572,3.13491,4.64511,0.266266,1.28193,0.557628,0.531344,0.802337,0.261004,0.728193,0.111923,0.238245,1.076896,0.529322,0.196921,33.491841,5.260474,0.289573,0.364261,2.936295,8.601527,13.122488,0.973376,12.149548,6.146141,11.278437,0.68924,0.023206,8.338659,1.385322,0.280989,0.054971,5.375634,30.236128,0.775585,0.22984,0.19817,2.42276,0.057736,0.064438,0.11556,0.841061,0.74985,0.774435,0.302724,2.246615,0.760826,10.99596,0.127539,0.369165,0.065387,0.966235,6.705516,8.175057,2.484162,41.530174,10.831695,0.119793,0.339798,0.044903,74.682342,6.940583,2.107458,42.456871,60.184946,1.680272,0.118156,4.730274,0.234414,1.99617,0.058733,1.601481,5.284859,0.728617,0.123144,0.475679,0.105195,4.950297,0.080781,1.930834,17.055231,8.883822,0.488937,4.499705,2.903688,0.222211,1.56733,0.90
5133,0.0,0.23344,0.309718,0.392902,0.104728,0.075219,0.110151,0.25384,0.050634,0.488779,0.906215,0.956525,48.972875,82.760064,10.817759,53.979024,16.53193,1.610165,22.522991,121.692488,0.092178,0.059804,0.092382,0.298166,2.657531,0.185802,0.706841,0.457365,0.126789,0.302519,6.191789,7.263679,0.489694,0.036742,1.7696,0.884946,6.348379,3.290735,1.477204,0.591509,0.027637,0.15902,0.065596,0.509433,0.214338,0.398961,0.158631,0.030158,6.202787,2.199662,0.249269,0.38783,2.084188,0.552716,0.247508,0.164424
min,0.02,0.14,0.04,0.16,0.05,0.51,0.26,0.5,0.09,0.12,0.06,0.1,0.02,0.03,0.01,0.22,0.0,0.09,0.01,0.02,0.07,0.07,0.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.47,0.09,0.07,0.32,0.54,0.17,0.19,0.0,4.95,0.22,0.0,163.38,1.62,0.88,9.43,1.63,0.0,0.85,1.76,0.72,0.0,0.0,11.3,121.63,0.21,29.01,0.33,2.62,0.12,1.63,0.0,4.98,0.0,2.1,0.85,0.0,0.0,0.0,0.97,10.2,1.6,0.0,0.03,0.05,0.6,0.26,2.11,0.0,0.12,1.37,1.49,0.14,0.18,0.61,0.18,0.03,2.63,1.95,0.0,0.0,0.08,0.2,0.2,0.08,0.12,0.0,0.0,0.21,0.0,0.0,0.25,0.29,0.03,0.01,0.59,0.11,1.98,0.37,1.7,0.16,0.48,0.01,0.0,1.37,0.86,0.08,0.02,0.14,2.81,0.45,0.02,0.11,0.17,0.01,0.0,0.02,0.24,0.52,0.54,0.26,1.2,0.7,6.61,0.06,0.18,0.0,0.0,0.58,0.0,0.0,0.0,0.02,0.01,0.14,0.0,63.97,8.34,4.36,10.45,26.01,2.35,0.07,4.6,0.17,2.45,0.0,1.75,7.22,0.78,0.07,0.16,0.1,4.06,0.0,0.84,14.16,4.82,0.25,2.72,1.54,0.11,0.08,0.07,0.38,0.05,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.68,41.18,2.33,23.19,6.65,1.02,6.16,83.03,0.02,0.0,0.03,0.1,1.87,0.0,0.16,0.0,0.06,0.15,0.14,0.0,0.0,0.0,2.34,0.64,3.08,2.0,0.14,0.03,0.0,0.01,0.04,0.2,0.16,0.0,0.0,0.0,2.45,1.21,0.05,0.15,0.46,0.07,0.08,0.17
25%,0.04,0.2425,0.06,0.4775,0.28,1.635,3.745,2.025,0.3575,0.57,0.21,0.2,0.1275,0.09,0.08,1.4025,0.04,0.2775,0.06,0.0875,0.305,0.13,0.38,0.0,0.1,0.3325,0.18,0.27,3.6625,31.8925,0.44,0.2,0.15,0.42,14.1675,7.6275,1.265,10.9,1.74,0.14,0.12,0.17,0.19,0.1325,0.52,1.4075,0.92,0.05,0.225,0.0,0.12,0.18,0.08,0.1475,0.24,0.02,0.06,0.1075,0.885,0.2075,1.36,0.15,0.46,0.57,0.48,1.215,1.2925,0.835,0.1525,0.1175,0.525,0.18,0.24,0.21,0.155,0.12,0.0675,0.0875,0.13,1.45,0.3475,0.18,0.505,1.5075,0.3675,0.4275,1.4075,15.5075,0.4325,17.265,243.66,3.1825,4.51,20.6675,5.7075,0.155,1.9025,3.5175,1.465,0.3275,0.26,34.625,165.875,0.4375,44.9075,0.645,6.8525,0.205,3.73,5.53,10.7325,0.965,6.89,2.1325,0.1425,0.18,0.39,1.495,17.04,3.13,0.5025,0.2475,0.11,1.39,0.645,3.865,0.2,0.8,2.665,2.9525,0.495,0.555,0.9975,1.6,0.34,3.315,3.0525,0.08,0.3775,0.285,0.32,0.5375,0.1675,0.315,0.09,0.11,0.4675,0.0,0.03,0.57,1.15,0.09,0.0675,1.8975,0.9175,7.145,1.005,4.3625,1.0225,1.5575,0.08,0.02,4.7525,2.0275,0.225,0.07,0.565,9.1025,1.16,0.08,0.17,0.51,0.02,0.0,0.06,0.6,0.805,1.24,0.48,2.21,1.1025,14.275,0.1125,0.3575,0.07,0.0375,1.63,0.1275,0.04,0.05,0.08,0.09,0.27,0.05,89.69,11.3525,6.2525,24.84,57.1375,3.595,0.1375,7.835,0.37,4.2275,0.05,2.615,10.2175,1.285,0.1875,0.3,0.1675,8.26,0.07,1.955,26.515,12.035,0.5675,6.1275,3.29,0.355,0.19,0.2425,0.38,0.105,0.375,0.18,0.1,0.14,0.11,0.17,0.075,0.36,0.0,0.05,49.97,72.97,4.6075,50.165,10.81,1.54,11.7,120.44,0.06,0.01,0.0925,0.3,4.49,0.18,0.4175,0.34,0.1525,0.345,0.56,0.01,0.0,0.0,3.135,1.485,5.095,2.89,0.725,0.07,0.0,0.045,0.1,0.565,0.42,0.28,0.19,0.075,5.15,2.825,0.195,0.295,1.305,0.265,0.175,0.225
50%,0.06,0.285,0.08,0.735,0.4,2.235,5.295,4.415,0.485,1.19,0.315,0.32,0.18,0.17,0.12,1.73,0.075,0.525,0.085,0.135,0.495,0.205,0.635,0.0,0.245,0.525,0.19,0.495,5.8,59.93,0.785,0.27,0.2,0.79,23.29,12.885,2.135,16.805,4.09,0.17,0.16,0.2,0.28,0.155,0.675,2.295,1.575,0.17,0.33,0.075,0.175,0.3,0.11,0.185,0.335,0.08,0.08,0.155,1.475,0.28,1.86,0.19,0.96,1.5,0.755,2.765,2.785,1.85,0.225,0.64,1.05,0.24,0.57,0.34,0.26,2.02,1.565,0.185,0.2,2.44,0.605,0.325,0.605,2.485,0.635,0.655,2.26,18.34,0.625,54.69,418.08,5.235,5.6,34.77,10.45,0.285,2.67,5.96,2.485,0.55,1.15,43.155,295.06,0.585,67.485,0.975,10.98,0.27,7.13,30.69,15.935,1.57,9.73,2.96,0.22,0.32,0.635,3.47,25.54,4.975,0.82,0.37,0.16,2.12,1.575,5.06,0.315,1.265,4.35,4.55,0.775,0.73,1.65,2.735,0.65,5.09,5.065,0.21,0.625,0.46,0.41,0.82,0.33,0.475,0.14,0.205,0.745,0.305,0.13,17.715,5.06,0.4,0.285,3.37,5.02,16.05,1.58,7.385,6.12,12.65,0.95,0.02,10.835,2.71,0.41,0.09,2.94,34.755,1.675,0.155,0.23,1.2,0.05,0.045,0.085,0.92,1.365,1.59,0.64,3.98,1.655,23.31,0.13,0.56,0.1,0.815,2.32,8.035,2.765,0.22,12.265,0.11,0.32,0.07,101.905,14.285,7.825,36.125,66.125,4.63,0.19,10.585,0.53,5.395,0.09,2.975,14.585,1.715,0.24,0.47,0.235,11.565,0.12,3.48,33.755,19.15,0.8,9.635,5.065,0.43,0.735,0.625,0.38,0.235,0.51,0.235,0.13,0.165,0.155,0.21,0.09,0.55,1.03,0.09,83.88,124.24,9.405,78.425,16.55,2.5,20.46,254.305,0.08,0.04,0.12,0.48,6.4,0.23,0.72,0.53,0.225,0.485,4.52,0.16,0.44,0.04,4.22,1.795,7.12,3.64,1.03,0.18,0.01,0.1,0.14,0.755,0.53,0.43,0.23,0.09,6.48,4.48,0.28,0.5,1.68,0.39,0.41,0.28
75%,0.08,0.4675,0.11,1.2875,0.7125,3.13,8.2075,9.56,0.855,1.7275,0.48,0.64,0.2825,0.26,0.1825,2.5425,0.12,0.8425,0.1225,0.1725,1.015,0.4,1.375,0.0,0.335,1.08,0.295,0.8325,9.0525,101.55,1.3225,0.38,0.265,1.18,35.8925,23.9925,3.61,43.0825,6.2,0.35,0.1875,0.29,0.3825,0.2275,1.3275,5.57,2.3,0.235,0.975,0.1175,0.3175,0.63,0.1425,0.2425,0.475,0.095,0.12,0.18,3.1025,1.075,3.075,0.24,3.41,5.22,2.3175,5.27,6.3275,3.7075,0.2775,1.525,2.3675,0.3725,1.15,0.51,0.38,2.96,2.7,0.3325,0.31,4.625,1.2875,0.55,0.915,3.8725,0.9725,1.17,3.15,24.3475,1.0625,98.05,523.46,9.4975,8.32,63.91,20.085,0.53,3.9975,8.335,3.775,1.07,4.91,71.595,402.545,0.8725,140.525,2.2725,24.185,0.375,11.45,93.815,37.175,3.7125,25.28,7.8575,0.5575,0.41,1.19,4.65,73.535,7.75,1.6425,0.5825,0.2725,3.1725,3.0525,9.54,0.47,2.5375,8.73,7.7925,1.645,1.03,2.295,4.59,1.42,8.045,10.7875,0.48,1.75,0.83,0.81,1.55,0.4825,1.355,0.245,0.3875,2.2225,0.595,0.27,36.0175,9.28,0.65,0.5525,5.295,9.1875,26.675,2.195,14.1725,9.9375,21.235,1.3025,0.04,17.5575,3.6525,0.615,0.1325,4.9075,56.465,2.1125,0.265,0.355,2.2275,0.09,0.1175,0.12,1.465,1.805,2.125,0.91,5.6325,2.22,32.355,0.1875,0.7525,0.13,1.4675,3.22,13.3325,4.3375,66.31,19.105,0.17,0.4375,0.0925,129.865,18.275,9.065,51.245,94.735,5.515,0.2525,14.4975,0.69,6.915,0.1,4.235,18.6875,2.1875,0.33,0.61,0.29,14.32,0.15,4.765,46.3825,24.74,1.0925,12.3825,7.48,0.665,1.39,0.97,0.38,0.3825,0.6775,0.6825,0.1975,0.2,0.205,0.39,0.11,1.06,1.5875,1.17,134.55,188.3,12.7925,117.135,35.85,4.15,43.62,344.1975,0.11,0.0875,0.16,0.66,7.97,0.3525,1.4375,1.06,0.335,0.7,8.15,7.8425,0.74,0.06,5.965,2.4,12.66,8.46,1.945,0.27,0.025,0.155,0.21,1.0575,0.77,0.945,0.44,0.1,8.505,6.025,0.51,0.82,4.405,1.095,0.595,0.32
max,0.22,1.32,0.22,4.39,1.53,7.33,20.47,29.51,1.78,6.75,1.44,2.37,1.04,1.03,0.48,5.09,0.21,2.04,0.26,0.28,5.27,1.6,3.88,0.2,0.84,8.37,0.78,4.23,34.14,404.07,5.52,1.03,0.73,5.58,175.02,124.43,11.1,158.38,17.16,1.1,0.64,0.79,1.21,0.78,3.67,15.95,28.41,0.77,2.65,0.29,1.21,2.45,1.1,1.01,1.27,0.27,0.35,0.49,10.75,17.29,16.2,0.41,72.91,36.37,14.92,43.46,84.6,29.89,3.26,30.85,12.27,0.71,4.87,1.02,0.77,5.75,4.1,9.08,3.16,27.22,9.78,4.44,2.6,12.85,3.24,11.06,19.45,94.46,2.75,161.75,1258.59,25.49,21.24,233.84,95.23,1.11,9.84,24.26,13.39,6.11,19.61,160.13,835.0,1.62,368.38,4.74,95.31,1.54,51.52,412.39,113.59,12.86,68.27,22.49,2.4,0.75,2.74,10.28,171.95,20.55,3.9,1.91,0.57,9.7,9.66,21.85,1.09,5.75,16.8,18.91,4.76,2.39,7.05,13.52,2.91,11.9,22.88,1.25,8.97,2.26,2.26,3.38,1.43,2.44,0.52,1.08,3.8,2.09,0.87,149.91,21.59,0.96,1.63,13.03,48.45,54.07,4.22,60.48,28.54,40.07,2.14,0.1,33.23,6.63,1.36,0.28,28.31,124.76,3.77,1.06,0.76,13.98,0.27,0.2,0.62,3.98,3.42,4.03,1.57,9.99,3.68,48.06,0.73,2.06,0.33,4.6,46.68,31.15,9.47,136.09,33.87,0.77,2.35,0.26,528.71,45.57,13.85,260.12,375.54,8.86,0.67,23.29,1.24,11.82,0.44,9.42,27.59,4.86,0.78,3.71,0.65,25.34,0.51,10.22,97.67,40.19,2.3,20.62,14.5,1.49,8.03,4.5,0.38,1.15,1.53,1.31,0.42,0.32,0.49,1.07,0.18,1.9,3.37,3.49,204.89,348.23,43.93,222.85,65.47,6.84,86.17,529.98,0.49,0.25,0.45,1.45,12.71,0.73,2.58,1.62,0.54,1.24,25.75,23.09,1.83,0.11,9.46,4.3,23.21,11.95,5.78,2.41,0.11,0.68,0.25,2.72,0.82,1.23,0.52,0.13,25.82,9.04,0.92,1.47,7.43,1.74,0.82,0.82


# Remove lipids that do not appear in every person or type

In [117]:
# Drop every column (lipid) that contains at least one missing value.
lipid_data_df_without_na = lipid_data_df.dropna(axis='columns')

# One sub-frame per sampling method, keyed by its 'Blood Type' label.
blood_type_dict = {
    label: lipid_data_df_without_na[lipid_data_df_without_na['Blood Type'] == label]
    for label in ('Plasma', 'Serum', 'Blood EDTA - Serum', 'DBS - Venous', 'DBS - Finger')
}

# Convenience aliases for the individual sub-frames.
plasma = blood_type_dict['Plasma']
serum = blood_type_dict['Serum']
blood_edta = blood_type_dict['Blood EDTA - Serum']
dbs_venous = blood_type_dict['DBS - Venous']
dbs_finger = blood_type_dict['DBS - Finger']

# Pair-wise scatter plots

In [None]:
# Plot every lipid against the sample annotations, coloured by blood type.
# The grid is bound to its own name: rebinding `plt` here would shadow
# matplotlib.pyplot and break later calls such as plt.plot / plt.clf.
pair_grid = sns.pairplot(lipid_data_df_without_na, y_vars=["Person", 'Blood Type', 'Date'], hue='Blood Type')
pair_grid.savefig('../results/pair-plot.pdf')

# Dimension reduction

## PCA

In [None]:
# Output directory for the dimension-reduction plots; parents=True keeps this
# cell runnable on its own and matches the other result-directory cells.
Path('../results/visualisation').mkdir(parents=True, exist_ok=True)

In [None]:
# Standardise every lipid column (subtract its mean, divide by its standard
# deviation) and project the samples onto two principal components.
lipid_features = lipid_data_df_without_na.drop(columns=['Person', 'Blood Type', 'Date'])
lipid_data_df_without_na_standardised = (lipid_features - lipid_features.mean()) / lipid_features.std()

pca = PCA(n_components=2)
projected_lipid_data = pca.fit_transform(lipid_data_df_without_na_standardised)

# Variance explained per component, followed by the total of both components.
explained_ratio = pca.explained_variance_ratio_
print(explained_ratio)
print(sum(explained_ratio))

In [None]:
# Combine the two principal components with the sample annotations.
pc_df = pd.DataFrame(projected_lipid_data, columns=['PC1', 'PC2'])
for annotation in ('Blood Type', 'Person'):
    pc_df[annotation] = lipid_data_df[annotation]

# Plain scatter plot (regression line disabled), one colour per person.
lmplot_persons = sns.lmplot(
    data=pc_df,
    x="PC1",
    y="PC2",
    hue='Person',
    fit_reg=False,
    legend=True,
    scatter_kws={"s": 50},  # marker size
)
lmplot_persons.savefig('../results/visualisation/persons_pca.pdf')

In [None]:
# Same PCA scatter plot as for the persons, but coloured by blood type.
lmplot_type = sns.lmplot(
    data=pc_df,
    x="PC1",
    y="PC2",
    hue='Blood Type',
    fit_reg=False,
    legend=True,
    scatter_kws={"s": 50},  # marker size
)
lmplot_type.savefig('../results/visualisation/type_pca.pdf')

## t-SNE

In [None]:
# Two-dimensional t-SNE embedding of the standardised lipid profiles,
# initialised from PCA with an automatically chosen learning rate.
tsne = TSNE(
    init='pca',
    learning_rate='auto',
    n_components=2,
)
projected_lipid_data = tsne.fit_transform(lipid_data_df_without_na_standardised)

In [None]:
# Combine the two t-SNE coordinates with the sample annotations.
pc_df = pd.DataFrame(projected_lipid_data, columns=['TSNE1', 'TSNE2'])
for annotation in ('Blood Type', 'Person'):
    pc_df[annotation] = lipid_data_df[annotation]

# Plain scatter plot (regression line disabled), one colour per person.
lmplot_persons = sns.lmplot(
    data=pc_df,
    x="TSNE1",
    y="TSNE2",
    hue='Person',
    fit_reg=False,
    legend=True,
    scatter_kws={"s": 50},  # marker size
)
lmplot_persons.savefig('../results/visualisation/persons_tsne.pdf')

In [None]:
# t-SNE scatter plot (regression line disabled), one colour per blood type.
lmplot_type = sns.lmplot(
    x="TSNE1",
    y="TSNE2",
    data=pc_df,
    fit_reg=False,
    hue='Blood Type',
    legend=True,
    scatter_kws={"s": 80},  # marker size
)
# Save next to the other dimension-reduction plots; the previous relative
# path 'visualisation/' is never created by this notebook.
lmplot_type.savefig('../results/visualisation/type_tsne.pdf')

# Statistical tests

In [None]:
# Significance threshold used by the statistical tests of this notebook.
p_border = 0.01

# Result directories for the Friedman and Kruskal-Wallis heatmaps.
for test_name in ('friedman', 'kruskal-wallis'):
    Path('../results/statistic', test_name).mkdir(parents=True, exist_ok=True)

## Friedman test to test differences in lipid distributions over timepoints per blood type

In [None]:
# For every blood type, count the lipids whose values differ significantly
# (p <= p_border) between the first three sampling dates.
column = []
for value in blood_type_dict.values():
    dates = value['Date'].unique()
    # As before, only the first three dates enter the test.
    samples_per_date = [value[value['Date'] == date] for date in dates[:3]]
    lipid_names = value.drop(columns=['Person', 'Blood Type', 'Date']).keys()

    counter = 0
    for k in lipid_names:
        # NOTE(review): the test pairs measurements by row position, so the
        # rows of each date are presumably in the same person order - confirm.
        _, pvalue = stats.friedmanchisquare(*(sample[k] for sample in samples_per_date))
        if pvalue <= p_border:
            counter += 1
    column.append(counter)

# Single-column heatmap: one annotated cell per blood type.
sns.heatmap(np.asarray(column)[:, np.newaxis],  linewidths=0.5, cmap='viridis',
            yticklabels=blood_type_dict.keys(), xticklabels=[], fmt='g', annot=True, cbar=False)
# Save into the directory created for the Friedman results; the previous
# relative path 'statistic/friedman/' is never created by this notebook.
plt.savefig('../results/statistic/friedman/friedmann_test_blood_type.pdf')

## Kruskal-Wallis test to test differences in lipid distributions between persons per blood type

In [None]:
# For every blood type, count the lipids whose values differ significantly
# (p <= p_border) between the first four persons.
column = []
for value in blood_type_dict.values():
    person = value['Person'].unique()
    # Explicit indexing keeps the requirement of at least four persons.
    rows_per_person = [value[value['Person'] == person[idx]] for idx in range(4)]
    lipid_names = value.drop(columns=['Person', 'Blood Type', 'Date']).keys()

    pvalues = (
        stats.kruskal(*(rows[k] for rows in rows_per_person))[1]
        for k in lipid_names
    )
    column.append(sum(1 for pvalue in pvalues if pvalue <= p_border))

# Single-column heatmap: one annotated cell per blood type.
sns.heatmap(
    np.asarray(column)[:, np.newaxis],
    linewidths=0.5,
    cmap='viridis',
    yticklabels=blood_type_dict.keys(),
    xticklabels=[],
    fmt='g',
    annot=True,
    cbar=False,
)
plt.savefig('../results/statistic/kruskal-wallis/kruskal-wallis_test_persons.pdf')

# Compare intergroup differences

## Shapiro-Wilk test for normality of lipid distributions

In [None]:
# For every blood type, report how many lipids reject the normality
# hypothesis of the Shapiro-Wilk test at the p_border level.
for key, value in blood_type_dict.items():
    lipid_values = value.drop(columns=['Person', 'Blood Type', 'Date'])
    counter = 0
    for k in lipid_values.keys():
        _, pvalue = shapiro(lipid_values[k])
        if pvalue <= p_border:
            counter += 1
    print(f'Number of rejected normal distributed variables: {counter} in {key}')

## Wilcoxon signed-rank test for differences in lipid distributions between blood types, with a heatmap visualising the number of significant differences

In [None]:
# The Wilcoxon heatmap is saved below '../results/statistic/wilcoxon', so
# that is the directory that has to exist.
Path('../results/statistic/wilcoxon').mkdir(parents=True, exist_ok=True)

In [None]:
# matrix[i][j] counts the lipids whose values differ significantly
# (p <= p_border) between blood type i and blood type j; the diagonal stays 0.
matrix = []
for first_key, first_value in blood_type_dict.items():
    lipid_names = first_value.drop(columns=['Person', 'Blood Type', 'Date']).keys()
    column = []
    for second_key, second_value in blood_type_dict.items():
        counter = 0
        if first_key != second_key:
            for k in lipid_names:
                # NOTE(review): samples are paired by row position, so both
                # frames are presumably ordered identically - confirm.
                statistics, pvalue = stats.wilcoxon(first_value[k], second_value[k], mode='exact')
                if pvalue <= p_border:
                    counter += 1
        column.append(counter)
    matrix.append(column)

# Hide the whole upper triangle (it duplicates the lower one) with a boolean
# mask. Masking with np.triu(matrix) used the counts themselves as the mask,
# so upper-triangle cells with a count of 0 were still drawn.
mask = np.triu(np.ones_like(matrix, dtype=bool), k=1)
sns.heatmap(matrix, annot=True, linewidths=0.5, cmap='viridis',
            xticklabels=blood_type_dict.keys(), yticklabels=blood_type_dict.keys(), fmt='g', mask=mask)
plt.savefig('../results/statistic/wilcoxon/wilcoxon_signed_rank_test.pdf')

# Train a random forest classifier

In [None]:
# One result directory per classification setting.
for setting in ('multiclass', 'one_vs_rest', 'one_vs_one'):
    Path('../results/classification', setting).mkdir(parents=True, exist_ok=True)

In [None]:
# Line width shared by the ROC plots.
lw = 2

# Candidate hyper-parameter values explored by the grid search.
n_estimators = np.linspace(start=10, stop=500, num=5).astype(int).tolist()
max_depth = [*np.linspace(10, 100, num=5).astype(int).tolist(), None]
min_samples_split = [2, 3, 5]
min_samples_leaf = [1, 2, 4]
param_grid = dict(
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
)

# Estimator and cross-validation splitter reused by all classification cells.
random_forest = RandomForestClassifier()
logo = LeaveOneGroupOut()

# Feature matrix (lipids only) and integer-encoded blood-type labels.
lipid_x = lipid_data_df_without_na.drop(columns=['Person', 'Blood Type', 'Date'])
label_encoder = LabelEncoder()
blood_type = label_encoder.fit_transform(lipid_data_df_without_na['Blood Type'])
num_classes = len(lipid_data_df_without_na['Blood Type'].unique())

## Multiclass

In [None]:
# Nested leave-one-person-out cross-validation of the multiclass classifier.
# Outer loop: hold out one person for testing. Inner loop (GridSearchCV):
# tune the random forest on the remaining persons.
all_true = []
all_prediction = []

auroc_list = []
outer_groups = lipid_data_df_without_na['Person']
list_shap_values = []
list_test_sets = []
list_permutation_imortance = []
for train_index, test_index in logo.split(lipid_x, groups=outer_groups):
    # The splitter yields positional indices, so rows are selected with .iloc;
    # .loc only works while the frame carries a default 0..n-1 index.
    train_val_data = lipid_x.iloc[train_index]
    test_data = lipid_x.iloc[test_index]
    blood_type_train_val = blood_type[train_index]
    blood_type_test = blood_type[test_index]
    inner_groups = outer_groups.iloc[train_index]
    grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid,
                               cv=logo, n_jobs=-1, verbose=1)
    clf = grid_search.fit(train_val_data, blood_type_train_val, groups=inner_groups)

    # Explain the tuned forest on the held-out samples.
    explainer = shap.TreeExplainer(clf.best_estimator_)
    shap_values = explainer.shap_values(test_data)
    # Keep the SHAP values together with the positions of the test samples.
    list_shap_values.append(shap_values)
    list_test_sets.append(test_index)
    importance = permutation_importance(clf, test_data, blood_type_test)
    list_permutation_imortance.append(importance)

    # Collect true labels and predicted class probabilities for the ROC plot.
    y_pred_proba = clf.predict_proba(test_data)
    all_true.extend(blood_type_test)
    all_prediction.extend(y_pred_proba)

# Average the per-fold permutation importances (means and standard deviations).
mean_importance = np.mean(
    [fold_importance.importances_mean for fold_importance in list_permutation_imortance], axis=0)
std_importance = np.mean(
    [fold_importance.importances_std for fold_importance in list_permutation_imortance], axis=0)

## Permutation importance

In [None]:
# Rank the lipids by their mean permutation importance (column 0) and keep
# the matching standard deviations (column 1) for the error bars.
lipid_names = lipid_x.keys()
forest_importances = pd.Series(mean_importance, index=lipid_names)
forest_sd = pd.Series(std_importance, index=lipid_names)
both = pd.concat([forest_importances, forest_sd], axis=1)
both = both.sort_values(by=0, ascending=False)

# Bar chart of the ten most important lipids.
top_ten = both[0].head(10)
ax = top_ten.plot.bar(yerr=both[1])
ax.set_title("Feature importances using permutation on full model")
ax.set_ylabel("Mean accuracy decrease")
plt.savefig('../results/classification/multiclass/permutation_feature_importance_multiclass.pdf')

## Shap diagram

In [None]:
# Stack the per-fold SHAP values along the sample axis.
# NOTE(review): assumes explainer.shap_values returned one
# (samples, features) array per class for every fold - confirm for the
# installed shap version.
shap_values = np.concatenate(list_shap_values, axis=1)

# The stacked rows follow the order in which samples were held out, so the
# feature rows are reordered the same way before plotting them together.
shap_features = lipid_x.iloc[np.concatenate(list_test_sets)]

for i, class_name in enumerate(label_encoder.classes_):
    shap.summary_plot(shap_values[i], shap_features, show=False)
    # Single-line path: a backslash continuation inside the string literal
    # would embed the indentation whitespace in the file name.
    plt.savefig(f'../results/classification/multiclass/multiclass_{class_name}_shap_beeswarm.pdf')
    plt.clf()

In [None]:
# Stacked bar chart of the SHAP values, one segment per blood type.
per_class_shap = list(shap_values[:, :, :])
shap.summary_plot(
    per_class_shap,
    lipid_x,
    plot_type="bar",
    class_names=label_encoder.classes_,
    show=False,
)
plt.savefig('../results/classification/multiclass/multiclass_shap_bar.pdf')

## Visualise AUROC

In [None]:
# Per-class ROC curves (each class against all others) from the pooled
# out-of-fold predictions.
fpr = {}
tpr = {}
roc_auc = {}

class_indices = range(len(label_encoder.classes_))
pooled_true = np.asarray(all_true)
pooled_scores = np.array(all_prediction)
for i in class_indices:
    type_true = (pooled_true == i).astype(int)
    fpr[i], tpr[i], _ = roc_curve(type_true, pooled_scores[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# The per-class curves are intentionally not drawn; only the macro average is.

# Macro average: interpolate every class curve on the union of all
# false-positive rates and average the true-positive rates.
all_fpr = np.unique(np.concatenate([fpr[i] for i in class_indices]))
mean_tpr = np.zeros_like(all_fpr)
for i in class_indices:
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= len(label_encoder.classes_)
fpr["macro"], tpr["macro"] = all_fpr, mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

plt.plot(
    fpr["macro"],
    tpr["macro"],
    label=f"macro-average ROC curve (area = {roc_auc['macro']:0.2f})",
    color="navy",
    linestyle="-",
    linewidth=lw,
)

# Chance diagonal, axis limits and labelling.
plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Multiclass')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.savefig('../results/classification/multiclass/multiclass_classifier.pdf')
plt.show()

## One vs. rest classification

In [None]:
# One-vs-rest classification: for every blood type, run a nested
# leave-one-person-out cross-validation of a binary random forest
# (1 = this blood type, 0 = any other).
all_true = []
all_prediction = []
clf_list = []

outer_groups = lipid_data_df_without_na['Person']
list_shap_values = []
list_test_sets = []
mean_importance_list = []
std_importance_list = []

for one_type in range(len(label_encoder.classes_)):
    all_true_type = []
    all_prediction_group = []
    tmp_list_shap_values = []
    tmp_list_test_sets = []
    list_permutation_imortance = []
    # The binary target does not depend on the fold, so build it once per type.
    ovr_blood_type = (blood_type == one_type).astype(int)
    for train_index, test_index in logo.split(lipid_x, groups=outer_groups):
        # The splitter yields positional indices, so rows are selected with
        # .iloc; .loc only works while the frame has a default 0..n-1 index.
        train_val_data = lipid_x.iloc[train_index]
        test_data = lipid_x.iloc[test_index]
        blood_type_train_val = ovr_blood_type[train_index]
        blood_type_test = ovr_blood_type[test_index]
        inner_groups = outer_groups.iloc[train_index]
        grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid,
                                   cv=logo, n_jobs=-1, verbose=1)
        clf = grid_search.fit(train_val_data, blood_type_train_val, groups=inner_groups)
        clf_list.append(clf)

        # Explain the tuned forest on the held-out samples.
        explainer = shap.TreeExplainer(clf.best_estimator_)
        shap_values = explainer.shap_values(test_data)
        # Keep the SHAP values together with the positions of the test samples.
        tmp_list_shap_values.append(shap_values)
        tmp_list_test_sets.append(test_index)
        importance = permutation_importance(clf, test_data, blood_type_test)
        list_permutation_imortance.append(importance)

        # Collect true labels and predicted class probabilities for the ROC plot.
        y_pred_proba = clf.predict_proba(test_data)
        all_true_type.extend(blood_type_test)
        all_prediction_group.extend(y_pred_proba)

    # Pool the folds of this blood type.
    list_shap_values.append(np.concatenate(tmp_list_shap_values, axis=1))
    list_test_sets.append(np.concatenate(tmp_list_test_sets, axis=0))
    all_true.append(all_true_type)
    all_prediction.append(all_prediction_group)
    # Average the per-fold permutation importances (means and standard deviations).
    mean_importance = np.mean(
        [fold_importance.importances_mean for fold_importance in list_permutation_imortance], axis=0)
    std_importance = np.mean(
        [fold_importance.importances_std for fold_importance in list_permutation_imortance], axis=0)
    mean_importance_list.append(mean_importance)
    std_importance_list.append(std_importance)

## Permutation importance

In [None]:
# One bar chart per blood type showing the ten lipids with the highest mean
# permutation importance of the one-vs-rest classifier.
for i, class_name in enumerate(label_encoder.classes_):
    forest_importances = pd.Series(mean_importance_list[i], index=lipid_x.keys())
    forest_sd = pd.Series(std_importance_list[i], index=lipid_x.keys())
    # Column 0 holds the means, column 1 the standard deviations.
    both = pd.concat([forest_importances, forest_sd], axis=1).sort_values(by=0, ascending=False)
    top_ten = both.iloc[:10]

    fig, ax = plt.subplots()
    top_ten[0].plot.bar(yerr=top_ten[1], ax=ax)
    ax.set_title("Feature importances using permutation on full model")
    ax.set_ylabel("Mean accuracy decrease")
    # Single-line path: a backslash continuation inside the string literal
    # would embed the indentation whitespace in the file name.
    fig.savefig(f'../results/classification/one_vs_rest/{class_name}_vs_rest_permutation_importance.pdf')
    # Close the figure so that the loop does not accumulate open figures.
    plt.close(fig)

## SHAP

In [None]:
# SHAP plots of the one-vs-rest classifiers: a beeswarm for the positive
# class and a stacked bar chart for both classes, per blood type.
for i, class_name in enumerate(label_encoder.classes_):
    shap_values = list_shap_values[i]
    test_set = list_test_sets[i]
    # test_set holds positional indices, in the same order as the SHAP rows.
    X = lipid_x.iloc[test_set]

    # Index 1 is the positive class, i.e. the current blood type.
    shap.summary_plot(shap_values[1], X, show=False, max_display=10)
    # Saved next to the other one-vs-rest results rather than into the
    # current working directory.
    plt.savefig(f'../results/classification/one_vs_rest/{class_name}_vs_rest_shap_beeswarm.pdf')
    plt.clf()

    # The targets are encoded as 0 = others and 1 = current blood type, so
    # the class names are listed in that order.
    # NOTE(review): assumes the SHAP output follows the 0/1 label order - confirm.
    shap.summary_plot(list(shap_values[:, :, :]), X, plot_type="bar",
                      class_names=['Others', class_name], show=False)
    # Single-line path: a backslash continuation inside the string literal
    # would embed the indentation whitespace in the file name.
    plt.savefig(f'../results/classification/one_vs_rest/{class_name}_vs_rest_shap_bar.pdf')
    plt.clf()

## Visualise AUROC

In [None]:
# ROC curve of every one-vs-rest classifier from its pooled out-of-fold
# predictions (column 1 holds the probability of the positive class).
fpr = {}
tpr = {}
roc_auc = {}

class_indices = range(len(label_encoder.classes_))
for i in class_indices:
    positive_scores = np.array(all_prediction[i])[:, 1]
    fpr[i], tpr[i], _ = roc_curve(all_true[i], positive_scores)
    roc_auc[i] = auc(fpr[i], tpr[i])

for i in class_indices:
    class_name = label_encoder.inverse_transform([i])[0]
    plt.plot(fpr[i], tpr[i], lw=lw, label=f"{class_name} (area = {roc_auc[i]:0.2f})")

# Macro average (stored in the dictionaries, not drawn): interpolate every
# curve on the union of all false-positive rates and average them.
all_fpr = np.unique(np.concatenate([fpr[i] for i in class_indices]))
mean_tpr = np.zeros_like(all_fpr)
for i in class_indices:
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= len(label_encoder.classes_)
fpr["macro"], tpr["macro"] = all_fpr, mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Chance diagonal, axis limits and labelling.
plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('One vs. Rest')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.savefig('../results/classification/one_vs_rest/one_vs_rest_classifier.pdf')
plt.show()

## One vs. one classification

In [None]:
# One-vs-one classification: for every ordered pair of blood types, run a
# nested leave-one-person-out cross-validation of a binary random forest
# (1 = first type, 0 = second type) on the samples of those two types only.
#
# All result lists are indexed as [one_type][second_one_type]; the diagonal
# entries are empty lists.
all_true = []
all_prediction = []

list_shap_values = []
list_test_sets = []
mean_importance_list = []
std_importance_list = []

# Work on local copies of the full data so that the module-level `lipid_x`
# used by the other cells is not replaced by a two-class subset.
all_features = lipid_data_df_without_na.drop(columns=['Person', 'Blood Type', 'Date'])
all_persons = lipid_data_df_without_na['Person']

for one_type in range(len(label_encoder.classes_)):
    all_true_type = []
    all_prediction_type = []
    shap_values_type = []
    test_sets_type = []
    mean_importance_type = []
    std_importance_type = []

    for second_one_type in range(len(label_encoder.classes_)):
        if one_type == second_one_type:
            # Placeholders keep the [one_type][second_one_type] indexing valid.
            for per_pair_results in (all_true_type, all_prediction_type, shap_values_type,
                                     test_sets_type, mean_importance_type, std_importance_type):
                per_pair_results.append([])
            continue

        # Positions (in the full data set) of the samples of the two types.
        pair_positions = np.flatnonzero(np.isin(blood_type, [one_type, second_one_type]))
        ovo_blood_type = (blood_type[pair_positions] == one_type).astype(int)
        pair_groups = all_persons.iloc[pair_positions]
        pair_x = all_features.iloc[pair_positions]

        all_prediction_ovo = []
        all_true_ovo = []
        # Per-pair accumulators; previously these lists were never created in
        # this cell and silently reused leftovers of the one-vs-rest cell.
        pair_shap_values = []
        pair_test_sets = []
        pair_permutation_importance = []
        for train_index, test_index in logo.split(pair_x, groups=pair_groups):
            train_val_data = pair_x.iloc[train_index]
            test_data = pair_x.iloc[test_index]
            blood_type_train_val = ovo_blood_type[train_index]
            blood_type_test = ovo_blood_type[test_index]
            inner_groups = pair_groups.iloc[train_index]
            grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid,
                                       cv=logo, n_jobs=-1, verbose=1)
            clf = grid_search.fit(train_val_data, blood_type_train_val, groups=inner_groups)

            # Explain the tuned forest on the held-out samples.
            explainer = shap.TreeExplainer(clf.best_estimator_)
            shap_values = explainer.shap_values(test_data)
            pair_shap_values.append(shap_values)
            # Store the test positions relative to the full data set.
            pair_test_sets.append(pair_positions[test_index])
            importance = permutation_importance(clf, test_data, blood_type_test)
            pair_permutation_importance.append(importance)

            # Collect true labels and predicted class probabilities for the ROC plot.
            y_pred_proba = clf.predict_proba(test_data)
            all_true_ovo.extend(blood_type_test)
            all_prediction_ovo.extend(y_pred_proba)

        all_true_type.append(all_true_ovo)
        all_prediction_type.append(all_prediction_ovo)
        # Pool the folds of this pair, mirroring the one-vs-rest cell.
        # NOTE(review): axis=1 assumes one (samples, features) SHAP array per
        # class, as in the one-vs-rest cell - confirm for the installed shap.
        shap_values_type.append(np.concatenate(pair_shap_values, axis=1))
        test_sets_type.append(np.concatenate(pair_test_sets, axis=0))
        mean_importance_type.append(np.mean(
            [fold_importance.importances_mean for fold_importance in pair_permutation_importance], axis=0))
        std_importance_type.append(np.mean(
            [fold_importance.importances_std for fold_importance in pair_permutation_importance], axis=0))

    all_true.append(all_true_type)
    all_prediction.append(all_prediction_type)
    list_shap_values.append(shap_values_type)
    list_test_sets.append(test_sets_type)
    mean_importance_list.append(mean_importance_type)
    std_importance_list.append(std_importance_type)

## Visualise AUROCs

In [None]:
# One figure per blood type, containing the ROC curves of that type against
# each of the other types (pooled out-of-fold predictions).
lw = 2
for one_type in range(len(label_encoder.classes_)):
    one_name = label_encoder.inverse_transform([one_type])[0]
    for second_one_type in range(len(label_encoder.classes_)):
        if one_type == second_one_type:
            continue
        second_name = label_encoder.inverse_transform([second_one_type])[0]
        all_true_ovo = all_true[one_type][second_one_type]
        all_prediction_ovo = all_prediction[one_type][second_one_type]

        # Column 1 holds the predicted probability of the positive class.
        fpr, tpr, _ = roc_curve(all_true_ovo, np.array(all_prediction_ovo)[:, 1])
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, lw=lw,
                 label=f"{one_name} vs. {second_name} (area = {roc_auc:0.2f})")

    # Decorate the figure once, after all curves of this type are drawn.
    plt.plot([0, 1], [0, 1], "k--", lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title(f'{one_name} vs. One')
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    # Single-line path: a backslash continuation inside the string literal
    # would embed the indentation whitespace in the file name.
    plt.savefig(f'../results/classification/one_vs_one/{one_name}_vs_one_classifier.pdf')
    plt.show()