# Data analysis for QMSKI workshop 2019

## Import packages

In [1]:
import autoreload
%load_ext autoreload 
%autoreload 2

In [2]:
import numpy  as np
import pandas as pd
import scipy
from scipy.stats.stats import pearsonr

___

## DICE COEFFICIENT

In [3]:
# Read the table of overlap coefficients (Dice, Jaccard, volume similarity)
overlap_coeff = pd.read_csv(
    "/Volumes/Sere's HD/work/data_pyKNEEr/OAI1/segmented/YR04_01_DESS_prep_fc_overlapCoeff.csv"
)
# Re-label the rows 1..N (instead of the default 0..N-1) so the ID column starts from 1
overlap_coeff.index = np.arange(len(overlap_coeff)) + 1
overlap_coeff

Unnamed: 0,Subjects,DiceCoeff,JaccardCoeff,VolumeSimilarity
1,YR04_01_DESS_prep_fc,0.88,0.78,-0.05
2,YR04_02_DESS_prep_fc,0.85,0.73,-0.08
3,YR04_03_DESS_prep_fc,0.87,0.76,-0.06
4,YR04_04_DESS_prep_fc,0.88,0.79,-0.06
5,YR04_05_DESS_prep_fc,0.89,0.8,-0.1
6,YR04_06_DESS_prep_fc,0.05,0.03,-0.58
7,YR04_07_DESS_prep_fc,0.87,0.77,-0.1
8,YR04_08_DESS_prep_fc,0.87,0.78,-0.08
9,YR04_09_DESS_prep_fc,1.0,1.0,0.0
10,YR04_10_DESS_prep_fc,0.84,0.72,-0.13


In [4]:
# Select only successful segmentations for statistics, based on the value of the Dice coefficient

# Thresholds used to classify each row of the overlap table
DICE_FAILURE_THRESHOLD = 0.70  # a Dice coefficient below this value is treated as a failed segmentation
DICE_REFERENCE_VALUE   = 1.00  # a Dice coefficient of 1 is treated as the reference image

# Index labels of the unsuccessful segmentations and of the reference image.
# The reference is matched with np.isclose rather than `==` so that the test does not
# depend on the Dice coefficient being stored as exactly 1.0 in floating point.
index_failed    = overlap_coeff[overlap_coeff.DiceCoeff < DICE_FAILURE_THRESHOLD].index.tolist()           # failed segmentations
index_reference = overlap_coeff[np.isclose(overlap_coeff.DiceCoeff, DICE_REFERENCE_VALUE)].index.tolist()  # reference image
index_excluded  = index_failed + index_reference                                                           # all rows to remove

# Show the rows that will be removed
print("Rows to be removed are:")
overlap_coeff.loc[index_excluded, :]

Rows to be removed are:


Unnamed: 0,Subjects,DiceCoeff,JaccardCoeff,VolumeSimilarity
6,YR04_06_DESS_prep_fc,0.05,0.03,-0.58
9,YR04_09_DESS_prep_fc,1.0,1.0,0.0


In [5]:
# Remove the rows of the reference image and of the failed segmentations.
# This cell overwrites overlap_coeff, so on a second run the labels in index_excluded
# are already gone; errors="ignore" turns that re-run into a no-op instead of a KeyError.
# (index_excluded was derived from overlap_coeff itself, so no genuine mismatch is hidden.)
overlap_coeff = overlap_coeff.drop(index_excluded, errors="ignore")

In [6]:
# Summary statistics of the overlap coefficients for the retained segmentations,
# displayed with two decimals
overlap_coeff.describe().round(decimals=2)

Unnamed: 0,DiceCoeff,JaccardCoeff,VolumeSimilarity
count,17.0,17.0,17.0
mean,0.86,0.76,-0.08
std,0.02,0.03,0.02
min,0.81,0.68,-0.13
25%,0.86,0.75,-0.09
50%,0.87,0.77,-0.08
75%,0.88,0.78,-0.06
max,0.89,0.8,-0.03


___

## CARTILAGE THICKNESS

### Descriptive statistics of cartilage thickness calculated from pyKNEEr's segmentations

In [7]:
# Read the table of cartilage thicknesses
thickness_pkr = pd.read_csv(
    "/Volumes/Sere's HD/work/data_pyKNEEr/OAI1/morphology/thicknesses_pkn.csv"
)
# Re-label the rows 1..N (instead of the default 0..N-1) so the ID column starts from 1
thickness_pkr.index = np.arange(len(thickness_pkr)) + 1
thickness_pkr

Unnamed: 0,Subjects,averageThickness,std.dev
1,YR04_01_DESS_prep_fc_thickness_1,2.39,0.88
2,YR04_02_DESS_prep_fc_thickness_1,2.23,0.89
3,YR04_03_DESS_prep_fc_thickness_1,1.94,0.63
4,YR04_04_DESS_prep_fc_thickness_1,2.03,0.65
5,YR04_05_DESS_prep_fc_thickness_1,2.32,0.78
6,YR04_06_DESS_prep_fc_thickness_1,1.57,0.75
7,YR04_07_DESS_prep_fc_thickness_1,2.06,0.68
8,YR04_08_DESS_prep_fc_thickness_1,2.23,0.84
9,YR04_09_DESS_prep_fc_thickness_1,2.19,0.69
10,YR04_10_DESS_prep_fc_thickness_1,2.2,0.73


In [8]:
# Discard the reference image and the failed segmentations (row labels listed in index_excluded)
thickness_pkr = thickness_pkr.drop(index=index_excluded)

In [9]:
# Summary statistics of the thickness table for the retained segmentations,
# displayed with two decimals
thickness_pkr.describe().round(decimals=2)

Unnamed: 0,averageThickness,std.dev
count,17.0,17.0
mean,2.17,0.77
std,0.16,0.1
min,1.94,0.62
25%,2.06,0.71
50%,2.2,0.76
75%,2.25,0.84
max,2.5,0.95


### Descriptive statistics of cartilage thickness calculated from ground truth segmentations

In [10]:
# Read the table of cartilage thicknesses
thickness_gt = pd.read_csv(
    "/Volumes/Sere's HD/work/data_pyKNEEr/OAI1/morphology/thicknesses_gt.csv"
)
# Re-label the rows 1..N (instead of the default 0..N-1) so the ID column starts from 1
thickness_gt.index = np.arange(len(thickness_gt)) + 1
thickness_gt

Unnamed: 0,Subjects,averageThickness,std.dev
1,YR04_01_DESS_QM_fc_thickness_1,2.45,0.84
2,YR04_02_DESS_QM_fc_thickness_1,2.29,0.84
3,YR04_03_DESS_QM_fc_thickness_1,1.9,0.65
4,YR04_04_DESS_QM_fc_thickness_1,2.09,0.67
5,YR04_05_DESS_QM_fc_thickness_1,2.42,0.84
6,YR04_06_DESS_QM_fc_thickness_1,2.2,0.79
7,YR04_07_DESS_QM_fc_thickness_1,2.09,0.72
8,YR04_08_DESS_QM_fc_thickness_1,2.3,0.9
9,YR04_09_DESS_QM_fc_thickness_1,2.19,0.69
10,YR04_10_DESS_QM_fc_thickness_1,2.32,0.7


In [11]:
# Discard the reference image and the failed segmentations (row labels listed in index_excluded)
thickness_gt = thickness_gt.drop(index=index_excluded)

In [12]:
# Summary statistics of the thickness table for the retained segmentations,
# displayed with two decimals
thickness_gt.describe().round(decimals=2)

Unnamed: 0,averageThickness,std.dev
count,17.0,17.0
mean,2.25,0.78
std,0.19,0.11
min,1.87,0.64
25%,2.16,0.7
50%,2.29,0.73
75%,2.38,0.84
max,2.63,1.02


### Pearson's coefficient between the two groups of thicknesses

In [13]:
# Evaluation of the correlation between the average thicknesses calculated from images
# segmented using pyKNEEr and those calculated from ground truth images

# Convert the table columns to lists. The values are paired by position, so both
# tables must list the same subjects in the same order.
thickness_pkr_list = thickness_pkr["averageThickness"].tolist()
thickness_gt_list  = thickness_gt["averageThickness"].tolist()

# Calculate Pearson's coefficient (the p-value is computed but not reported)
pearsons, pvalue = scipy.stats.pearsonr(thickness_pkr_list, thickness_gt_list)
print("Pearson's coefficient is %.2f" % round(pearsons, 2))

Person's coefficient is 0.96


___

## CARTILAGE VOLUME

### Descriptive statistics of cartilage volume calculated from pyKNEEr's segmentations

In [14]:
# Read the table of cartilage volumes
volume_pkr = pd.read_csv(
    "/Volumes/Sere's HD/work/data_pyKNEEr/OAI1/morphology/volumes_pkn.csv"
)
# Re-label the rows 1..N (instead of the default 0..N-1) so the ID column starts from 1
volume_pkr.index = np.arange(len(volume_pkr)) + 1
volume_pkr

Unnamed: 0,Subjects,Volumes
1,YR04_01_DESS_prep_fc,14965
2,YR04_02_DESS_prep_fc,12295
3,YR04_03_DESS_prep_fc,10750
4,YR04_04_DESS_prep_fc,11723
5,YR04_05_DESS_prep_fc,13288
6,YR04_06_DESS_prep_fc,6032
7,YR04_07_DESS_prep_fc,11395
8,YR04_08_DESS_prep_fc,13490
9,YR04_09_DESS_prep_fc,12983
10,YR04_10_DESS_prep_fc,16378


In [15]:
# Discard the reference image and the failed segmentations (row labels listed in index_excluded)
volume_pkr = volume_pkr.drop(index=index_excluded)

In [16]:
# Summary statistics of the volume table for the retained segmentations,
# displayed with two decimals
volume_pkr.describe().round(decimals=2)

Unnamed: 0,Volumes
count,17.0
mean,13084.24
std,1760.34
min,10414.0
25%,11723.0
50%,13490.0
75%,13964.0
max,16378.0


### Descriptive statistics of cartilage volume calculated from ground truth segmentations

In [17]:
# Read the table of cartilage volumes
volume_gt = pd.read_csv(
    "/Volumes/Sere's HD/work/data_pyKNEEr/OAI1/morphology/volumes_gt.csv"
)
# Re-label the rows 1..N (instead of the default 0..N-1) so the ID column starts from 1
volume_gt.index = np.arange(len(volume_gt)) + 1
volume_gt

Unnamed: 0,Subjects,Volumes
1,YR04_01_DESS_QM_fc,15783
2,YR04_02_DESS_QM_fc,13317
3,YR04_03_DESS_QM_fc,11385
4,YR04_04_DESS_QM_fc,12395
5,YR04_05_DESS_QM_fc,14649
6,YR04_06_DESS_QM_fc,10925
7,YR04_07_DESS_QM_fc,12603
8,YR04_08_DESS_QM_fc,14585
9,YR04_09_DESS_QM_fc,12983
10,YR04_10_DESS_QM_fc,18658


In [18]:
# Discard the reference image and the failed segmentations (row labels listed in index_excluded)
volume_gt = volume_gt.drop(index=index_excluded)

In [19]:
# Summary statistics of the volume table for the retained segmentations,
# displayed with two decimals
volume_gt.describe().round(decimals=2)

Unnamed: 0,Volumes
count,17.0
mean,14168.94
std,2018.97
min,11237.0
25%,12603.0
50%,14649.0
75%,15052.0
max,18658.0


### Pearson's coefficient between the two groups of volumes

In [20]:
# Evaluation of the correlation between the volumes calculated from images
# segmented using pyKNEEr and those calculated from ground truth images

# Convert the table columns to lists. The values are paired by position, so both
# tables must list the same subjects in the same order.
volume_pkr_list = volume_pkr["Volumes"].tolist()
volume_gt_list  = volume_gt["Volumes"].tolist()

# Calculate Pearson's coefficient (the p-value is computed but not reported)
pearsons, pvalue = scipy.stats.pearsonr(volume_pkr_list, volume_gt_list)
print("Pearson's coefficient is %.2f" % round(pearsons, 2))

Person's coefficient is 0.98
