In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [35]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from pathlib import Path

In [10]:
from src import setup

# Requirements

Write the path to the data directory into `src/data-path.txt`.

In [11]:
# Root data directory, as returned by the project's `setup` helper.
# NOTE(review): presumably read from `src/data-path.txt` - confirm in `src/setup.py`.
path = setup.get_data_path()

The path in `src/data-path.txt` should point to the location where the data is organized as follows:

```
.
├── ClinicalCFD
│   ├── MagnitudeClinical
│   └── VectorClinical
├── ...
├── labels
│   ├── Cases_highrisk_noMI.xlsx
│   ├── FC_AI_cases_discrepant.xlsx
│   ├── list of FC.xlsx
│   ├── Matching.xlsx
│   └── WSSdescriptors_AvgValues.xlsx
├── ...
```

# Labels

In [13]:
# Directory containing the label spreadsheets.
path_labels = path.joinpath('labels')

In [14]:
# Names of every entry found directly inside the labels directory.
[entry.name for entry in path_labels.iterdir()]

['Cases_highrisk_noMI.xlsx',
 'FC_AI_cases_discrepant.xlsx',
 'list of FC.xlsx',
 'Matching.xlsx',
 'WSSdescriptors_AvgValues.xlsx',
 '.~lock.Cases_highrisk_noMI.xlsx#',
 '.~lock.FC_AI_cases_discrepant.xlsx#']

In [16]:
# Load the spreadsheet of averaged WSS descriptors into a DataFrame.
df_labels = pd.read_excel(
    io=path_labels.joinpath('WSSdescriptors_AvgValues.xlsx'),
    engine='openpyxl',
)
df_labels

Unnamed: 0,Code,TAWSS,OSI,RRT,transWSS,CFI,TAWSSax,TAWSSsc,WSSratio,TSVI,FC
0,CHUV01_LAD,3.64717,0.014050,0.846456,0.073336,0.057265,3.58750,0.158997,0.176172,228.3910,0
1,CHUV01_LCX,2.55975,0.003294,0.611145,0.055230,0.040732,2.48901,0.337315,0.290763,86.0399,1
2,CHUV02_LAD,2.63011,0.005814,0.582346,0.056851,0.040757,2.59072,0.225770,0.202236,60.5216,1
3,CHUV02_LCX,2.98097,0.000154,0.373309,0.039946,0.014991,2.95874,0.257049,0.077637,22.2836,0
4,CHUV02_RCA,2.43675,0.006072,0.583699,0.098474,0.056273,2.29555,0.589120,0.467824,60.5002,0
...,...,...,...,...,...,...,...,...,...,...,...
183,OLV048_LCX,2.53027,0.000036,0.407267,0.017071,0.007672,2.52312,0.127840,0.048128,10.4213,0
184,OLV049_LAD,2.86934,0.010788,0.673836,0.028470,0.035946,2.84023,0.160913,0.326003,102.0050,1
185,OLV049_RCA,2.09383,0.001343,0.756832,0.034190,0.030094,2.02367,0.213538,0.165229,39.1061,0
186,OLV050_LCX,2.82282,0.000498,0.409097,0.058604,0.025448,2.76545,0.386589,0.137708,33.7438,1


In [27]:
# True if at least one cell anywhere in the table is missing
# (`axis=None` reduces over both rows and columns).
df_labels.isna().any(axis=None)

False

In [18]:
# Check that no value in the `Code` column is repeated.
df_labels['Code'].is_unique

True

In [20]:
# Number of rows for each value of the `FC` column.
df_labels['FC'].value_counts()

0    108
1     80
Name: FC, dtype: int64

## Patient statistics

In [22]:
# Derive the patient identifier: the part of `Code` before the first
# underscore (e.g. 'CHUV01_LAD' -> 'CHUV01').
# The vectorised `.str` accessor replaces a per-row Python lambda; `n=1`
# stops splitting after the first underscore because only the head is used.
df_labels['patient'] = df_labels['Code'].str.split('_', n=1).str[0]

In [32]:
# Number of distinct patient identifiers.
df_labels['patient'].nunique()

80

In [31]:
# How many patients have 1, 2, 3, ... rows: count `Code` entries per patient,
# then tally how often each count occurs.
(
    df_labels
    .groupby('patient')['Code']
    .count()
    .rename('n_record_per_patient')
    .value_counts()
)

2    52
3    28
Name: n_record_per_patient, dtype: int64

# Data

In [34]:
path_data = path.joinpath('ClinicalCFD')
# Sub-directories holding the magnitude and vector files, respectively.
path_magnitude, path_vector = (
    path_data.joinpath(subdir)
    for subdir in ('MagnitudeClinical/', 'VectorClinical/')
)

In [52]:
# Bare file names (no directory part) of every `.vtp` file in each folder.
files_magnitude = [vtp.name for vtp in path_magnitude.glob('*.vtp')]
files_vector = [vtp.name for vtp in path_vector.glob('*.vtp')]

In [53]:
# Number of `.vtp` files found, as (magnitude, vector).
len(files_magnitude), len(files_vector)

(188, 188)

In [54]:
# Preview the first three file names of each set, as (vector, magnitude).
files_vector[:3], files_magnitude[:3]

(['FRH005_LCX_WSS.vtp', 'FRH012_RCA_WSS.vtp', 'OLV009_LAD_WSS.vtp'],
 ['OLV015_LAD_WSSMag.vtp', 'CHUV08_LCX_WSSMag.vtp', 'OLV047_LCX_WSSMag.vtp'])

In [55]:
# Compare patients between the two sets
def extract_name_vessel(name: str) -> str:
    """Return `name` with its last underscore-separated field removed.

    For example ``'FRH005_LCX_WSS.vtp'`` becomes ``'FRH005_LCX'``.
    A name that contains no underscore yields an empty string.
    """
    # rpartition splits on the last underscore only; element 0 is everything
    # before it (or '' when there is no underscore at all).
    return name.rpartition('_')[0]

patients_vector = [extract_name_vessel(fname) for fname in files_vector]
patients_magnitude = [extract_name_vessel(fname) for fname in files_magnitude]
# Number of distinct identifiers in each set, as (magnitude, vector).
len(set(patients_magnitude)), len(set(patients_vector))

(188, 188)

In [56]:
# Number of identifiers present in both the vector and the magnitude set.
len(set(patients_vector) & set(patients_magnitude))

188