# Pilot Study: Preliminary Correlations
Mostly interested in the relationship between hemolysis and various extracted FCM features. 

### Analysis Plan
1. Import CSV data into pandas dataframes
2. Concatenate
3. Calculate correlation between hemolysis and all features in feature-set. \
    Pearson correlation coefficient
4. Visualize the most significant relationships.

## 1. Import

In [96]:
import numpy as np
import pandas as pd 
import csv

data = pd.DataFrame()

### a) Hemolysis Data

In [200]:
# import and then repeat each row 6 times
tmp = []
with open('data/processed/hemolysis-2020-02.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        tmp.append(row)
hdata = pd.DataFrame(np.array(tmp[1:]).T[1:].T.reshape(-1,1),columns=['Hemolysis'])
hdata['Patient'] = np.repeat(['2','3','4'],6)
hdata['Time'] = np.tile(np.repeat(['PRE','POST'],3),3)
hdata['Shear']=np.tile(['0','600','1200'],6)
cols = hdata.columns.tolist()
cols = cols[-3:] + cols[:-3]
hdata = hdata[cols]
hdata

Unnamed: 0,Patient,Time,Shear,Hemolysis
0,2,PRE,0,0.627152191
1,2,PRE,600,0.933646215
2,2,PRE,1200,1.076384861
3,2,POST,0,1.245394422
4,2,POST,600,1.611435857
5,2,POST,1200,1.117542629
6,3,PRE,0,0.529074104
7,3,PRE,600,0.764636653
8,3,PRE,1200,1.042232
9,3,POST,0,0.613141036


### b) Platelet Data

In [195]:
tmp = []
with open('data/processed/plt-2020-02-c.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        tmp.append(row)
pdata = pd.DataFrame(tmp[1:],columns=tmp[0])

# Clean-up 
del pdata['LkIn']
pdata['Agonist'] = pdata['Agonist'].replace({"YES":"TRAP","NO":"None"})
pdata['RPM'] = pdata['RPM'].replace({"0C":'0'})
pdata[['Patient','Time']] = ldata[['Patient','Time']]
del pdata['Experiment']
cols = pdata.columns.tolist()
cols = cols[-2:] + cols[:-2]
pdata = pdata[cols]
pdata.rename({'RPM':'Shear','Agonist':'Activation'}, axis=1, inplace = True)
pdata

Unnamed: 0,Patient,Time,Shear,Activation,Density,%PMPs,CD61,%CD62+,CD62,CD42
0,0,PRE,0,,58998.74836785085,0.0221006103978109,168.48548794358388,0.0008609556607834,38.19717549283665,286.438407149338
1,0,PRE,0,TRAP,15475.277410196695,0.0577751664708186,210.9690513406172,0.3704011639991685,173.0936954659055,192.8218520789196
2,0,PRE,600,,37161.84615232,0.036597832754038,159.63385442879422,0.0008488964346349,36.84734918695216,268.95987855750434
3,0,PRE,600,TRAP,31382.34304078239,0.0712686567164179,176.23564405717434,0.3194053836882282,144.59599030674116,316.227766016838
4,0,PRE,1200,,26068.823528341178,0.3494280686317642,82.78825906280481,0.0040574809805579,26.416483203860924,115.47819846894582
5,0,PRE,1200,TRAP,26804.11302872653,0.4259986902423052,70.41355154858087,0.0182544209925841,39.59644988918792,62.08243607191253
6,0,POST,0,,114287.33812480287,0.028974002778329,156.78788438269703,0.0002043735949315,35.54522355611888,276.316137845464
7,0,POST,0,TRAP,18170.12585326314,0.0753876314382463,199.88548118735105,0.0740169622205089,155.38398312749737,239.27991734281375
8,0,POST,600,,40847.35372886463,0.0746180267677365,77.04043920109092,0.0016638935108153,22.266720103519187,72.33941627366748
9,0,POST,600,TRAP,30374.22834520929,0.1447851435974951,77.04043920109092,0.0031561671506122,24.58244068920197,45.72526698969311


### c) Leukocyte Data

In [144]:
tmp = []
with open('data/processed/LKO-2020-02-c.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        tmp.append(row)
ldata = pd.DataFrame(tmp[1:],columns=tmp[0])
ldata

Unnamed: 0,Patient,Time,Shear,Activation,N-CD11b,N-%CD61,N-CD45,M-CD11b,M-%CD61,M-CD45
0,0,PRE,0,,41.90379632286141,0.0061912658927584,542.69564101529,17.36338897446367,0.9421221864951768,1648.7424498298817
1,0,PRE,0,fMLP,68.1577066761324,0.0209328782707622,830.2704341914919,11.724968022041695,0.9613636363636364,1663.3601518516643
2,0,PRE,600,,45.09972785633476,0.0140046296296296,542.4226530435212,14.086396641308118,0.9623287671232876,1759.0611408548466
3,0,PRE,600,fMLP,65.3334670370742,0.0154138277672288,752.9055918615112,12.68114170471682,0.9692982456140352,1380.8483527397138
4,0,PRE,1200,,58.91713041767426,0.0239157372986369,745.5722286270634,11.312987934369003,0.9605263157894736,1785.6726422892511
5,0,PRE,1200,fMLP,66.70628886005171,0.0511085180863477,777.9886504681964,11.810612776139736,0.9722921914357684,1544.9229590485038
6,0,POST,0,,67.05525603145482,0.0121689334287759,368.5090640890285,92.62937446923706,0.02,1182.098498197335
7,0,POST,0,fMLP,55.97685276630448,0.0119614575257503,518.9770155741558,16.60880234793898,0.0,1008.9793530256604
8,0,POST,600,,38.04940685417992,0.0083453877823522,384.8942407435984,27.843031550484937,0.0,1389.5560332260984
9,0,POST,600,fMLP,48.749194336606855,0.0088183421516754,470.4640217730048,14.291373617909075,0.024390243902439,1059.7710189830068


## 2. Concatenation

In [209]:
aData = pd.merge(pdata, ldata, how='outer', on=['Patient', 'Time','Shear','Activation'])
aData = pd.merge(aData, hdata, how='outer', on=['Patient', 'Time','Shear'])
# convert to floats
aData[['Density','%PMPs','CD61',
       '%CD62+','CD62','CD42',
       'N-CD11b','N-%CD61','Hemolysis']] = aData[['Density','%PMPs','CD61',
                                              '%CD62+','CD62','CD42',
                                              'N-CD11b','M-%CD61','Hemolysis']].astype(np.float16)
aData

Unnamed: 0,Patient,Time,Shear,Activation,Density,%PMPs,CD61,%CD62+,CD62,CD42,N-CD11b,N-%CD61,N-CD45,M-CD11b,M-%CD61,M-CD45,Hemolysis
0,0,PRE,0,,59008.0,0.022095,168.5000,0.000861,38.187500,286.50000,41.906250,0.941895,542.69564101529,17.36338897446367,0.9421221864951769,1648.7424498298817,
1,0,PRE,0,TRAP,15472.0,0.057770,211.0000,0.370361,173.125000,192.87500,,,,,,,
2,0,PRE,0,fMLP,,,,,,,68.187500,0.961426,830.2704341914919,11.724968022041697,0.9613636363636363,1663.3601518516643,
3,0,PRE,600,,37152.0,0.036591,159.6250,0.000849,36.843750,269.00000,45.093750,0.962402,542.4226530435212,14.086396641308118,0.9623287671232876,1759.0611408548466,
4,0,PRE,600,TRAP,31376.0,0.071289,176.2500,0.319336,144.625000,316.25000,,,,,,,
5,0,PRE,600,fMLP,,,,,,,65.312500,0.969238,752.9055918615112,12.68114170471682,0.9692982456140351,1380.8483527397138,
6,0,PRE,1200,,26064.0,0.349365,82.8125,0.004059,26.421875,115.50000,58.906250,0.960449,745.5722286270634,11.312987934369005,0.9605263157894737,1785.6726422892511,
7,0,PRE,1200,TRAP,26800.0,0.426025,70.4375,0.018250,39.593750,62.09375,,,,,,,
8,0,PRE,1200,fMLP,,,,,,,66.687500,0.972168,777.9886504681964,11.810612776139735,0.9722921914357683,1544.9229590485038,
9,0,POST,0,,inf,0.028976,156.7500,0.000204,35.531250,276.25000,67.062500,0.020004,368.5090640890285,92.62937446923706,0.02,1182.098498197335,


## 3. Correlation

In [213]:
# filter patients without hemolysis data
aData['Patient'] = aData['Patient'].astype(np.float16)
correlation = aData[aData.Patient > 1].corr(method='pearson')

In [220]:
correlation

Unnamed: 0,Patient,Density,%PMPs,CD61,%CD62+,CD62,CD42,N-CD11b,N-%CD61,Hemolysis
Patient,1.0,0.22986,-0.241187,0.20455,0.023698,0.053226,0.278633,-0.30864,0.144862,0.236482
Density,0.22986,1.0,-0.117089,-0.254532,-0.173292,-0.220145,0.01436,-0.179955,0.559872,0.410445
%PMPs,-0.241187,-0.117089,1.0,0.10219,0.251092,0.277397,-0.14807,0.452142,-0.509547,-0.205355
CD61,0.20455,-0.254532,0.10219,1.0,0.757584,0.857965,0.470758,0.162299,0.185557,-0.302743
%CD62+,0.023698,-0.173292,0.251092,0.757584,1.0,0.951082,-0.015116,0.137797,-0.334121,-0.115502
CD62,0.053226,-0.220145,0.277397,0.857965,0.951082,1.0,0.129794,0.391324,0.090977,-0.170042
CD42,0.278633,0.01436,-0.14807,0.470758,-0.015116,0.129794,1.0,0.576156,-0.265639,-0.291201
N-CD11b,-0.30864,-0.179955,0.452142,0.162299,0.137797,0.391324,0.576156,1.0,-0.351428,-0.332574
N-%CD61,0.144862,0.559872,-0.509547,0.185557,-0.334121,0.090977,-0.265639,-0.351428,1.0,0.529307
Hemolysis,0.236482,0.410445,-0.205355,-0.302743,-0.115502,-0.170042,-0.291201,-0.332574,0.529307,1.0
