In [29]:
import FlowCytometryTools
import glob
import pandas as pd
import numpy as np
import impyute as im
import os
import fancyimpute
np.set_printoptions(suppress=True)
from fancyimpute import KNN, NuclearNormMinimization, SoftImpute, BiScaler

In [30]:
# Directory that holds the .fcs test cases. Set the FLOWAI_TESTCASES_DIR
# environment variable to point the notebook at another location; when it is
# unset, the original hard-coded directory is used.
files_path = os.environ.get('FLOWAI_TESTCASES_DIR', '/Users/kangakum/FlowAI/TestCases')

In [31]:
# Collect the paths of all *.fcs files located directly inside files_path.
fcs_pattern = os.path.join(files_path, "*.fcs")
read_files = glob.glob(fcs_pattern)

In [32]:
# Put the paths in lexicographic order; the grouping into cases further down
# relies on the position of each file in this list.
read_files = sorted(read_files)


In [33]:
# Number of cases = number of FCS files divided by 4, rounded down.
numcases = len(read_files) // 4
# Start with one empty list per case; the file paths are added afterwards.
cases = [[] for case_slot in range(numcases)]


In [34]:
# Group the sorted file list into cases: file i belongs to case i % numcases,
# which is the same as taking every numcases-th file starting at the case's
# own offset. The lists are rebuilt from scratch (instead of appended to) so
# that re-running this cell does not duplicate every path.
if numcases == 0:
    raise ValueError("Need at least 4 .fcs files to form a case; found " + str(len(read_files)))
cases = [read_files[offset::numcases] for offset in range(numcases)]
print(cases[0])

['/Users/kangakum/FlowAI/TestCases/1XXX103.fcs', '/Users/kangakum/FlowAI/TestCases/2XXX103.fcs', '/Users/kangakum/FlowAI/TestCases/3XXX103.fcs', '/Users/kangakum/FlowAI/TestCases/4XXX103.fcs']


In [35]:
from FlowCytometryTools import FCMeasurement

In [36]:
# Channel names, in the positional order used to pair them with the
# per-tube marker lists below.
channels = [
    'FSC-A', 'FSC-H', 'SSC-A',
    'FITC-A', 'PE-A', 'PerCP-Cy5-5-A', 'PE-Cy7-A',
    'APC-A', 'APC-R700-A', 'APC-H7-A',
    'V450-A', 'V500-C-A', 'BV605-A',
]

# One marker list per tube; entry i is the marker paired with channels[i].
Tube1Antibodies = [
    'FSC-A', 'FSC-H', 'SSC-A',
    'Kap P', 'Lam P', 'CD19', 'CD2',
    'CD10', 'CD20', 'CD23',
    'CD5', 'CD45', 'CD200',
]
Tube2Antibodies = [
    'FSC-A', 'FSC-H', 'SSC-A',
    'Kap M', 'Lam M', 'CD38', 'CD11c',
    'CD103', 'CD3', 'CD25',
    'FMC7', 'CD45', 'CD19',
]
Tube3Antibodies = [
    'FSC-A', 'FSC-H', 'SSC-A',
    'CD57', 'CD16', 'CD8', 'CD5',
    'CD2', 'CD4', 'CD56',
    'CD3', 'CD45', 'CD7',
]
Tube4Antibodies = [
    'FSC-A', 'FSC-H', 'SSC-A',
    'TCR ab', 'TCRgd', 'CD3', 'CD2',
    'CD22', 'CD52', 'CD19',
    'CD30', 'CD45', 'HLADR',
]

# Sanity check: number of channels.
print(len(channels))

13


In [37]:
# One {channel name: marker name} dictionary per tube, in tube order
# (Tubes[0] is tube 1, ..., Tubes[3] is tube 4).
Tubes = [
    {channel: antibodies[position] for position, channel in enumerate(channels)}
    for antibodies in (Tube1Antibodies, Tube2Antibodies, Tube3Antibodies, Tube4Antibodies)
]

In [38]:
# Column layout of the combined per-case matrix: every distinct name from the
# four tube marker lists, listed once.
columns = [
    # scatter parameters and the markers of tube 1
    'FSC-A', 'FSC-H', 'SSC-A',
    'Kap P', 'Lam P', 'CD19', 'CD2', 'CD10', 'CD20', 'CD23', 'CD5', 'CD45', 'CD200',
    # names that first appear in tube 2
    'Kap M', 'Lam M', 'CD38', 'CD11c', 'CD103', 'CD3', 'CD25', 'FMC7',
    # names that first appear in tube 3
    'CD57', 'CD16', 'CD8', 'CD4', 'CD56', 'CD7',
    # names that first appear in tube 4
    'TCR ab', 'TCRgd', 'CD22', 'CD52', 'CD30', 'HLADR',
]
print(columns)

['FSC-A', 'FSC-H', 'SSC-A', 'Kap P', 'Lam P', 'CD19', 'CD2', 'CD10', 'CD20', 'CD23', 'CD5', 'CD45', 'CD200', 'Kap M', 'Lam M', 'CD38', 'CD11c', 'CD103', 'CD3', 'CD25', 'FMC7', 'CD57', 'CD16', 'CD8', 'CD4', 'CD56', 'CD7', 'TCR ab', 'TCRgd', 'CD22', 'CD52', 'CD30', 'HLADR']


In [39]:
# Pre-allocate one NaN-filled matrix per case. Each matrix has as many rows as
# the case's FCS files contain in total, and one column per entry of `columns`.
caseData = []
print(caseData)
for case_files in cases:
    # Add up the row counts of every file that belongs to this case.
    rows = 0
    for case in case_files:
        sample = FCMeasurement(ID = 'Test Sample', datafile = case)
        rows += sample.data.shape[0]
    # NaN marks "not measured"; the real values are written in a later step.
    caseData.append(np.full((rows, len(columns)), np.nan))
print(caseData)

[]
[array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]]), array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]]), array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])]


In [40]:
def case_data_processing(caseNum):
    """Copy the measurements of every tube of one case into ``caseData[caseNum]``.

    For each file in ``cases[caseNum]`` the data is loaded with
    ``FCMeasurement``, the ``'Time'`` column is removed, and each remaining
    column is written into the case matrix in the column of the marker that
    ``Tubes`` assigns to that channel. Rows of successive files are stacked
    one after another; cells for markers a tube does not measure are left
    untouched.

    Parameters
    ----------
    caseNum : int
        Index into ``cases`` / ``caseData`` of the case to fill.

    Returns
    -------
    None
        ``caseData[caseNum]`` is modified in place and one progress line is
        printed per (tube, channel) pair.

    Notes
    -----
    The i-th file of a case is interpreted with ``Tubes[i]``, so the files
    must be listed in tube order. Columns are matched to ``channels`` by
    position after dropping ``'Time'`` -- this assumes the file's remaining
    columns are stored in the same order as ``channels`` (TODO confirm).
    """
    row_offset = 0
    for tube_index, fcs_path in enumerate(cases[caseNum]):
        sample = FCMeasurement(ID = 'Test Sample', datafile = fcs_path)
        # Drop the 'Time' column so the remaining columns line up with `channels`.
        events = np.delete(sample.data.values, sample.channel_names.index('Time'), 1)
        event_count = events.shape[0]
        # Rows of the case matrix reserved for this tube.
        target_rows = slice(row_offset, row_offset + event_count)
        marker_for_channel = Tubes[tube_index]
        for channel_index, channel in enumerate(channels):
            antibody = marker_for_channel.get(channel)
            antibody_num = columns.index(antibody)
            # Copy the whole column at once instead of one row at a time.
            caseData[caseNum][target_rows, antibody_num] = events[:, channel_index]
            print("Data Processing: Case " + str(caseNum + 1) + " tube " + str(tube_index + 1) + " channel " + str(channel_index + 1))
        row_offset += event_count

In [41]:
# Fill the pre-allocated matrix of every case, one case after another.
for case_index in range(len(cases)):
    case_data_processing(case_index)

Data Processing: Case 1 tube 1 channel 1
Data Processing: Case 1 tube 1 channel 2
Data Processing: Case 1 tube 1 channel 3
Data Processing: Case 1 tube 1 channel 4
Data Processing: Case 1 tube 1 channel 5
Data Processing: Case 1 tube 1 channel 6
Data Processing: Case 1 tube 1 channel 7
Data Processing: Case 1 tube 1 channel 8
Data Processing: Case 1 tube 1 channel 9
Data Processing: Case 1 tube 1 channel 10
Data Processing: Case 1 tube 1 channel 11
Data Processing: Case 1 tube 1 channel 12
Data Processing: Case 1 tube 1 channel 13
Data Processing: Case 1 tube 2 channel 1
Data Processing: Case 1 tube 2 channel 2
Data Processing: Case 1 tube 2 channel 3
Data Processing: Case 1 tube 2 channel 4
Data Processing: Case 1 tube 2 channel 5
Data Processing: Case 1 tube 2 channel 6
Data Processing: Case 1 tube 2 channel 7
Data Processing: Case 1 tube 2 channel 8
Data Processing: Case 1 tube 2 channel 9
Data Processing: Case 1 tube 2 channel 10
Data Processing: Case 1 tube 2 channel 11
Data Proce

In [42]:
# Inspect the first row of the first case's matrix.
first_row = caseData[0][0]
print(first_row)

[82652.953125   67951.         31013.1015625    708.90002441
   776.90002441   108.80000305 16291.10058594    26.40999985
    27.79999924   540.71002197   681.70001221  5846.30029297
  2704.70019531            nan            nan            nan
            nan            nan            nan            nan
            nan            nan            nan            nan
            nan            nan            nan            nan
            nan            nan            nan            nan
            nan]


In [43]:
# Fill the NaN entries of the first case's matrix using SoftImpute with its
# default settings.
# NOTE(review): in the captured log the observed MAE rises after iteration 3;
# it may be worth normalising the matrix first (BiScaler is already imported)
# -- confirm before relying on the imputed values.
soft_imputer = SoftImpute()
X_filled_softimpute = soft_imputer.fit_transform(caseData[0])

[SoftImpute] Max Singular Value of X_init = 124642696.122875
[SoftImpute] Iter 1: observed MAE=2007.288530 rank=10
[SoftImpute] Iter 2: observed MAE=1988.192977 rank=9
[SoftImpute] Iter 3: observed MAE=1984.365707 rank=9
[SoftImpute] Iter 4: observed MAE=1987.457198 rank=8
[SoftImpute] Iter 5: observed MAE=1993.729098 rank=8
[SoftImpute] Iter 6: observed MAE=2001.489170 rank=8
[SoftImpute] Iter 7: observed MAE=2009.421782 rank=8
[SoftImpute] Iter 8: observed MAE=2016.869286 rank=8
[SoftImpute] Iter 9: observed MAE=2023.542029 rank=8
[SoftImpute] Iter 10: observed MAE=2029.350648 rank=8
[SoftImpute] Iter 11: observed MAE=2034.309963 rank=8
[SoftImpute] Iter 12: observed MAE=2038.491398 rank=8
[SoftImpute] Iter 13: observed MAE=2041.999464 rank=8
[SoftImpute] Iter 14: observed MAE=2044.948403 rank=8
[SoftImpute] Iter 15: observed MAE=2047.443974 rank=8
[SoftImpute] Iter 16: observed MAE=2048.883723 rank=7
[SoftImpute] Iter 17: observed MAE=2049.980312 rank=7
[SoftImpute] Iter 18: observe

In [46]:
# Show two rows of the completed matrix: the first row and row 200000.
for row_index in (0, 200000):
    print(X_filled_softimpute[row_index])

[82652.953125   67951.         31013.1015625    708.90002441
   776.90002441   108.80000305 16291.10058594    26.40999985
    27.79999924   540.71002197   681.70001221  5846.30029297
  2704.70019531   934.69500504  1262.35397423   792.98324143
  3321.71086228   183.54514132  1427.57263243  3298.27935024
  1369.10658553  1032.25740853  2218.88507607  2362.59284147
  1881.36356262  1164.90393469  1880.3821174    647.4289017
   245.9078472    355.86394805  1015.92913642   509.7254451
  3198.49032071]
[90966.71875    76674.         24116.20117188  1222.98388412
  7105.11664716 18388.90039062  4868.12194965   659.59839553
   942.06220326 12773.71072836 13448.74104755  5327.80029297
  4056.06183374   372.30001831  2058.69995117   817.70001221
  8379.29980469    54.20999908   561.55999756   594.91998291
  9746.10058594  1083.32985395  2236.41808731  2585.71745807
  2112.00727766  2299.32552735  2626.19834255   652.21701387
   290.1905782   1744.71728932  5603.65239189  1240.13226945
 12335.22