## Dataset
The data is read either from Google Drive (when running on Colab) or from a local folder.

In [17]:
import sys
from pathlib import Path


# Resolve the dataset directory depending on where the notebook is running.
if "google.colab" not in sys.modules:
    # Local run: the data is expected in the `data/` folder next to the
    # current working directory.
    print(
        "I suppose you're running a jupyter notebook from phunc20/ahead/experiments"
    )
    data_dir = Path.cwd().parent / "data"
else:
    # Colab run: mount Google Drive and read the data from there.
    from google.colab import drive

    drive.mount("/content/drive")
    data_dir = Path("/content/drive/MyDrive/ahead")

# Shown as the cell output: True when the resolved directory exists.
data_dir.is_dir()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


True

In [18]:
list(data_dir.iterdir())

[PosixPath('/content/drive/MyDrive/ahead/raw_fcs'),
 PosixPath('/content/drive/MyDrive/ahead/EU_label.xlsx'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs.zip'),
 PosixPath('/content/drive/MyDrive/ahead/EU_marker_channel_mapping.xlsx')]

## Package Installation

In [3]:
!pip install -qqq FlowCal

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.3/82.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for FlowCal (setup.py) ... [?25l[?25hdone


Note that installing `FlowCal` also installs a dependency `openpyxl` that we will soon use.

## Verification of Identical Files
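1. `raw_fcs` and `raw_fcs.zip` seem to be different (caveat: a byte-wise `diff` of two zip archives also reports a difference when only the stored paths, timestamps or compression settings differ, so this check alone does not prove that the contained files differ)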

In [16]:
!zip -rq raw_fcs.zip {data_dir}/raw_fcs

In [None]:
!diff raw_fcs.zip {data_dir}/raw_fcs.zip

Binary files raw_fcs.zip and /content/drive/MyDrive/ahead/raw_fcs.zip differ


## Examination of The Excel Files

**(?)** You need to make sure that these excel files both contain only a single worksheet! How would you do that in Python?  
**(R)** Let's use `openpyxl`.  

In [None]:
import openpyxl

In [None]:
# Open both Excel files with openpyxl and list the worksheets each one contains.
filename_mapping, filename_label = "EU_marker_channel_mapping.xlsx", "EU_label.xlsx"
wb_mapping = openpyxl.load_workbook(data_dir / filename_mapping)
wb_label = openpyxl.load_workbook(data_dir / filename_label)
for sheet_report in (
    f'wb_mapping.sheetnames = {wb_mapping.sheetnames}',
    f'wb_label.sheetnames   = {wb_label.sheetnames}',
):
    print(sheet_report)

wb_mapping.sheetnames = ['worksheet']
wb_label.sheetnames   = ['Sheet1', '工作表1']


- `EU_marker_channel_mapping.xlsx` contains only one worksheet
- `EU_label.xlsx` contains two worksheets
    - In particular, the worksheet `"工作表1"` is empty (as revealed by the next cell)

In [None]:
list(wb_label["工作表1"].values)

[]

Ok, great. Since each Excel file effectively contains a single non-empty worksheet, we can more easily use Pandas to inspect their content.

In [19]:
import numpy as np
import pandas as pd

In [20]:
# pd.read_excel?

In [21]:
# Read each Excel file into a DataFrame, then report both shapes.
df_mapping, df_label = (
    pd.read_excel(data_dir / xlsx_name)
    for xlsx_name in ("EU_marker_channel_mapping.xlsx", "EU_label.xlsx")
)
for shape_report in (
    f'df_mapping.shape = {df_mapping.shape}',
    f'df_label.shape   = {df_label.shape}',
):
    print(shape_report)

df_mapping.shape = (35, 4)
df_label.shape   = (40, 2)


### `df_mapping`

In [22]:
df_mapping.shape

(35, 4)

In [23]:
df_mapping.head()

Unnamed: 0,marker_channel,use,PxS(marker),PxN(channel)
0,SSC-W,1,,SSC-W
1,SSC-A,1,,SSC-A
2,SSC-H,1,,SSC-H
3,FSC-W,1,,FSC-W
4,FSC-A,1,,FSC-A


In [24]:
df_mapping.tail()

Unnamed: 0,marker_channel,use,PxS(marker),PxN(channel)
30,CTLA-4_FJComp-BYG670-A,1,CTLA-4,FJComp-BYG670-A
31,PD-1_FJComp-BV750-P-A,1,PD-1,FJComp-BV750-P-A
32,TNFa_FJComp-FITC-A,1,TNFa,FJComp-FITC-A
33,livedead_FJComp-APC-H7-A,1,livedead,FJComp-APC-H7-A
34,-_FJComp-BB790-P-A,0,-,FJComp-BB790-P-A


In [25]:
df_mapping["use"].value_counts()

1    31
0     4
Name: use, dtype: int64

In [26]:
df_mapping.loc[df_mapping["use"] == 1, :].shape

(31, 4)

In [27]:
ds_unique_channel = df_mapping.loc[:, "PxN(channel)"].unique()
print(f'{ds_unique_channel = }')
print(f'{ds_unique_channel.shape = }')

ds_unique_channel = array(['SSC-W', 'SSC-A', 'SSC-H', 'FSC-W', 'FSC-A', 'FSC-H', 'Time',
       'FJComp-BUV737-A', 'FJComp-APC-A', 'FJComp-BV711-A',
       'FJComp-BB700-P-A', 'FJComp-BB630-A', 'FJComp-BUV395-A',
       'FJComp-BUV563-A', 'FJComp-BV480-A', 'FJComp-BV421-A',
       'FJComp-BV650-A', 'FJComp-BYG584-A', 'FJComp-PE-CF594-A',
       'FJComp-BUV615-P-A', 'FJComp-BUV805-A', 'FJComp-BYG790-A',
       'FJComp-PE-Cy5.5-A', 'FJComp-BV570-A', 'FJComp-BUV496-A',
       'FJComp-BV605-A', 'FJComp-BB660-P-A', 'FJComp-BV786-A',
       'FJComp-APC-R700-A', 'FJComp-BUV661-A', 'FJComp-BYG670-A',
       'FJComp-BV750-P-A', 'FJComp-FITC-A', 'FJComp-APC-H7-A',
       'FJComp-BB790-P-A'], dtype=object)
ds_unique_channel.shape = (35,)


In [28]:
ds_unique_marker = df_mapping.loc[:, "PxS(marker)"].unique()
print(f'{ds_unique_marker = }')
print(f'{ds_unique_marker.shape = }')

ds_unique_marker = array([nan, 'IFNg', 'FOXP3', 'CD25', 'CD3', 'IL-2', 'GATA3', 'CD45RA',
       'CD19', 'IL-4', 'IL-10', 'RORgt', 'IL-6', 'Tbet', 'CD8', 'CCR7',
       'CD14', 'HLA-DR', 'CD4', '4-IBB', '-', 'CD40L', 'IL-17a', 'CTLA-4',
       'PD-1', 'TNFa', 'livedead'], dtype=object)
ds_unique_marker.shape = (27,)


In [29]:
ds_unique_marker_channel = df_mapping.loc[:, "marker_channel"].unique()
print(f'{ds_unique_marker_channel = }')
print(f'{ds_unique_marker_channel.shape = }')

ds_unique_marker_channel = array(['SSC-W', 'SSC-A', 'SSC-H', 'FSC-W', 'FSC-A', 'FSC-H', 'Time',
       'IFNg_FJComp-BUV737-A', 'FOXP3_FJComp-APC-A',
       'CD25_FJComp-BV711-A', 'CD3_FJComp-BB700-P-A',
       'IL-2_FJComp-BB630-A', 'GATA3_FJComp-BUV395-A',
       'CD45RA_FJComp-BUV563-A', 'CD19_FJComp-BV480-A',
       'IL-4_FJComp-BV421-A', 'IL-10_FJComp-BV650-A',
       'RORgt_FJComp-BYG584-A', 'IL-6_FJComp-PE-CF594-A',
       'Tbet_FJComp-BUV615-P-A', 'CD8_FJComp-BUV805-A',
       'CCR7_FJComp-BYG790-A', 'CD14_FJComp-PE-Cy5.5-A',
       'HLA-DR_FJComp-BV570-A', 'CD4_FJComp-BUV496-A',
       '4-IBB_FJComp-BV605-A', '-_FJComp-BB660-P-A',
       'CD40L_FJComp-BV786-A', 'IL-17a_FJComp-APC-R700-A',
       '-_FJComp-BUV661-A', 'CTLA-4_FJComp-BYG670-A',
       'PD-1_FJComp-BV750-P-A', 'TNFa_FJComp-FITC-A',
       'livedead_FJComp-APC-H7-A', '-_FJComp-BB790-P-A'], dtype=object)
ds_unique_marker_channel.shape = (35,)


In [30]:
df_mapping.loc[:, "PxS(marker)"].unique()

array([nan, 'IFNg', 'FOXP3', 'CD25', 'CD3', 'IL-2', 'GATA3', 'CD45RA',
       'CD19', 'IL-4', 'IL-10', 'RORgt', 'IL-6', 'Tbet', 'CD8', 'CCR7',
       'CD14', 'HLA-DR', 'CD4', '4-IBB', '-', 'CD40L', 'IL-17a', 'CTLA-4',
       'PD-1', 'TNFa', 'livedead'], dtype=object)

In [31]:
ds_unique_channel.shape

(35,)

Note that the strings in `"PxN(channel)"` are all distinct, or, in other words, unique. It seems that
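- `"PxS(marker)"` holds the name of the **marker** (the stained target, e.g. `CD3`, `IFNg`) measured in that channel; it is missing for the scatter (`SSC-*`, `FSC-*`) and `Time` rows (which, in our case, might be of minor importance)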
- `"PxN(channel)"` represents the channel of measurement

**(?)** It seems that
```
marker_channel = PxS(marker) + PxN(channel)
```
as strings. Try to verify this.

In [32]:
df_mapping["PxS(marker)"].fillna("") + "_" + df_mapping["PxN(channel)"]

0                       _SSC-W
1                       _SSC-A
2                       _SSC-H
3                       _FSC-W
4                       _FSC-A
5                       _FSC-H
6                        _Time
7         IFNg_FJComp-BUV737-A
8           FOXP3_FJComp-APC-A
9          CD25_FJComp-BV711-A
10        CD3_FJComp-BB700-P-A
11         IL-2_FJComp-BB630-A
12       GATA3_FJComp-BUV395-A
13      CD45RA_FJComp-BUV563-A
14         CD19_FJComp-BV480-A
15         IL-4_FJComp-BV421-A
16        IL-10_FJComp-BV650-A
17       RORgt_FJComp-BYG584-A
18      IL-6_FJComp-PE-CF594-A
19      Tbet_FJComp-BUV615-P-A
20         CD8_FJComp-BUV805-A
21        CCR7_FJComp-BYG790-A
22      CD14_FJComp-PE-Cy5.5-A
23       HLA-DR_FJComp-BV570-A
24         CD4_FJComp-BUV496-A
25        4-IBB_FJComp-BV605-A
26          -_FJComp-BB660-P-A
27        CD40L_FJComp-BV786-A
28    IL-17a_FJComp-APC-R700-A
29           -_FJComp-BUV661-A
30      CTLA-4_FJComp-BYG670-A
31       PD-1_FJComp-BV750-P-A
32      

In [33]:
df_mapping["marker_channel"].equals(
    df_mapping["PxS(marker)"].fillna("") + "_" + df_mapping["PxN(channel)"]
)

False

My bad: `NaN + SSC-W = SSC-W` is the rule we observe, not `_SSC-W`.

In [34]:
df_mapping.loc[0, "PxS(marker)"] is np.nan

True

In [35]:
def rule(row):
    """Rebuild the expected `marker_channel` string for one row of `df_mapping`.

    Parameters
    ----------
    row : mapping (e.g. a `pandas.Series`)
        Must provide the keys `"PxS(marker)"` and `"PxN(channel)"`.

    Returns
    -------
    str
        The channel name alone when the marker is missing, otherwise
        `"<marker>_<channel>"`.
    """
    marker = row["PxS(marker)"]
    channel = row["PxN(channel)"]
    # `pd.isna` recognises every missing-value flavour (`np.nan`,
    # `float("nan")`, `None`, `pd.NA`). An identity test against `np.nan`
    # only matches that one singleton object, so any other missing value
    # would reach the string concatenation below and raise.
    if pd.isna(marker):
        return channel
    return marker + "_" + channel

In [36]:
df_mapping.apply(rule, axis=1)

0                        SSC-W
1                        SSC-A
2                        SSC-H
3                        FSC-W
4                        FSC-A
5                        FSC-H
6                         Time
7         IFNg_FJComp-BUV737-A
8           FOXP3_FJComp-APC-A
9          CD25_FJComp-BV711-A
10        CD3_FJComp-BB700-P-A
11         IL-2_FJComp-BB630-A
12       GATA3_FJComp-BUV395-A
13      CD45RA_FJComp-BUV563-A
14         CD19_FJComp-BV480-A
15         IL-4_FJComp-BV421-A
16        IL-10_FJComp-BV650-A
17       RORgt_FJComp-BYG584-A
18      IL-6_FJComp-PE-CF594-A
19      Tbet_FJComp-BUV615-P-A
20         CD8_FJComp-BUV805-A
21        CCR7_FJComp-BYG790-A
22      CD14_FJComp-PE-Cy5.5-A
23       HLA-DR_FJComp-BV570-A
24         CD4_FJComp-BUV496-A
25        4-IBB_FJComp-BV605-A
26          -_FJComp-BB660-P-A
27        CD40L_FJComp-BV786-A
28    IL-17a_FJComp-APC-R700-A
29           -_FJComp-BUV661-A
30      CTLA-4_FJComp-BYG670-A
31       PD-1_FJComp-BV750-P-A
32      

In [37]:
df_mapping["marker_channel"].equals(df_mapping.apply(rule, axis=1))

True

In [38]:
df_mapping.dtypes

marker_channel    object
use                int64
PxS(marker)       object
PxN(channel)      object
dtype: object

### `df_label`

In [39]:
df_label.shape

(40, 2)

In [40]:
df_label.head()

Unnamed: 0,file_flow_id,label
0,flowrepo_covid_EU_002_flow_001,Healthy
1,flowrepo_covid_EU_003_flow_001,Healthy
2,flowrepo_covid_EU_004_flow_001,Healthy
3,flowrepo_covid_EU_005_flow_001,Healthy
4,flowrepo_covid_EU_006_flow_001,Healthy


In [41]:
df_label.loc[:, "label"].unique()

array(['Healthy', 'Sick'], dtype=object)

In [42]:
df_label.loc[:, "label"].value_counts()

Sick       28
Healthy    12
Name: label, dtype: int64

The first observation is that
- There might be too few data instances (only 40?)
- Somewhat unbalanced (28 vs 12)

## `.fcs` Files and `FlowCal`

Let's first verify that the subfolder names of `raw_fcs/` are exactly the 40 strings listed in `df_label.loc[:, "file_flow_id"]`.

In [43]:
sub_dirs = sorted(subdir.name for subdir in (data_dir/"raw_fcs").iterdir())
sub_dirs

['flowrepo_covid_EU_002_flow_001',
 'flowrepo_covid_EU_003_flow_001',
 'flowrepo_covid_EU_004_flow_001',
 'flowrepo_covid_EU_005_flow_001',
 'flowrepo_covid_EU_006_flow_001',
 'flowrepo_covid_EU_007_flow_001',
 'flowrepo_covid_EU_008_flow_001',
 'flowrepo_covid_EU_009_flow_001',
 'flowrepo_covid_EU_010_flow_001',
 'flowrepo_covid_EU_011_flow_001',
 'flowrepo_covid_EU_012_flow_001',
 'flowrepo_covid_EU_013_flow_001',
 'flowrepo_covid_EU_014_flow_001',
 'flowrepo_covid_EU_015_flow_001',
 'flowrepo_covid_EU_016_flow_001',
 'flowrepo_covid_EU_017_flow_001',
 'flowrepo_covid_EU_018_flow_001',
 'flowrepo_covid_EU_019_flow_001',
 'flowrepo_covid_EU_020_flow_001',
 'flowrepo_covid_EU_021_flow_001',
 'flowrepo_covid_EU_022_flow_001',
 'flowrepo_covid_EU_023_flow_001',
 'flowrepo_covid_EU_030_flow_001',
 'flowrepo_covid_EU_031_flow_001',
 'flowrepo_covid_EU_032_flow_001',
 'flowrepo_covid_EU_033_flow_001',
 'flowrepo_covid_EU_034_flow_001',
 'flowrepo_covid_EU_035_flow_001',
 'flowrepo_covid_EU_

In [44]:
pd.Series(sub_dirs).equals(df_label.loc[:, "file_flow_id"].sort_values())

True

In [45]:
list((data_dir/"raw_fcs").glob("**/*.fcs"))

[PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_007_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_ICU_002_A ST3 230420_052_Live_cells.fcs'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_002_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_HC_005 ST3 230420_016_Live_cells.fcs'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_004_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_HC_007 ST3 230420_014_Live_cells.fcs'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_005_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_HC_008 ST3 230420_013_Live_cells.fcs'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_006_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_HC_009 ST3 230420_012_Live_cells.fcs'),
 PosixPath('/content/drive/MyDrive/ahead/raw_fcs/flowrepo_covid_EU_003_flow_001/export_COVID19 samples 23_04_20_ST3_COVID19_HC_006 ST3 230420_015_Live_cells.fcs'),
 PosixPath('/

In [57]:
import FlowCal
import pandas as pd
import numpy as np

In [58]:
#pd.DataFrame?

In [77]:
# Collect per-file metadata (sample id, number of events, number of channels,
# time step) from every FCS file found below `raw_fcs/`.
array_events = []
array_channels = []
array_time_steps = []
array_ids = []
for path_fcs in (data_dir/"raw_fcs").glob("**/*.fcs"):
    # The name of the folder holding the file serves as the sample id.
    # (Not stored in a variable called `id`, which would shadow the builtin
    # for every later cell of the notebook.)
    array_ids.append(path_fcs.parent.name)
    fcs_data = FlowCal.io.FCSData(str(path_fcs))
    n_events, n_channels = fcs_data.shape
    array_events.append(n_events)
    array_channels.append(n_channels)
    array_time_steps.append(fcs_data.time_step)

In [78]:
# Assemble the per-file lists into one table: one row per FCS file.
df_raw_fcs = pd.DataFrame(
    {
        "file_flow_id": array_ids,
        "n_events": array_events,
        "n_channels": array_channels,
        "time_step": array_time_steps,
    }
)
df_raw_fcs.head()

Unnamed: 0,file_flow_id,n_events,n_channels,time_step
0,flowrepo_covid_EU_007_flow_001,1860,35,0.01
1,flowrepo_covid_EU_002_flow_001,363314,35,0.01
2,flowrepo_covid_EU_004_flow_001,183001,35,0.01
3,flowrepo_covid_EU_005_flow_001,298047,35,0.01
4,flowrepo_covid_EU_006_flow_001,248917,35,0.01


In [79]:
df_raw_fcs.dtypes

file_flow_id     object
n_events          int64
n_channels        int64
time_step       float64
dtype: object

In [80]:
df_raw_fcs.describe()

Unnamed: 0,n_events,n_channels,time_step
count,40.0,40.0,40.0
mean,126318.0,35.0,0.01
std,115032.235142,0.0,0.0
min,680.0,35.0,0.01
25%,17350.0,35.0,0.01
50%,90805.0,35.0,0.01
75%,203955.75,35.0,0.01
max,363314.0,35.0,0.01


In [91]:
df_raw_fcs.loc[:, "n_channels"].unique()

array([35])

We first notice that
> All FCSData above have the channel dimension `35`,
> identical to the number of indexes/rows of `df_mapping` above.

In [52]:
channels = df_mapping.loc[:, "PxN(channel)"].sort_values().to_list()

If any of the FCS files contains a channel whose name is not listed in `df_mapping`, then the next cell will print it out.

In [53]:
# For each FCS file, report every channel name that is absent from `channels`.
# Files whose channels are all known produce no output at all.
for path_fcs in (data_dir/"raw_fcs").glob("**/*.fcs"):
    s = FlowCal.io.FCSData(str(path_fcs))
    unknown_channels = [c for c in s.channels if c not in channels]
    if not unknown_channels:
        continue
    # File name first, then one line per unknown channel, then a blank separator.
    print(path_fcs.name)
    for c in unknown_channels:
        print(f'"{c}" not in channels')
    print()

Since we've gone this far to extract information from all the FCS files, let's summarize all useful information in one single `DataFrame` and eventually save the result to disk for future usage.

In [98]:
# Remove unneeded columns.
# Rebinding the name (instead of `inplace=True`) together with
# `errors="ignore"` keeps this cell re-runnable: executing it a second time
# no longer raises a KeyError once the columns are already gone.
df_raw_fcs = df_raw_fcs.drop(columns=["time_step", "n_channels"], errors="ignore")

In [100]:
# df_summary = df_raw_fcs.join(
#     df_label,
#     on="file_flow_id",
#     #validate="1:1",
# )

In [99]:
# df_raw_fcs.loc[:, "file_flow_id"].dtype, df_label.loc[:, "file_flow_id"].dtype

(dtype('O'), dtype('O'))

In [101]:
# Attach each sample's label to its event count by joining on `file_flow_id`.
df_summary = df_raw_fcs.merge(df_label, on="file_flow_id")
df_summary

Unnamed: 0,file_flow_id,n_events,label
0,flowrepo_covid_EU_007_flow_001,1860,Healthy
1,flowrepo_covid_EU_002_flow_001,363314,Healthy
2,flowrepo_covid_EU_004_flow_001,183001,Healthy
3,flowrepo_covid_EU_005_flow_001,298047,Healthy
4,flowrepo_covid_EU_006_flow_001,248917,Healthy
5,flowrepo_covid_EU_003_flow_001,311492,Healthy
6,flowrepo_covid_EU_008_flow_001,10959,Healthy
7,flowrepo_covid_EU_011_flow_001,29363,Healthy
8,flowrepo_covid_EU_012_flow_001,18618,Healthy
9,flowrepo_covid_EU_010_flow_001,17412,Healthy


In [103]:
# Boolean flag: True where `label` is "Sick", False otherwise.
df_summary["wuhan"] = df_summary["label"].eq("Sick")
df_summary

Unnamed: 0,file_flow_id,n_events,label,wuhan
0,flowrepo_covid_EU_007_flow_001,1860,Healthy,False
1,flowrepo_covid_EU_002_flow_001,363314,Healthy,False
2,flowrepo_covid_EU_004_flow_001,183001,Healthy,False
3,flowrepo_covid_EU_005_flow_001,298047,Healthy,False
4,flowrepo_covid_EU_006_flow_001,248917,Healthy,False
5,flowrepo_covid_EU_003_flow_001,311492,Healthy,False
6,flowrepo_covid_EU_008_flow_001,10959,Healthy,False
7,flowrepo_covid_EU_011_flow_001,29363,Healthy,False
8,flowrepo_covid_EU_012_flow_001,18618,Healthy,False
9,flowrepo_covid_EU_010_flow_001,17412,Healthy,False


In [114]:
# Drop the `label` column if it is still there; a no-op otherwise.
df_summary = df_summary.drop(columns=["label"], errors="ignore")

In [107]:
df_summary.to_csv("summary.csv", index=False)

In [118]:
# Rows whose `wuhan` flag is set, displayed from fewest to most events.
positives = df_summary[df_summary["wuhan"]]
positives.sort_values(by="n_events")

Unnamed: 0,file_flow_id,n_events,wuhan
33,flowrepo_covid_EU_041_flow_001,680,True
13,flowrepo_covid_EU_016_flow_001,10127,True
38,flowrepo_covid_EU_044_flow_001,13728,True
26,flowrepo_covid_EU_032_flow_001,13767,True
34,flowrepo_covid_EU_042_flow_001,15778,True
15,flowrepo_covid_EU_014_flow_001,15792,True
29,flowrepo_covid_EU_036_flow_001,17164,True
27,flowrepo_covid_EU_033_flow_001,22520,True
25,flowrepo_covid_EU_030_flow_001,27390,True
14,flowrepo_covid_EU_019_flow_001,36744,True


In [117]:
# Rows whose `wuhan` flag is not set, displayed from fewest to most events.
negatives = df_summary[~df_summary["wuhan"]]
negatives.sort_values(by="n_events")

Unnamed: 0,file_flow_id,n_events,wuhan
0,flowrepo_covid_EU_007_flow_001,1860,False
11,flowrepo_covid_EU_009_flow_001,2636,False
6,flowrepo_covid_EU_008_flow_001,10959,False
9,flowrepo_covid_EU_010_flow_001,17412,False
8,flowrepo_covid_EU_012_flow_001,18618,False
7,flowrepo_covid_EU_011_flow_001,29363,False
10,flowrepo_covid_EU_013_flow_001,170075,False
2,flowrepo_covid_EU_004_flow_001,183001,False
4,flowrepo_covid_EU_006_flow_001,248917,False
3,flowrepo_covid_EU_005_flow_001,298047,False


In [112]:
# Number of samples to show, starting at the middle of the size-ordered list.
k = 2
mid_pos = len(positives) // 2
positives.sort_values(by="n_events").iloc[mid_pos: mid_pos + k]

Unnamed: 0,file_flow_id,n_events,wuhan
23,flowrepo_covid_EU_034_flow_001,98608,True
36,flowrepo_covid_EU_048_flow_001,123154,True


In [113]:
# Same middle-of-the-list selection for the negative samples (reuses `k`).
mid_neg = len(negatives) // 2
negatives.sort_values(by="n_events").iloc[mid_neg: mid_neg + k]

Unnamed: 0,file_flow_id,n_events,wuhan
10,flowrepo_covid_EU_013_flow_001,170075,False
2,flowrepo_covid_EU_004_flow_001,183001,False


~We almost forgot a few other things worth saving to disk as well~
- ~`(# allowed channels) = 31`~
- ~These 31 allowed channels' names~
- ~The uniform time step `0.01`~

~which we could have saved into, say, a JSON file.~
> May or may not be worth saving.

## Conclusion
Combining with the `"ML Interview Questions.docx"`, to my understanding, here is what we are asked to do
1. Each FCSData file contains the same `35` channels.  
   However, we shall only make use of `31` of them, i.e. those channels that are marked with `use=1` in `EU_marker_channel_mapping.xlsx`
    - The columns of `EU_marker_channel_mapping.xlsx` other than `"use"` and `"PxN(channel)"` are thus useless to us
1. As for `EU_label.xlsx`, it simply contains the classification labels for the `.fcs` files