# Cytof data processing

## Requirements

For compatibility, the `numpy` version needs to be fixed:
```bash
pip3 install --upgrade pandas numpy==1.26.4 scprep
```

## Variables definition

In [7]:
# Specify the folder containing data to analyse
folder_path = "./test_cytof_data"

# Specify the metadata columns
metadata_columns = ['Cell_Index', 'Condition', 'Control', 'Replicate']

# Specify other columns to exclude from processing
excluded_columns = []

# Compute the non data columns in a new variable for easier later use
non_data_columns = excluded_columns + metadata_columns

## Import common packages

In [8]:
import os
import numpy as np
import pandas as pd

pd.set_option('display.max_columns', 60)
pd.set_option('display.max_rows', 1000)

## Concatenate all files

### Configuration

In [9]:
get_condition_replicate_from_filename = True

### Concatenate

In [10]:
all_events = pd.DataFrame()

# Loop over all files in folder
for filename in os.listdir(folder_path):
    # Only consider files with '.txt' extension
    if filename.endswith('.txt'):
        # Build the full path to file
        file_path = os.path.join(folder_path, filename)
        # Load the file
        events = pd.read_csv(file_path, delimiter='\t')

        if get_condition_replicate_from_filename:
            # Retrieve metadata from the filename (ex: WGANormalised_Pro_PDO21 + CAFs_01.fcs_file_internal...)
            # First split: ['WGANormalised_Pro_PDO21 + CAFs_01', '_file_internal']
            # Second split over first element: ['WGANormalised', 'Pro', 'PDO21', '01']
            metadata_from_filename = filename.split('.fcs')[0].split('_')
            # Store the condition name in the dataframe: second-to-last element
            events['Condition'] = metadata_from_filename[-2]
            # Store the control name in the dataframe: second-to-last element split over '+', and stripped to remove whitespace from both sides
            events['Control'] = metadata_from_filename[-2].split('+')[0].strip()
            # Store the replicate in the dataframe: last element
            events['Replicate'] = metadata_from_filename[-1]

        # Add the file data to the DataFrame containing all events
        all_events = pd.concat([all_events, events], ignore_index=True)

# Print all events
all_events

Unnamed: 0,Cell_Index,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4],Condition,Control,Replicate
0,1,43.585557,1213.959853,3558.386958,0.000000,1458.946308,421.249051,707.013792,478.102051,112.645929,603.746433,50897.801994,1594.755375,53993.672474,10.087679,28051.093144,1690.470631,394.401844,1997.347396,31736.874224,365.299426,630.263993,6095.601789,3.853920e+04,2672.376730,46715.681153,372.530023,1727.848742,4939.136139,251.864645,15661.175919,979.785837,13075.133833,1090.477880,2.310142e+03,0.004876,390.414788,724.547763,11229.491117,438.423305,838.639540,102.624017,2751.651199,1059.449506,2758.233772,1.783434e+05,3.267123e+05,1.320918e+04,PDO21 + hIL8 12.5ng,PDO21,02
1,2,0.000000,0.000000,0.000000,0.000000,0.000000,1106.766267,483.146464,0.000000,0.000000,6212.441179,112524.540240,3376.703678,63105.115731,2026.345475,78076.598058,5378.106523,3483.206300,14774.307667,16404.612777,0.000000,6325.650928,15999.116990,9.359513e+04,19611.374816,114839.243925,7337.893752,3590.558857,51435.443529,1505.413488,38390.718929,5494.854223,51952.686435,6593.492500,9.707502e+03,0.000554,3019.134512,583.799483,57342.426824,0.000000,9371.121594,253.987820,12415.285065,1217.250647,3465.030644,1.138082e+06,2.033536e+06,1.273348e+05,PDO21 + hIL8 12.5ng,PDO21,02
2,3,0.000000,0.000000,0.000000,1731.737952,3995.250069,562.508254,0.000000,181.267014,0.000000,211.807458,86705.838045,2220.989030,39521.113254,449.903746,5294.075421,7258.527181,2360.692782,2230.335842,4746.109479,3430.414044,2534.428668,23916.033206,5.296711e+04,4278.013643,25919.317740,218.391683,1234.880145,13760.759509,1343.636866,15913.337755,2911.830422,26219.449966,6317.144171,3.358493e+05,0.001748,328.106387,39.487448,22343.036966,1658.007231,2055.223981,0.000000,2376.693641,3997.301349,1056.888078,4.014445e+05,7.436298e+05,5.654798e+04,PDO21 + hIL8 12.5ng,PDO21,02
3,4,0.000000,2672.189928,0.000000,94.561986,0.000000,0.000000,2129.264680,0.000000,0.000000,0.000000,49121.290729,443.273420,66948.519779,434.657481,0.000000,0.000000,5211.869347,993.812809,1650.902655,0.000000,3786.186858,8147.540472,6.239263e+04,17666.475488,57566.085523,1134.561648,5410.785732,14088.210476,602.732347,155766.258197,3576.587010,23858.022486,3329.592530,1.212069e+04,0.000901,4550.623410,8163.213997,1948.253067,0.000000,3575.725906,0.000000,1061.280373,0.000000,3629.147982,7.199613e+05,1.275512e+06,3.018831e+05,PDO21 + hIL8 12.5ng,PDO21,02
4,5,0.000000,8911.913747,0.000000,22469.635814,1624.563602,7755.338861,0.000000,8886.237503,0.000000,0.000000,790518.367838,26739.996798,392637.619681,0.000000,170897.126636,7479.105856,25645.619233,52787.568714,53050.173847,23978.255141,5048.944674,113671.838662,1.089417e+06,217428.443030,501488.031274,0.000000,6905.471330,137955.596855,0.000000,269985.782167,129138.747335,536115.645320,58814.594023,7.214307e+06,0.000078,10430.904898,58470.340866,44148.812264,6660.390475,5337.305140,0.000000,36145.172415,40954.813217,65518.278247,7.275846e+06,1.301274e+07,1.797610e+06,PDO21 + hIL8 12.5ng,PDO21,02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331313,3477,362.390948,992.681035,2068.121114,260.349544,316.508499,0.000000,298.312521,0.000000,112.635680,392.738686,50437.660151,4469.028381,92466.053505,117.430747,6044.766148,1035.111273,972.697009,993.158569,1250.776576,943.535093,2870.155451,2049.797486,1.320265e+04,2251.043351,38214.616564,371.768906,937.367092,6934.300288,888.597777,17701.803073,847.845783,15227.675305,3686.257977,6.900475e+02,0.004716,838.881231,938.125302,6682.272450,1052.873953,804.977090,489.496254,4155.716466,1741.168901,5001.590085,1.799795e+05,3.174157e+05,1.902812e+04,PDO21 + hWnt3a 50ng,PDO21,01
331314,3478,372.663608,724.611709,0.000000,0.000000,35.160427,0.000000,829.251021,608.686856,0.000000,0.000000,88656.799304,0.000000,13176.904401,1042.452642,577.555315,1010.994956,853.626242,658.551980,4850.466761,662.469186,0.000000,18824.572962,5.653982e+04,5864.233023,21422.392967,531.357432,2174.363275,6732.846478,879.626762,33613.033575,3089.377985,7933.601307,2556.600206,2.081537e+05,0.004529,597.363041,348.786817,9553.137868,479.746291,2543.308891,360.295789,4413.866547,925.472703,1162.140184,3.706437e+05,6.465320e+05,1.202575e+04,PDO21 + hWnt3a 50ng,PDO21,01
331315,3479,12.442780,0.000000,0.000000,0.000000,77.353153,133.785501,181.986353,254.284758,19.052205,14.854769,3424.966362,85.885916,1172.260957,245.247535,0.000000,138.028785,27.924660,300.020467,648.970828,0.000000,191.587576,231.235996,7.506321e+02,476.929853,2679.400691,124.067507,209.814890,952.209344,173.123581,1926.201105,183.520701,585.860364,271.621016,1.082461e+03,0.025516,118.110472,179.070468,702.866085,818.625625,79.789012,61.177457,159.688245,73.685651,63.775938,2.602718e+04,4.795381e+04,1.629098e+03,PDO21 + hWnt3a 50ng,PDO21,01
331316,3480,59.816495,559.819517,899.679879,448.216709,0.000000,2819.140971,2075.140984,1882.903506,4726.652404,2184.649715,11797.323451,893.115372,111251.297405,1149.331822,7077.478208,33031.760020,0.000000,3585.608417,189.923909,506.207177,630.546302,0.000000,3.442723e+02,1805.169366,29501.166536,673.086272,1867.260163,5871.036522,2330.769157,4417.229336,705.743482,0.000000,648.056076,3.099320e+03,0.001744,575.907007,995.043718,2106.405927,0.000000,0.000000,203.907664,456.791537,4348.993633,2457.059358,7.204364e+05,1.381995e+06,7.844661e+02,PDO21 + hWnt3a 50ng,PDO21,01


## EMD Generation

### Prepare the data

In [11]:
#drop the metadata to create a df with only numerical data for normalisation/transformation
data = all_events.drop(non_data_columns ,axis=1)
data

Unnamed: 0,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4]
0,43.585557,1213.959853,3558.386958,0.000000,1458.946308,421.249051,707.013792,478.102051,112.645929,603.746433,50897.801994,1594.755375,53993.672474,10.087679,28051.093144,1690.470631,394.401844,1997.347396,31736.874224,365.299426,630.263993,6095.601789,3.853920e+04,2672.376730,46715.681153,372.530023,1727.848742,4939.136139,251.864645,15661.175919,979.785837,13075.133833,1090.477880,2.310142e+03,0.004876,390.414788,724.547763,11229.491117,438.423305,838.639540,102.624017,2751.651199,1059.449506,2758.233772,1.783434e+05,3.267123e+05,1.320918e+04
1,0.000000,0.000000,0.000000,0.000000,0.000000,1106.766267,483.146464,0.000000,0.000000,6212.441179,112524.540240,3376.703678,63105.115731,2026.345475,78076.598058,5378.106523,3483.206300,14774.307667,16404.612777,0.000000,6325.650928,15999.116990,9.359513e+04,19611.374816,114839.243925,7337.893752,3590.558857,51435.443529,1505.413488,38390.718929,5494.854223,51952.686435,6593.492500,9.707502e+03,0.000554,3019.134512,583.799483,57342.426824,0.000000,9371.121594,253.987820,12415.285065,1217.250647,3465.030644,1.138082e+06,2.033536e+06,1.273348e+05
2,0.000000,0.000000,0.000000,1731.737952,3995.250069,562.508254,0.000000,181.267014,0.000000,211.807458,86705.838045,2220.989030,39521.113254,449.903746,5294.075421,7258.527181,2360.692782,2230.335842,4746.109479,3430.414044,2534.428668,23916.033206,5.296711e+04,4278.013643,25919.317740,218.391683,1234.880145,13760.759509,1343.636866,15913.337755,2911.830422,26219.449966,6317.144171,3.358493e+05,0.001748,328.106387,39.487448,22343.036966,1658.007231,2055.223981,0.000000,2376.693641,3997.301349,1056.888078,4.014445e+05,7.436298e+05,5.654798e+04
3,0.000000,2672.189928,0.000000,94.561986,0.000000,0.000000,2129.264680,0.000000,0.000000,0.000000,49121.290729,443.273420,66948.519779,434.657481,0.000000,0.000000,5211.869347,993.812809,1650.902655,0.000000,3786.186858,8147.540472,6.239263e+04,17666.475488,57566.085523,1134.561648,5410.785732,14088.210476,602.732347,155766.258197,3576.587010,23858.022486,3329.592530,1.212069e+04,0.000901,4550.623410,8163.213997,1948.253067,0.000000,3575.725906,0.000000,1061.280373,0.000000,3629.147982,7.199613e+05,1.275512e+06,3.018831e+05
4,0.000000,8911.913747,0.000000,22469.635814,1624.563602,7755.338861,0.000000,8886.237503,0.000000,0.000000,790518.367838,26739.996798,392637.619681,0.000000,170897.126636,7479.105856,25645.619233,52787.568714,53050.173847,23978.255141,5048.944674,113671.838662,1.089417e+06,217428.443030,501488.031274,0.000000,6905.471330,137955.596855,0.000000,269985.782167,129138.747335,536115.645320,58814.594023,7.214307e+06,0.000078,10430.904898,58470.340866,44148.812264,6660.390475,5337.305140,0.000000,36145.172415,40954.813217,65518.278247,7.275846e+06,1.301274e+07,1.797610e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331313,362.390948,992.681035,2068.121114,260.349544,316.508499,0.000000,298.312521,0.000000,112.635680,392.738686,50437.660151,4469.028381,92466.053505,117.430747,6044.766148,1035.111273,972.697009,993.158569,1250.776576,943.535093,2870.155451,2049.797486,1.320265e+04,2251.043351,38214.616564,371.768906,937.367092,6934.300288,888.597777,17701.803073,847.845783,15227.675305,3686.257977,6.900475e+02,0.004716,838.881231,938.125302,6682.272450,1052.873953,804.977090,489.496254,4155.716466,1741.168901,5001.590085,1.799795e+05,3.174157e+05,1.902812e+04
331314,372.663608,724.611709,0.000000,0.000000,35.160427,0.000000,829.251021,608.686856,0.000000,0.000000,88656.799304,0.000000,13176.904401,1042.452642,577.555315,1010.994956,853.626242,658.551980,4850.466761,662.469186,0.000000,18824.572962,5.653982e+04,5864.233023,21422.392967,531.357432,2174.363275,6732.846478,879.626762,33613.033575,3089.377985,7933.601307,2556.600206,2.081537e+05,0.004529,597.363041,348.786817,9553.137868,479.746291,2543.308891,360.295789,4413.866547,925.472703,1162.140184,3.706437e+05,6.465320e+05,1.202575e+04
331315,12.442780,0.000000,0.000000,0.000000,77.353153,133.785501,181.986353,254.284758,19.052205,14.854769,3424.966362,85.885916,1172.260957,245.247535,0.000000,138.028785,27.924660,300.020467,648.970828,0.000000,191.587576,231.235996,7.506321e+02,476.929853,2679.400691,124.067507,209.814890,952.209344,173.123581,1926.201105,183.520701,585.860364,271.621016,1.082461e+03,0.025516,118.110472,179.070468,702.866085,818.625625,79.789012,61.177457,159.688245,73.685651,63.775938,2.602718e+04,4.795381e+04,1.629098e+03
331316,59.816495,559.819517,899.679879,448.216709,0.000000,2819.140971,2075.140984,1882.903506,4726.652404,2184.649715,11797.323451,893.115372,111251.297405,1149.331822,7077.478208,33031.760020,0.000000,3585.608417,189.923909,506.207177,630.546302,0.000000,3.442723e+02,1805.169366,29501.166536,673.086272,1867.260163,5871.036522,2330.769157,4417.229336,705.743482,0.000000,648.056076,3.099320e+03,0.001744,575.907007,995.043718,2106.405927,0.000000,0.000000,203.907664,456.791537,4348.993633,2457.059358,7.204364e+05,1.381995e+06,7.844661e+02


In [None]:
#make sure all metadata columns are strings (not numberical as this will run into errors)
metadata = all_events.filter(metadata_columns)
metadata[metadata_columns] = metadata[metadata_columns].applymap(str)
metadata  

Unnamed: 0,Cell_Index,Condition,Control,Replicate
0,1,PDO21 + hIL8 12.5ng,PDO21,02
1,2,PDO21 + hIL8 12.5ng,PDO21,02
2,3,PDO21 + hIL8 12.5ng,PDO21,02
3,4,PDO21 + hIL8 12.5ng,PDO21,02
4,5,PDO21 + hIL8 12.5ng,PDO21,02
...,...,...,...,...
331313,3477,PDO21 + hWnt3a 50ng,PDO21,01
331314,3478,PDO21 + hWnt3a 50ng,PDO21,01
331315,3479,PDO21 + hWnt3a 50ng,PDO21,01
331316,3480,PDO21 + hWnt3a 50ng,PDO21,01


### Select a subset of data (optional)

In [None]:
#Batches:
#Batch 1 = PDO27wt/ko exp B BM/MOPC21/B7C18
#Batch 2 = PDO27 ABCEDF7 Tr
#Batch 3 = PDO27 ABCDEF7 NT
#Batch 4 = PDO21/23/216 ABE7 Tr
#Batch 5 = PDO21/23/216 ABE7 NT 
#Batch 6 = PDO5/11 ABE7 Tr/NT
#Batch 7 = PDO75/99 ABE7 Tr/NT
#Batch 8 = PDO109/141 ABE7 Tr/NT
#Batch 9 = NT/eGFP/eGFP-stIL15 ABE7

#### Configuration

In [None]:
# To enable this process, set this variable to True, False otherwise
should_select_a_subset = False

# Define here the filter to apply
subset_condition = \
    metadata['Patient'].isin(['X','5','11','21','23','27','75','99','109','141','216']) & \
    metadata['gd_donor'].isin(['A','B','E','7']) & \
    metadata['Transduction'].isin(['eGFP-stIL15']) & \
    metadata['Treatment'].isin(['BM','B7C18']) & \
    metadata['Batch'].isin(['Batch2','Batch4','Batch6','Batch7','Batch8'])

#### Select the data

In [None]:
if should_select_a_subset:
    #Select eGFP-stIL15 / ABE7 / wt PDO / BM / B7C18 (I was just selecting the data I wanted to use)
    data = data.loc[subset_condition]
    data

#### Select the metadata

In [None]:
if should_select_a_subset:
    #selecting the corresponding metadata
    metadata = metadata.loc[subset_condition]
    metadata

### Arcsinh transformation

#### Configuration

In [20]:
arcsinh_cofactor = 5

#### Data processing

In [21]:
#arcsinh transformation of all raw data
data = np.arcsinh(data/arcsinh_cofactor)
data

Unnamed: 0,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4]
0,2.861709,6.185356,7.260772,0.000000,6.369182,5.126969,5.644772,5.253561,3.808451,5.486881,9.921284,6.458187,9.980331,1.451450,9.325492,6.516474,5.061120,6.683286,9.448944,4.984473,5.529864,7.799032,9.643140,6.974434,9.835544,5.004072,6.538344,7.588655,4.612700,8.742649,5.971050,8.562177,6.078086,6.828775,0.000975,5.050960,5.669269,8.410008,5.166927,5.815499,3.715374,7.003667,6.049220,7.006056,11.175175,11.780544,8.572377
1,0.000000,0.000000,0.000000,0.000000,0.000000,6.092912,5.264056,0.000000,0.000000,7.818019,10.714636,7.208365,10.136266,6.697700,10.349155,7.673801,7.239418,8.684354,8.789027,0.000000,7.836078,8.763998,10.530443,8.967574,10.734998,7.984517,7.269773,9.931792,6.400535,9.639280,7.695277,9.941798,7.877548,8.264364,0.000111,7.096435,5.453285,10.040505,0.000000,8.229097,4.621092,8.510393,6.188064,7.234187,13.028564,13.608996,10.838285
2,0.000000,0.000000,0.000000,6.540592,7.376571,5.416135,0.000000,4.283871,0.000000,4.439526,10.453986,6.789418,9.668300,5.192774,7.658053,7.973642,6.850421,6.793618,7.548790,7.224146,6.921434,9.166014,9.961136,7.444954,9.246453,4.470130,6.202443,8.613286,6.286848,8.758622,7.060247,9.257966,7.834732,11.808127,0.000350,4.877105,2.763677,9.097979,6.497083,6.711851,0.000000,6.857176,7.377084,6.046799,11.986534,12.603008,10.026554
3,0.000000,6.974364,0.000000,3.633663,0.000000,0.000000,6.747243,0.000000,0.000000,0.000000,9.885757,5.177928,10.195389,5.158301,0.000000,0.000000,7.642403,5.985264,6.492789,0.000000,7.322824,8.089181,10.124912,8.863133,10.044398,6.117716,7.679859,8.636803,5.485200,11.039821,7.265874,9.163585,7.194315,8.486379,0.000180,7.506729,8.091103,6.658399,0.000000,7.265633,0.000000,6.050946,0.000000,7.280463,12.570662,13.142568,11.701504
4,0.000000,8.178854,0.000000,9.103629,6.476706,8.039846,0.000000,8.175968,0.000000,0.000000,12.664153,9.277625,11.964352,0.000000,11.132526,8.003578,9.235837,9.957740,9.962703,9.168612,7.610644,10.724780,12.984863,11.373334,12.209044,0.000000,7.923779,10.918396,0.000000,11.589834,10.852352,12.275814,10.065855,14.875286,0.000016,8.336238,10.059984,9.779031,7.887643,7.666186,0.000000,9.579008,9.703934,10.173794,14.883780,15.465149,13.485678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331313,4.976480,5.984125,6.718107,4.645827,4.841122,0.000000,4.781921,0.000000,3.808360,5.056894,9.912203,7.488636,10.518306,3.850011,7.790658,6.025979,5.963789,5.984606,6.215233,5.933350,7.045831,6.709207,8.571882,6.802860,9.634683,5.002027,5.926791,7.927945,5.873362,8.865131,5.826417,8.714579,7.296077,5.620483,0.000943,5.815787,5.927600,7.890923,6.042994,5.774533,5.277112,7.415950,6.546023,7.601221,11.184307,11.751677,8.937382
331314,5.004430,5.669357,0.000000,0.000000,2.648648,0.000000,5.804241,5.495030,0.000000,0.000000,10.476237,0.000000,8.569930,6.033047,5.442532,6.002406,5.833211,5.573767,7.570540,5.579698,0.000000,8.926628,10.026410,7.760336,9.055901,5.359166,6.768202,7.898463,5.863215,9.506378,7.119435,8.062572,6.930144,11.329741,0.000906,5.476252,4.938222,8.248334,5.256994,6.924931,4.970683,7.476216,5.914021,6.141732,11.906706,12.463087,8.478515
331315,1.642973,0.000000,0.000000,0.000000,3.433134,3.980296,4.287830,4.622261,2.047682,1.809221,7.222557,3.537575,6.150403,4.586081,0.000000,4.011499,2.421140,4.787629,5.559112,0.000000,4.339225,4.527265,5.704636,5.251106,6.977059,3.904941,4.430077,5.942501,4.237923,6.647016,4.296222,5.456809,4.688202,6.070707,0.005103,3.855777,4.271684,5.638888,5.791345,3.464075,3.199154,4.157178,3.384666,3.240618,9.250606,9.861703,6.479493
331316,3.176733,5.411344,5.885756,5.189017,0.000000,7.027898,6.721495,6.624281,7.544682,6.772921,8.459337,5.878433,10.703256,6.130650,7.948382,9.488934,0.000000,7.268393,4.330506,5.310680,5.530312,0.000000,4.925195,6.582121,9.375894,5.595597,6.615939,7.761496,6.837664,7.476978,5.642974,0.000000,5.557701,7.122648,0.000349,5.439674,5.986502,6.736449,0.000000,0.000000,4.401527,5.207966,7.461409,6.890431,12.571322,13.222748,5.748723


### Batch effect correction

In [22]:
import scprep

# Data centering by batch to correct any cytof batch effect
# Only if 'Batch' is a metadata
if 'Batch' in metadata.columns:
    data = scprep.normalize.batch_mean_center(data,sample_idx=metadata['Batch'])
    data

### Re-assemble processed data with metadata

#### Concatenate data with metadata

In [23]:
# Combine arcsinh-transformed and mean-centered data with metadata again
processed_data = pd.concat([data, metadata], axis=1)
processed_data

Unnamed: 0,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4],Cell_Index,Condition,Control,Replicate
0,2.861709,6.185356,7.260772,0.000000,6.369182,5.126969,5.644772,5.253561,3.808451,5.486881,9.921284,6.458187,9.980331,1.451450,9.325492,6.516474,5.061120,6.683286,9.448944,4.984473,5.529864,7.799032,9.643140,6.974434,9.835544,5.004072,6.538344,7.588655,4.612700,8.742649,5.971050,8.562177,6.078086,6.828775,0.000975,5.050960,5.669269,8.410008,5.166927,5.815499,3.715374,7.003667,6.049220,7.006056,11.175175,11.780544,8.572377,1,PDO21 + hIL8 12.5ng,PDO21,02
1,0.000000,0.000000,0.000000,0.000000,0.000000,6.092912,5.264056,0.000000,0.000000,7.818019,10.714636,7.208365,10.136266,6.697700,10.349155,7.673801,7.239418,8.684354,8.789027,0.000000,7.836078,8.763998,10.530443,8.967574,10.734998,7.984517,7.269773,9.931792,6.400535,9.639280,7.695277,9.941798,7.877548,8.264364,0.000111,7.096435,5.453285,10.040505,0.000000,8.229097,4.621092,8.510393,6.188064,7.234187,13.028564,13.608996,10.838285,2,PDO21 + hIL8 12.5ng,PDO21,02
2,0.000000,0.000000,0.000000,6.540592,7.376571,5.416135,0.000000,4.283871,0.000000,4.439526,10.453986,6.789418,9.668300,5.192774,7.658053,7.973642,6.850421,6.793618,7.548790,7.224146,6.921434,9.166014,9.961136,7.444954,9.246453,4.470130,6.202443,8.613286,6.286848,8.758622,7.060247,9.257966,7.834732,11.808127,0.000350,4.877105,2.763677,9.097979,6.497083,6.711851,0.000000,6.857176,7.377084,6.046799,11.986534,12.603008,10.026554,3,PDO21 + hIL8 12.5ng,PDO21,02
3,0.000000,6.974364,0.000000,3.633663,0.000000,0.000000,6.747243,0.000000,0.000000,0.000000,9.885757,5.177928,10.195389,5.158301,0.000000,0.000000,7.642403,5.985264,6.492789,0.000000,7.322824,8.089181,10.124912,8.863133,10.044398,6.117716,7.679859,8.636803,5.485200,11.039821,7.265874,9.163585,7.194315,8.486379,0.000180,7.506729,8.091103,6.658399,0.000000,7.265633,0.000000,6.050946,0.000000,7.280463,12.570662,13.142568,11.701504,4,PDO21 + hIL8 12.5ng,PDO21,02
4,0.000000,8.178854,0.000000,9.103629,6.476706,8.039846,0.000000,8.175968,0.000000,0.000000,12.664153,9.277625,11.964352,0.000000,11.132526,8.003578,9.235837,9.957740,9.962703,9.168612,7.610644,10.724780,12.984863,11.373334,12.209044,0.000000,7.923779,10.918396,0.000000,11.589834,10.852352,12.275814,10.065855,14.875286,0.000016,8.336238,10.059984,9.779031,7.887643,7.666186,0.000000,9.579008,9.703934,10.173794,14.883780,15.465149,13.485678,5,PDO21 + hIL8 12.5ng,PDO21,02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331313,4.976480,5.984125,6.718107,4.645827,4.841122,0.000000,4.781921,0.000000,3.808360,5.056894,9.912203,7.488636,10.518306,3.850011,7.790658,6.025979,5.963789,5.984606,6.215233,5.933350,7.045831,6.709207,8.571882,6.802860,9.634683,5.002027,5.926791,7.927945,5.873362,8.865131,5.826417,8.714579,7.296077,5.620483,0.000943,5.815787,5.927600,7.890923,6.042994,5.774533,5.277112,7.415950,6.546023,7.601221,11.184307,11.751677,8.937382,3477,PDO21 + hWnt3a 50ng,PDO21,01
331314,5.004430,5.669357,0.000000,0.000000,2.648648,0.000000,5.804241,5.495030,0.000000,0.000000,10.476237,0.000000,8.569930,6.033047,5.442532,6.002406,5.833211,5.573767,7.570540,5.579698,0.000000,8.926628,10.026410,7.760336,9.055901,5.359166,6.768202,7.898463,5.863215,9.506378,7.119435,8.062572,6.930144,11.329741,0.000906,5.476252,4.938222,8.248334,5.256994,6.924931,4.970683,7.476216,5.914021,6.141732,11.906706,12.463087,8.478515,3478,PDO21 + hWnt3a 50ng,PDO21,01
331315,1.642973,0.000000,0.000000,0.000000,3.433134,3.980296,4.287830,4.622261,2.047682,1.809221,7.222557,3.537575,6.150403,4.586081,0.000000,4.011499,2.421140,4.787629,5.559112,0.000000,4.339225,4.527265,5.704636,5.251106,6.977059,3.904941,4.430077,5.942501,4.237923,6.647016,4.296222,5.456809,4.688202,6.070707,0.005103,3.855777,4.271684,5.638888,5.791345,3.464075,3.199154,4.157178,3.384666,3.240618,9.250606,9.861703,6.479493,3479,PDO21 + hWnt3a 50ng,PDO21,01
331316,3.176733,5.411344,5.885756,5.189017,0.000000,7.027898,6.721495,6.624281,7.544682,6.772921,8.459337,5.878433,10.703256,6.130650,7.948382,9.488934,0.000000,7.268393,4.330506,5.310680,5.530312,0.000000,4.925195,6.582121,9.375894,5.595597,6.615939,7.761496,6.837664,7.476978,5.642974,0.000000,5.557701,7.122648,0.000349,5.439674,5.986502,6.736449,0.000000,0.000000,4.401527,5.207966,7.461409,6.890431,12.571322,13.222748,5.748723,3480,PDO21 + hWnt3a 50ng,PDO21,01


#### Re-index the Dataframe

In [24]:
row_count = processed_data.shape[0]
processed_data.index = np.arange(row_count)

#### Ensure type of metadata column to be string

In [None]:

processed_data[metadata_columns] = processed_data[metadata_columns].applymap(str)
processed_data

Unnamed: 0,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4],Cell_Index,Condition,Control,Replicate
0,2.861709,6.185356,7.260772,0.000000,6.369182,5.126969,5.644772,5.253561,3.808451,5.486881,9.921284,6.458187,9.980331,1.451450,9.325492,6.516474,5.061120,6.683286,9.448944,4.984473,5.529864,7.799032,9.643140,6.974434,9.835544,5.004072,6.538344,7.588655,4.612700,8.742649,5.971050,8.562177,6.078086,6.828775,0.000975,5.050960,5.669269,8.410008,5.166927,5.815499,3.715374,7.003667,6.049220,7.006056,11.175175,11.780544,8.572377,1,PDO21 + hIL8 12.5ng,PDO21,02
1,0.000000,0.000000,0.000000,0.000000,0.000000,6.092912,5.264056,0.000000,0.000000,7.818019,10.714636,7.208365,10.136266,6.697700,10.349155,7.673801,7.239418,8.684354,8.789027,0.000000,7.836078,8.763998,10.530443,8.967574,10.734998,7.984517,7.269773,9.931792,6.400535,9.639280,7.695277,9.941798,7.877548,8.264364,0.000111,7.096435,5.453285,10.040505,0.000000,8.229097,4.621092,8.510393,6.188064,7.234187,13.028564,13.608996,10.838285,2,PDO21 + hIL8 12.5ng,PDO21,02
2,0.000000,0.000000,0.000000,6.540592,7.376571,5.416135,0.000000,4.283871,0.000000,4.439526,10.453986,6.789418,9.668300,5.192774,7.658053,7.973642,6.850421,6.793618,7.548790,7.224146,6.921434,9.166014,9.961136,7.444954,9.246453,4.470130,6.202443,8.613286,6.286848,8.758622,7.060247,9.257966,7.834732,11.808127,0.000350,4.877105,2.763677,9.097979,6.497083,6.711851,0.000000,6.857176,7.377084,6.046799,11.986534,12.603008,10.026554,3,PDO21 + hIL8 12.5ng,PDO21,02
3,0.000000,6.974364,0.000000,3.633663,0.000000,0.000000,6.747243,0.000000,0.000000,0.000000,9.885757,5.177928,10.195389,5.158301,0.000000,0.000000,7.642403,5.985264,6.492789,0.000000,7.322824,8.089181,10.124912,8.863133,10.044398,6.117716,7.679859,8.636803,5.485200,11.039821,7.265874,9.163585,7.194315,8.486379,0.000180,7.506729,8.091103,6.658399,0.000000,7.265633,0.000000,6.050946,0.000000,7.280463,12.570662,13.142568,11.701504,4,PDO21 + hIL8 12.5ng,PDO21,02
4,0.000000,8.178854,0.000000,9.103629,6.476706,8.039846,0.000000,8.175968,0.000000,0.000000,12.664153,9.277625,11.964352,0.000000,11.132526,8.003578,9.235837,9.957740,9.962703,9.168612,7.610644,10.724780,12.984863,11.373334,12.209044,0.000000,7.923779,10.918396,0.000000,11.589834,10.852352,12.275814,10.065855,14.875286,0.000016,8.336238,10.059984,9.779031,7.887643,7.666186,0.000000,9.579008,9.703934,10.173794,14.883780,15.465149,13.485678,5,PDO21 + hIL8 12.5ng,PDO21,02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331313,4.976480,5.984125,6.718107,4.645827,4.841122,0.000000,4.781921,0.000000,3.808360,5.056894,9.912203,7.488636,10.518306,3.850011,7.790658,6.025979,5.963789,5.984606,6.215233,5.933350,7.045831,6.709207,8.571882,6.802860,9.634683,5.002027,5.926791,7.927945,5.873362,8.865131,5.826417,8.714579,7.296077,5.620483,0.000943,5.815787,5.927600,7.890923,6.042994,5.774533,5.277112,7.415950,6.546023,7.601221,11.184307,11.751677,8.937382,3477,PDO21 + hWnt3a 50ng,PDO21,01
331314,5.004430,5.669357,0.000000,0.000000,2.648648,0.000000,5.804241,5.495030,0.000000,0.000000,10.476237,0.000000,8.569930,6.033047,5.442532,6.002406,5.833211,5.573767,7.570540,5.579698,0.000000,8.926628,10.026410,7.760336,9.055901,5.359166,6.768202,7.898463,5.863215,9.506378,7.119435,8.062572,6.930144,11.329741,0.000906,5.476252,4.938222,8.248334,5.256994,6.924931,4.970683,7.476216,5.914021,6.141732,11.906706,12.463087,8.478515,3478,PDO21 + hWnt3a 50ng,PDO21,01
331315,1.642973,0.000000,0.000000,0.000000,3.433134,3.980296,4.287830,4.622261,2.047682,1.809221,7.222557,3.537575,6.150403,4.586081,0.000000,4.011499,2.421140,4.787629,5.559112,0.000000,4.339225,4.527265,5.704636,5.251106,6.977059,3.904941,4.430077,5.942501,4.237923,6.647016,4.296222,5.456809,4.688202,6.070707,0.005103,3.855777,4.271684,5.638888,5.791345,3.464075,3.199154,4.157178,3.384666,3.240618,9.250606,9.861703,6.479493,3479,PDO21 + hWnt3a 50ng,PDO21,01
331316,3.176733,5.411344,5.885756,5.189017,0.000000,7.027898,6.721495,6.624281,7.544682,6.772921,8.459337,5.878433,10.703256,6.130650,7.948382,9.488934,0.000000,7.268393,4.330506,5.310680,5.530312,0.000000,4.925195,6.582121,9.375894,5.595597,6.615939,7.761496,6.837664,7.476978,5.642974,0.000000,5.557701,7.122648,0.000349,5.439674,5.986502,6.736449,0.000000,0.000000,4.401527,5.207966,7.461409,6.890431,12.571322,13.222748,5.748723,3480,PDO21 + hWnt3a 50ng,PDO21,01


### Store the `Condition` information (optional)

#### Configuration

In [None]:
condition_colmns = ['Patient', 'Culture', 'gd_donor', 'Transduction', 'Treatment', 'Batch', 'Date', 'Replicate']

#### Generate the `Condition` column

In [None]:
if 'Condition' not in metadata.columns:
    # Create a condition column for every cell in the experiment
    processed_data['Condition'] = processed_data[condition_colmns].astype(str).agg('_'.join, axis=1)

    # Add `Condition` to the list of metadata columns
    metadata_columns += 'Condition'

processed_data

### Store the `Control` information (optional)

#### Configuration

In [None]:
# All gd monoculture controls including their transduction, treatment and batch.
control_columns = ['gd_donor', 'Transduction', 'Treatment', 'Batch', 'Date']

#### Generate the `Control` column

In [None]:
if 'Control' not in metadata.columns:
    # Define control for pairwise EMD. 
    processed_data['Control'] = "X_gd_" + processed_data[control_columns].astype(str).agg('_'.join, axis=1)

    # Add `Control` to the list of metadata columns
    metadata_columns += 'Control'

processed_data

### Initialise EMD dataframe

#### Compute the markers list

In [26]:
# For each column in the Dataframe, keep only the ones not in the `metadata_columns` variable
markers_list = [col for col in processed_data.columns if col not in metadata_columns]
# marker_list = list(processed_data.columns.values)
markers_list

['89Y_pHH3_S28_v7',
 '96Ru_96Ru',
 '98Ru_98Ru',
 '99Ru_99Ru',
 '100Ru_100Ru',
 '101Ru_101Ru',
 '102Ru_102Ru',
 '104Ru_104Ru',
 '111Cd_Vimentin RV202 (v67',
 '112Cd_FAP (1) v2',
 '113In_CD326 (EpCAM) (hu) (v6)',
 '114Cd_CK18 (v6)',
 '115In_Pan-CK_v9',
 '116Cd_GFP_v4',
 '127I_IdU',
 '142Nd_cCaspase 3_D175_v6',
 '143Nd_RRM2',
 '144Nd_SOX2 v2',
 '145Nd_pNDRG1 T346 v4',
 '146Nd_L1CAM',
 '147Sm_OPTN',
 '148Nd_CDK1 (1)',
 '149Sm_p4E-BP1_T37',
 '150Nd_pRB_S807_S811_v10',
 '151Eu_sqstm1',
 '153Eu_ANXA1',
 '155Gd_pAKT [S473] v12',
 '156Gd_pNF-kB p65 v8',
 '157Gd_MOPC21',
 '158Gd_pP38 MAPK v7',
 '160Gd_KI67(3)',
 '161Dy_pLATS1',
 '163Dy_H3K9Me3',
 '164Dy_TOP2A (3)',
 '165Ho_AlexaFluor488',
 '167Er_TROP 2(1)',
 '168Er_pSMAD2',
 '169Tm_EphB2',
 '170Er_CHGA v3',
 '171Yb_CD55 v4',
 '172Yb_BIRC3',
 '173Yb_pS6',
 '174Yb_cPARP [D214] (2) (v6)',
 '176Yb_CyclinB1 (2) (v7)',
 '191Ir_DNA 1',
 '193Ir_DNA 2',
 '209Bi_Me2HH3[K4]']

#### Compute the conditions list

In [28]:
# Get the list of unique conditions
conditions_list = pd.unique(processed_data['Condition'].tolist())
conditions_list

array(['PDO21 + hIL8 12.5ng', 'PDO21 + hEGF 25ng', 'PDO21 + hFSTL1 25ng',
       'PDO21 + hFGF 50ng', 'PDO21 + hCxCL12 50ng',
       'PDO21 + hCxCL12 100ng', 'PDO21 + hTNFa 25ng',
       'PDO21 + hPGE2 25nM', 'PDO21 + hVEGF 50ng', 'PDO21 + hHGF 50ng',
       'PDO21 + hIGF2 40ng', 'PDO21 + CAFs', 'PDO21 + hRSpondin 50ng',
       'PDO21 + hIL1b 12.5ng', 'PDO21 + hIL6 12.5ng',
       'PDO21 + hIL1b 25ng', 'PDO21 + hIGF2 20ng', 'PDO21 + hEGF 50ng',
       'PDO21 + hFSTL1 50ng', 'PDO21', 'PDO21 + hTGFb 100ng',
       'PDO21 + hIL8 25ng', 'PDO21 + hVEGF 25ng', 'PDO21 + hHGF 100ng',
       'PDO21 + hIL6 25ng', 'PDO21 + hTNFa 50ng', 'PDO21 + hWnt3a 50ng',
       'PDO21 + hFGF 25ng', 'PDO21 + hPGE2 50nM', 'PDO21 + hTGFb 50ng',
       'PDO21 + hRSpondin 25ng', 'PDO21 + hWnt3a 100ng'], dtype=object)

#### Compute the controls list (unused)

In [29]:
# Get the list of unique controls
controls_list = pd.unique(processed_data['Control'].tolist())
controls_list

array(['PDO21'], dtype=object)

#### Create the DataFrame that will receive the EMD values

In [30]:
# Empty df with NaN values to populate with the EMD values
emd_dataframe = pd.DataFrame(
    np.full(
        (len(conditions_list), len(markers_list)), 
        np.nan),
    columns = markers_list,
    index = conditions_list)


### Calculate EMD scores

In [31]:
# Loop over all the conditions
for condition in conditions_list:
    print(condition)

    # Dataframe of all events for the condition in the list
    condition_events = processed_data.loc[(processed_data["Condition"] == condition)]

    control_name = condition_events['Control'].values[0]

    # Dataframe of all events from the control that will be compared with the events of the current condition
    control_df = processed_data.loc[processed_data["Condition"].str.startswith(control_name)]

    # Loop over all the markers
    for marker in markers_list:

        # Check the sign by using the `median` values
        sign = np.sign(condition_events[marker].median() - control_df[marker].median())

        # In case the median values are equal, use the `mean` instead
        if sign == 0:
            sign = np.sign(condition_events[marker].mean() - control_df[marker].mean())

        # Compute the EMD by multiplying the sign by the EMD score
        emd = scprep.stats.EMD(
            condition_events[marker], 
            control_df[marker]
        )

        # Store the signed EMD in the result Dataframe for the given (condition, marke) pair
        emd_dataframe.loc[condition, marker] = sign * emd

# Ensure that all (condition, marke) pairs have been properly computed
assert not emd_dataframe.isna().values.any()

PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21
PDO21


In [32]:
emd_dataframe

Unnamed: 0,89Y_pHH3_S28_v7,96Ru_96Ru,98Ru_98Ru,99Ru_99Ru,100Ru_100Ru,101Ru_101Ru,102Ru_102Ru,104Ru_104Ru,111Cd_Vimentin RV202 (v67,112Cd_FAP (1) v2,113In_CD326 (EpCAM) (hu) (v6),114Cd_CK18 (v6),115In_Pan-CK_v9,116Cd_GFP_v4,127I_IdU,142Nd_cCaspase 3_D175_v6,143Nd_RRM2,144Nd_SOX2 v2,145Nd_pNDRG1 T346 v4,146Nd_L1CAM,147Sm_OPTN,148Nd_CDK1 (1),149Sm_p4E-BP1_T37,150Nd_pRB_S807_S811_v10,151Eu_sqstm1,153Eu_ANXA1,155Gd_pAKT [S473] v12,156Gd_pNF-kB p65 v8,157Gd_MOPC21,158Gd_pP38 MAPK v7,160Gd_KI67(3),161Dy_pLATS1,163Dy_H3K9Me3,164Dy_TOP2A (3),165Ho_AlexaFluor488,167Er_TROP 2(1),168Er_pSMAD2,169Tm_EphB2,170Er_CHGA v3,171Yb_CD55 v4,172Yb_BIRC3,173Yb_pS6,174Yb_cPARP [D214] (2) (v6),176Yb_CyclinB1 (2) (v7),191Ir_DNA 1,193Ir_DNA 2,209Bi_Me2HH3[K4]
PDO21 + hIL8 12.5ng,-0.12688,0.0806,0.110544,0.104662,-0.108164,-0.12877,0.141408,0.18585,-0.088423,-0.156279,0.047284,-0.227621,0.088975,-0.400931,0.225206,0.132011,0.125078,0.189338,-0.423682,-0.279397,-0.252199,0.178842,-0.175151,0.151464,0.119192,0.157876,-0.319568,0.11093,-0.239171,0.074553,0.202347,0.145006,0.163779,-0.229958,-0.000166,-0.235046,-0.22388,0.25359,-0.321866,-0.345495,-0.281196,0.169062,0.156669,0.133249,0.153098,0.155346,0.328116
PDO21 + hEGF 25ng,0.257427,-0.23437,-0.247021,-0.22813,-0.260619,0.323895,-0.406491,-0.376113,0.26122,0.342858,-0.260064,-0.550005,-0.490756,0.455007,-0.267355,-0.494664,-0.40244,-0.521505,0.882317,0.690891,-0.633221,-0.552258,0.826399,-0.636137,-0.365572,0.574231,0.90488,-0.384188,0.533848,-0.425279,0.549238,-0.579795,-0.514348,0.70859,0.000307,0.516996,-0.486447,-0.666691,0.676275,0.61683,0.542169,-0.540949,-0.476021,-0.517479,-0.459621,-0.461208,-0.653622
PDO21 + hFSTL1 25ng,-0.215855,0.136354,0.140596,-0.182161,0.148336,-0.181038,-0.26845,0.200147,-0.104231,-0.149832,-0.071362,0.248917,0.18193,-0.388685,0.193991,0.226928,-0.184113,0.232015,-1.296829,-0.467862,-0.498717,-0.281453,-0.739536,-0.364833,-0.127818,-0.314638,-0.825347,-0.141987,-0.230063,0.174414,-0.259491,0.285837,0.211449,-0.458308,-0.000193,-0.382147,-0.28346,0.315247,-0.320009,-0.564157,-0.591865,-0.483888,0.237807,-0.356289,0.186558,0.189301,0.405571
PDO21 + hFGF 50ng,-0.199281,0.0521,0.048131,-0.083708,-0.056712,-0.101836,-0.094375,-0.147324,-0.045663,0.080165,0.122579,0.295363,0.119917,0.349772,0.40106,-0.156636,0.16156,0.136811,0.610405,0.267105,-0.196329,0.334825,0.603067,0.511089,0.134318,0.183646,0.636862,0.13527,0.185563,0.389589,0.586769,0.413149,0.573944,0.921151,-0.000128,-0.077312,0.083754,0.599781,0.204437,-0.040747,-0.059526,-0.19323,-0.155803,-0.098779,0.108769,0.10986,0.62522
PDO21 + hCxCL12 50ng,-0.116015,0.136947,0.110625,-0.145619,-0.115845,-0.192379,0.231796,-0.253035,-0.16526,-0.232369,-0.071182,0.291252,0.132471,0.186272,0.147279,0.230129,-0.198369,-0.337397,-1.239758,-0.8189,-0.666857,-0.326839,-0.617809,-0.36818,-0.117171,-0.433775,-0.752951,-0.172266,-0.240037,0.365887,0.311444,0.294807,0.223021,-0.493289,-0.000208,-0.558512,-0.35823,0.310502,-0.456958,-0.697233,-0.64434,-0.353763,0.236421,-0.35491,0.193789,0.195416,0.411062
PDO21 + hCxCL12 100ng,-0.17351,0.137266,0.127422,-0.122576,-0.128079,-0.174706,0.258966,0.168605,-0.130809,-0.189735,0.055451,0.278127,0.158681,0.17622,-0.234746,0.223379,0.217736,-0.276287,-1.004525,-0.602387,-0.476693,-0.275861,-0.570523,-0.312362,0.1042,-0.284698,-0.618702,0.148981,-0.241808,0.230096,-0.2753,0.271187,0.20397,-0.370193,-0.000179,-0.384586,-0.249458,0.287851,-0.378803,-0.561235,-0.435404,-0.340358,0.228244,-0.294314,0.195161,0.196388,0.393404
PDO21 + hTNFa 25ng,0.392227,0.267449,0.256699,0.277667,0.301321,0.423487,0.534704,0.513603,0.159327,0.222763,0.478266,0.335338,0.384321,-0.36663,0.707992,0.570417,0.656737,0.600318,0.935244,0.684885,1.688061,0.911728,0.824146,0.735838,0.626405,0.523195,0.494845,0.649782,0.394719,0.589666,0.441038,0.750797,0.440913,0.715509,-0.000354,0.400742,0.469399,0.487197,0.368086,0.325606,1.014616,0.994917,0.558927,0.700168,0.567963,0.567531,0.469287
PDO21 + hPGE2 25nM,-0.303735,-0.351507,-0.367477,0.324725,0.372702,-0.414141,-0.521774,-0.486812,0.267494,0.382637,-0.368054,0.442607,-0.270027,0.511352,-1.037317,-0.405709,-0.533303,-0.498611,-0.678109,-0.425607,-0.409268,-0.802657,-1.372395,-1.220869,-0.770629,-0.477292,-0.67094,-0.678269,-0.444869,-0.955902,-0.714899,-1.119282,-0.858613,-0.956973,0.000843,0.506764,-0.499664,-1.9912,0.587795,0.966474,-0.527913,-0.580399,-0.542704,-0.357542,-0.719982,-0.721509,-1.335485
PDO21 + hVEGF 50ng,-0.097044,0.093014,0.125216,-0.137476,-0.123816,-0.169107,0.18134,0.178309,-0.122039,-0.202191,-0.074385,-0.246618,0.074459,-0.428542,-0.323559,0.18557,-0.186812,-0.299778,-1.04968,-0.702838,-0.586792,-0.286028,-0.575338,-0.350497,0.113585,-0.390948,-0.686495,0.164261,-0.253641,0.217677,-0.240117,0.271115,-0.25502,-0.434792,-0.000178,-0.407592,-0.256382,0.239355,-0.348216,-0.462373,-0.513874,-0.379114,0.240348,-0.262301,0.149609,0.15101,0.291681
PDO21 + hHGF 50ng,0.073419,0.086724,0.107684,0.077004,0.105623,0.081691,0.134396,0.104271,-0.068712,0.06971,0.119643,0.089979,0.108664,0.163546,0.660926,0.16868,0.227582,0.258705,0.855264,0.436734,-0.114119,0.441215,0.642766,0.455718,0.266629,0.218301,0.599867,0.227169,0.194417,0.365064,0.380652,0.3752,0.35121,0.618925,-0.000186,0.063669,0.139864,0.516482,0.139756,0.070739,0.074502,0.586078,0.122143,0.197514,0.196529,0.196607,0.38958
