# Pulling IDS Data Cubes into Pandas DataFrame (Cytometry Example)

## Import Libraries

In [18]:
import os
import json
import requests
import pandas as pd
import numpy as np

import pprint

## Constants for use in Notebook

In [11]:
# Location of authentication file
AUTH_DIR = "./"
AUTH_FILENAME = "auth.json"

In [51]:
# APIs used in this notebook
BASE_API = "https://api.tetrascience-uat.com/v1/"
API_EQL_SEARCH = BASE_API + "datalake/searchEql"
API_RETRIEVE_FILE = BASE_API + "datalake/retrieve"
API_PIPELINE_INFO = BASE_API + "pipeline/"
API_PIPELINE_CREATION = BASE_API + "pipeline/create"
API_FILE_UPLOAD = BASE_API + 'datalake/upload'

## Pull in Authenication Information for Headers

In [12]:
with open(os.path.join(AUTH_DIR, AUTH_FILENAME), "r") as f:
    auth_data = json.loads(f.read())

headers = {"ts-auth-token": auth_data["auth_token"],
           "x-org-slug": auth_data["org"]}

## Download example cytometry data

* Navigate to [flowcytometry.org](flowcytometry.org) to download freely available cytometry datasets for analysis. In particular, navigate to [this dataset](https://flowrepository.org/id/FR-FCM-Z2KP) with data from a study analyzing blood from individuals who had varying levels of COVID-19 (based on [this study](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7662088/))

* Click on the download button, and then on the download page click "ZIP & Download Files".

* Unzip the file to get a folder full of .fcs files

In [55]:
# Location of fcs dataset
CYTOMETRY_DIR = "./"
CYTOMETRY_FOLDER = "FlowRepository_FR-FCM-Z2KP_files/"

## Create Pipeline to convert Cytometry data to IDS

In [115]:
# example_response = requests.get(API_PIPELINE_INFO+"ef3e9b7f-195e-43ff-996b-11bc90b8c367", headers=headers)
# example_response.text

In [107]:
cytometry_pipeline_info = {'name': 'Example - Create Cytometry Tetra Data',
                           'description': 'Transform FCS to IDS',
                           'triggerType': 'custom',
                           'triggerCondition': {'groupOperator': 'AND',
                                                'groupLevel': 1,
                                                'groups': [{'groupLevel': 2,
                                                            'groupOperator': 'AND',
                                                            'groups': [{'key': 'category', 
                                                                        'operator': 'is', 
                                                                        'value': 'raw'}]},
                                                           {'groupLevel': 2,
                                                            'groupOperator': 'AND',
                                                            'groups': [{'key': 'tags',
                                                                        'operator': 'has a tag that is',
                                                                        'value': 'example-cytometry'}]}]},
                           'protocolSlug': 'bd-flow-cytometers-raw-to-ids',
                           'protocolVersion': 'v1.1.2',
                           'masterScriptNamespace': 'common',
                           'masterScriptSlug': 'bd-flow-cytometers-raw-to-ids',
                           'masterScriptVersion': 'v1.1.2'}

In [65]:
create_cytometry_pipeline = requests.post(API_PIPELINE_CREATION, headers=headers, data=json.dumps(cytometry_pipeline_info))
create_cytometry_pipeline.text

'{"id":"ef3e9b7f-195e-43ff-996b-11bc90b8c367","name":"Example - Create Cytometry Tetra Data","description":"Transform FCS to IDS","triggerType":"custom","triggerCondition":{"groupOperator":"AND","groupLevel":1,"groups":[{"groupLevel":2,"groupOperator":"AND","groups":[{"key":"category","operator":"is","value":"raw"}]},{"groupLevel":2,"groupOperator":"AND","groups":[{"key":"labels.example","operator":"is","value":"cytometry"}]}]},"protocolSlug":"bd-flow-cytometers-raw-to-ids","protocolVersion":"v1.1.2","createdAt":"2023-07-26T23:55:51.681Z","updatedAt":"2023-07-26T23:55:51.681Z","pipelineConfig":null,"masterScriptNamespace":"common","masterScriptSlug":"bd-flow-cytometers-raw-to-ids","masterScriptVersion":"v1.1.2","status":null,"standby":null,"retryBehavior":null,"priority":5,"maxParallelWorkflows":0,"taskScriptTimeoutMins":null,"stepsConfig":{}}'

## Upload Cytometry Data to TDP

In [57]:
fcs_files = [CYTOMETRY_DIR + CYTOMETRY_FOLDER + file for file in 
             os.listdir(os.path.join(CYTOMETRY_DIR, CYTOMETRY_FOLDER)) if ".fcs" in file]
fcs_files

['./FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 21_04_20_ST3_COVID19_ICU_039_A ST3 210420_051_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_W_026_O ST3 230420_003_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_W_017_O ST3 230420_026_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_ICU_025_A ST3 230420_039_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_W_020_O ST3 230420_007_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_HC_006 ST3 230420_015_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 23_04_20_ST3_COVID19_HC_009 ST3 230420_012_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP_files/export_COVID19 samples 21_04_20_ST3_COVID19_W_033_O ST3 210420_036_Live_cells.fcs',
 './FlowRepository_FR-FCM-Z2KP

In [129]:
file_upload_curl = "curl --location '%s' \
--header 'ts-auth-token: %s' \
--header 'x-org-slug: %s' \
--header 'Content-Transfer-Encoding: multipart/form-data' \
--form 'file=@%s' \
--form 'filename=%s' \
--form labels={'label':'tags','value':'example-cytometry'}" % (API_FILE_UPLOAD, auth_data["auth_token"], auth_data["org"], fcs_files[0], fcs_files[0])
# --form tags=['example-cytometry','api-upload']" % (API_FILE_UPLOAD, auth_data["auth_token"], auth_data["org"], fcs_files[0], fcs_files[0])

In [130]:
os.system(file_upload_curl)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 98 10.5M    0     0   98 10.3M      0   379k  0:00:28  0:00:28 --:--:--  392k

{"statusCode":400,"error":"Bad Request","message":"Error validating payload","validations":[{"message":"\"labels\" must be an array","path":["labels"],"type":"array.base","context":{"label":"labels","value":"label:tags","key":"labels"}}]}

100 10.5M  100   238  100 10.5M      8   373k  0:00:29  0:00:28  0:00:01  355k100 10.5M  100   238  100 10.5M      8   373k  0:00:29  0:00:28  0:00:01  335k
curl: (3) URL rejected: Port number was not a decimal number between 0 and 65535


768

## Find all IDS files created by Pipeline

In [45]:
query = {
  "size": 10000,
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "integration.id": "ef3e9b7f-195e-43ff-996b-11bc90b8c367"
          }
        },
        {
          "match": {
            "integration.type": "datapipeline"
          }
        }
      ]
    }
  }
}

payload = json.dumps(query)

In [46]:
api_call = requests.post(API_EQL_SEARCH, headers=headers, data=payload)

In [47]:
results = json.loads(api_call.text)["hits"]["hits"]

In [48]:
len(results)

63

In [53]:
results[0].keys()

dict_keys(['_index', '_type', '_id', '_score', '_source'])

In [92]:
testfileid = results[0]["_source"]["fileId"]
print(testfileid)

50a756e7-7fd6-46d3-8c9e-82929ed19c02


In [93]:
api_call = requests.get(API_RETRIEVE_FILE+"?fileId="+testfileid, headers=headers)

In [95]:
IDS_info = json.loads(api_call.text)

In [130]:
data = [np.array(x["measures"][0]["value"]) for x in IDS_info["datacubes"]]
data = np.vstack(data).T

In [131]:
channels =[x["measures"][0]["name"] for x in IDS_info["datacubes"]]

In [132]:
time = IDS_info["datacubes"][0]["dimensions"][0]["scale"]

In [133]:
pd.DataFrame(data, index=time, columns=channels)

Unnamed: 0,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,FJComp-APC-A,FJComp-APC-H7-A,FJComp-APC-R700-A,FJComp-BB630-A,...,FJComp-BV650-A,FJComp-BV711-A,FJComp-BV750-P-A,FJComp-BV786-A,FJComp-BYG584-A,FJComp-BYG670-A,FJComp-BYG790-A,FJComp-FITC-A,FJComp-PE-CF594-A,FJComp-PE-Cy5.5-A
52.559196,75206.484375,60460.769531,145118.531250,38521.203125,35829.363281,109679.476562,-26.279877,37.950542,655.993469,72.333488,...,144.350006,267.332550,-108.974442,241.959930,254.706177,3166.049561,224.191406,799.648376,59.971340,14.986482
52.626579,78789.265625,65438.390625,151388.531250,33751.750000,31002.316406,107838.789062,3.387179,9.919949,1110.273804,183.456375,...,-142.771408,302.091827,243.083054,27.050117,147.950943,204.697281,254.165543,20.425129,78.229523,53.985115
52.668690,68122.992188,54291.113281,138201.031250,26363.425781,23826.796875,106964.664062,237.850647,32.161606,1106.962280,78.264000,...,102.678787,360.552826,-58.894714,177.047226,418.391541,1032.096191,385.505310,80.962227,388.802551,85.611320
52.675362,107050.398438,84495.179688,152318.593750,59341.621094,51996.875000,119945.328125,117.661797,80.853813,1349.825806,348.445007,...,41.150925,231.066788,-21.439489,67.804100,106.157074,294.789581,-36.141533,46.908463,115.888618,162.422791
52.686203,63816.945312,54338.250000,148976.109375,51197.652344,46842.886719,114617.921875,-15.077876,18.681108,1814.254395,3481.263672,...,976.651184,774.116638,69.086388,131.090851,-109.732552,-112.094185,311.105835,584.948547,-42.293922,212.907364
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187.177399,112839.765625,98341.132812,142445.859375,29081.863281,27505.736328,105076.835938,6.316328,51.644669,942.212097,32.781143,...,174.356812,17.326708,60.387341,-49.492531,445.735321,20.294441,-12.380395,77.997650,230.865921,147.672729
187.189468,89548.960938,74433.585938,142836.250000,28729.320312,26998.513672,107352.179688,-47.678730,13.358541,964.234131,74.517166,...,18.119425,144.460297,-49.831276,-14.251887,363.565735,42.650570,29.225389,41.909229,-48.261032,96.216087
187.197693,85100.273438,69878.351562,149342.781250,32437.927734,31116.955078,105052.789062,18.279118,42.405369,701.323181,98.870407,...,172.429169,892.604736,204.954880,-157.672226,217.989502,120.296196,215.177963,206.483566,103.814499,162.783371
187.208221,87624.460938,72880.015625,145184.968750,48090.003906,46063.785156,111395.398438,78.057076,17.310251,1029.988525,92.421181,...,41.120106,157.431290,224.702728,-38.830032,375.092560,177.576324,407.668121,370.730530,77.303802,-7.522663


## References

* Spidlen J, Breuer K, Rosenberg C, Kotecha N and Brinkman RR. FlowRepository - A Resource of Annotated Flow Cytometry Datasets Associated with Peer-reviewed Publications. Cytometry A. 2012 Sep; 81(9):727-31
* Neumann, J., Prezzemolo, T., Vanderbeke, L., Roca, C. P., Gerbaux, M., Janssens, S., ... & Yserbyt, J. (2020). Increased IL‐10‐producing regulatory T cells are characteristic of severe cases of COVID‐19. Clinical & translational immunology, 9(11), e1204.