In [1]:
import json
import platform
from pathlib import Path
from datetime import datetime, timedelta, date, time
import math

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option("display.precision", 5)
from tqdm import tqdm
import teselagen
from teselagen.api import TeselaGenClient

print("teselagen version: ", teselagen.__version__)

teselagen version:  0.4.0


# ETL Process
Here we perform a series of steps that will get the data originally stored in the Amber250 excel file into CSV files formatted for TEST imports

## Parse Amber250 Data Export Excel File

The Amber250 Excel file has multiple sheets with the measurements of every Assay Subject (HT1 to HT12)

Here we are going to read each sheet and create a CSV file for each of them

In [2]:
# Read the excel spreadsheet file (here we use pandas)
excel_filepath = "./data/ABFr_3HP_ambr_01.xlsx"
# 'subjects_data' it's a dictionary we'll be using to store data split by the different subjects (HT1 to HT12).
subjects_data = {}
reader = pd.ExcelFile(excel_filepath, engine="openpyxl")
for subject_name in reader.sheet_names:
    print(subject_name)
    subject_df = pd.read_excel(excel_filepath, sheet_name=subject_name, engine="openpyxl")
    # We set a Subject ID Index called Reactor
    subject_df.insert(0, "Reactor", subject_name)
    subjects_data[subject_name] = subject_df

HT1
HT2
HT3
HT4
HT5
HT6
HT7
HT8
HT9
HT10
HT11
HT12


In [3]:
# Display the different dataframes and their shape stored in our subjects_data dictionary and an example of one of them
for subject_name, subject_df in subjects_data.items():
    print(subject_name, subject_df.shape)

HT1 (51199, 31)
HT2 (51519, 31)
HT3 (51520, 31)
HT4 (51536, 31)
HT5 (51547, 31)
HT6 (51557, 31)
HT7 (51571, 31)
HT8 (51584, 31)
HT9 (51583, 31)
HT10 (51602, 31)
HT11 (51620, 31)
HT12 (51607, 31)


## Transform the data into a format suitable for a TEST import. 

Define the measurement units: TEST data import require measurements to have a unit associated to its values. The Amber250 spreadsheet however does not export this information, so we are manually going to define the units based on offline documents ABPDU has shared.

Process the data: TEST data import supports data with multiple measurements, however in this case each measurement comes with its own independent time column.

So we'll have to split each measurement into its own file and import them separately into the assay.

In [4]:
# Here we define each measurement's units. Units are very important when importing scientific data into TEST.
measurement_units = {
    "Time": "hrs",
    "Acid volume pumped": "mL",
    "Air flow": "lpm",
    "Antifoam volume pumped": "mL",
    "Base volume pumped": "mL",
    "CER": "dimensionless",
    "DO": "dimensionless",
    "Feed#1 volume pumped": "mL",
    "OUR": "dimensionless",
    "pH": "dimensionless",
    "RQ": "dimensionless",
    "Stir speed": "rpm",
    "Temperature": "C",
    "Volume": "L",
    "Volume - sampled": "L",
    "Volume of inocula": "L"
}


### Build a data decimation or downsampling function
We will use this function to perform data decimation for each of our measurement tables.

This is not mandatory but it will make the import times faster and database storage lighter

In [5]:
def decimate_data(dataframe: pd.DataFrame, sampling_period: str = None,  max_datapoints: int = 100, with_datetime: bool = False, verbose: bool = False) -> pd.DataFrame:
    
    # Create a date time column for decimation with pandas resample. We only have "hour" information, thus a dummy date will be used.
    dataframe["DateTime"] = dataframe.Time.apply(lambda hours: datetime.combine(date.today(), time()) + timedelta(hours=hours))
    # Compute the time span in minutes.
    minutes_span=(dataframe["DateTime"].max()-dataframe["DateTime"].min()).total_seconds() / 60

    # Compute the sampling period automatially based on the number of datapoints wanted (defaulted to 200).
    if minutes_span > max_datapoints and sampling_period is None:
        minutes_step = math.floor(minutes_span / max_datapoints)
        dataframe.set_index("DateTime", inplace=True)
        dataframe=dataframe.resample(rule=f"{minutes_step}Min").first().dropna().reset_index(drop=not with_datetime)
        if verbose:
            print("minutes_span", minutes_span)
            print("minutes_step", minutes_step)
            print("datapoints left", dataframe.shape[0])
    elif sampling_period is not None: 
        dataframe.set_index("DateTime", inplace=True)
        dataframe=dataframe.resample(rule=sampling_period).first().dropna().reset_index(drop=not with_datetime)
    else:
        dataframe.set_index("DateTime", inplace=True)
        dataframe.dropna().reset_index(drop=not with_datetime)
    
    return dataframe

### Construct a data table (dataframe) for each measurement constructed with the measurement data from all the subjects.
The result will be one table per measurement column, with the following columns: Reactor, Time, Time Unit, "MeasurementName", "MeasurementName" Unit.

In [6]:
# Every measurement in the data comes with its own Time column which in turn has its own time values,
# this prevents us from importing the different measurements together at once because they are not associated or aligned against a unique reference dimension.
# Fortunately we can split them into different one measurement tabular files and import them separately.

# Here we store an auxiliary list of all the columns in the data. (all subjects have the same columns so we'll just get them from the first one)
column_names = list(subjects_data.values())[0].columns.values
# 'measurements_data' is a dictionary mapping the different measurement names to their dataframes.
measurements_data = {}
measurements_metadata = {}
# Loop through each measurement in the data
for measurement_number, measurement_name in enumerate(column_names[slice(2, None, 2)]):
    # if measurement_name == "Base volume pumped": break
    measurement_df = None
    # Loop through the data of every subject
    for subject_name, subject_df in subjects_data.items():
        # if subject_name == "HT1": continue
        # elif subject_name == "HT3": break
        # Get the current measurement values
        measurement_values = subject_df[measurement_name]
        # Get the current timepoint for the above vslues
        measurement_timepoints = subject_df[column_names[2 * (measurement_number + 1) - 1]]
        
        # This skips any measurement with no values (e.g., 'Antifoam volume pumped').
        if(measurement_values.dropna().shape[0] == 0): continue
        
        # Here each measurement result table is being created.
        subject_measurement_df = pd.DataFrame({
            "Reactor": subject_df["Reactor"],
            "Time": measurement_timepoints,
            "Time units": measurement_units["Time"],
            measurement_name: measurement_values,
            f"{measurement_name} units": measurement_units[measurement_name]
        })
        # measurements_data.append(measurement_df)
        # Drops any rows with NaN values.
        subject_measurement_df.dropna(inplace=True)
        
        # Decimate data automatically based on the measurement time span.
        subject_measurement_df = decimate_data(dataframe=subject_measurement_df)
                
        if measurement_df is None:
            measurement_df = subject_measurement_df
        else:
            measurement_df = measurement_df.append(subject_measurement_df)
    
    if measurement_df is not None: measurements_data[measurement_name] = measurement_df

In [7]:
# Display the different dataframes and their shape stored in our subjects_data dictionary and an example of one of them
for measurement, measurement_df in measurements_data.items():
    print(measurement, measurement_df.shape)
# measurements_data['DO']

Acid volume pumped (170, 5)
Air flow (1200, 5)
Base volume pumped (204, 5)
CER (1212, 5)
DO (1212, 5)
Feed#1 volume pumped (105, 5)
OUR (1217, 5)
pH (1188, 5)
RQ (1217, 5)
Stir speed (1212, 5)
Temperature (846, 5)
Volume (1200, 5)
Volume - sampled (180, 5)
Volume of inocula (12, 5)


In [8]:
# Once we have each subject's measurements separated, we proceed to generate a CSV file for each of these dataframes.
# We are going to use these CSVs later on once we call TEST API.

# Here we export these dataframes as CSV files into the directories created for the subjects subject (HT1 to HT12).
measurements_data_dir = Path("./data/measurements")
Path(measurements_data_dir).mkdir(parents=True, exist_ok=True)
# Loop through the data of every subject
for measurement_name, measurement_data in measurements_data.items():
    measurement_data_file = measurements_data_dir / Path(f"{measurement_name}.csv")
    # Creates the directory in case is does not exist.
    measurement_data.to_csv(measurement_data_file, index=False)

# Connect to the TeselaGen Client

Here we are going to import the TeselaGenClient from the 'teselagen' Python package.

We are going to create a client instance and call its 'login' method with valid credentials.

Then we are going to select the Laboratory we are going to want to work in.

In [9]:
#host_url="http://host.docker.internal:3000"
host_url="https://pr-app-7840.teselagen.net/"
client = TeselaGenClient(host_url=host_url, module_name="test")
client.login(
    username='test@teselagen.com',
    # apiKey='' # Get your API Key from the TeselaGen web Application at: Settings > API Key > Generate CLI API Key
)

Connection Accepted


## Select Laboratory/Project

In [10]:
## Fetch My Laboratories
display(client.get_laboratories())

## Select a Laboratory
client.select_laboratory(lab_name='The Test Lab')
#client.unselect_laboratory()

[{'id': '1', 'name': 'The Test Lab'}]

Selected Lab: The Test Lab


# Prepare Laboratory Environment
### Before importing data, we first prepare our new Laboratory environment.

1) Create TEST **data types** according to the multiomics files. These are used to map the different data file headers.
   The different data type records we are going to create are:
   
      b. Mesurement Target (names and decribes each measurement)
      c. Assay Subject Class (class or category of the subject under evaluation (i.e., the bioreactor) ) 
      d. Reference Dimension (Time dimension)
      e. Unit (different used units across measurements)

2) Create an experiment, this will be the scope of our files and assay measurements.

## 1. Creating Data Types
Here we are going to create all the necessary data type records needed according to the Ambr250 file headers.
In TEST, data type records are strictly related to the mapping and understanding of each column of tabular data. There are different **types** of data types (refer to [Metadata Documentation](https://docs.teselagen.com/en/articles/4508837-test-data-mapping)).

One way of understanding TEST data type records is that these are used to **map** (i.e., give meaning) to columns in tabular data, much like tabular headers do but in a more structured and organized manner.

The following Notebook cells show how to create these data type records. For each record created, an ID will be returned. These IDs will be particularly important when creating the different **mappers** (array of structured headers) used to import the tabular data into TEST data lake.

### Load: Unit Dimensions
Units are very important when when getting your data structured. TEST units are composed of the **unit name** (or symbol) and associated with a physical **unit dimension** (e.g., Time, Volume, Pressure, etc).

In [11]:
# Unit dimensions of each of the above units.
colum_unit_dimensions = [
    {'name':'Elapsed Time'},
    {'name':'Volume'},
    {'name':'Volumetric Flow Rate'}, 
    {'name':'Rotational Speed'}, 
    {'name':'Temperature'},
    {'name':'Dimensionless'} # Dummy dimension for dimensionless measurements (s.a., OD)
]
unitDimensions = client.test.create_metadata(metadataType="unitDimension", metadataRecord=colum_unit_dimensions)
unitDimensionNameToId = {unitDimension['name']: unitDimension['id'] for unitDimension in unitDimensions}
unitDimensionNameToId

{'Elapsed Time': '1',
 'Volume': '2',
 'Volumetric Flow Rate': '9',
 'Rotational Speed': '7',
 'Temperature': '8',
 'Dimensionless': '6'}

### Load: Unit Scales
Unit scales are another useful feature of TEST Units. They provide a way to group different units together in order to convert measurement from one to another interchangeably when doing data analysis.

These however are not strictly required and can be omitted.

In [12]:
# Unit scales for the above unit dimensions.
colum_unit_scales = [
    {'name':'Elapsed time scale', 'unitDimensionId': unitDimensionNameToId['Elapsed Time']},
    {'name':'Volume scale', 'unitDimensionId': unitDimensionNameToId['Volume']},
    {'name':'Volumetric flow rate scale', 'unitDimensionId': unitDimensionNameToId['Volumetric Flow Rate']},
    {'name':'Rotational Speed scale', 'unitDimensionId': unitDimensionNameToId['Rotational Speed']},
    {'name':'Temperature scale', 'unitDimensionId': unitDimensionNameToId['Temperature']},
    {'name':'Dimensionless scale', 'unitDimensionId': unitDimensionNameToId['Dimensionless']}
]
unitScales = client.test.create_metadata(metadataType='unitScale', metadataRecord=colum_unit_scales)
unitScaleNameToId = {unitScale['name']: unitScale['id'] for unitScale in unitScales}
unitScaleNameToId

{'Elapsed time scale': '7',
 'Volume scale': '8',
 'Volumetric flow rate scale': '9',
 'Rotational Speed scale': '10',
 'Temperature scale': '11',
 'Dimensionless scale': '12'}

### Load: Units
As explained above, these are the unit names or symbols.

In [13]:
print(list(measurement_units.values()))

['hrs', 'mL', 'lpm', 'mL', 'mL', 'dimensionless', 'dimensionless', 'mL', 'dimensionless', 'dimensionless', 'dimensionless', 'rpm', 'C', 'L', 'L', 'L']


In [14]:
# Unit names (aliases) for each data column.
column_units = [
    {'name':'hrs', 'unitScaleId': unitScaleNameToId['Elapsed time scale']},
    {'name':'mL', 'unitScaleId': unitScaleNameToId['Volume scale']},
    {'name':'lpm', 'unitScaleId': unitScaleNameToId['Volumetric flow rate scale']},
    {'name':'rpm', 'unitScaleId': unitScaleNameToId['Rotational Speed scale']},
    {'name':'C', 'unitScaleId': unitScaleNameToId['Temperature scale']},
    {'name':'dimensionless', 'unitScaleId': unitScaleNameToId['Dimensionless scale']},
]
units = client.test.create_metadata(metadataType='unit', metadataRecord=column_units)
unitNameToId = {unit['name']: unit['id'] for unit in units}
unitNameToId

{'hrs': '1',
 'mL': '18',
 'lpm': '15',
 'rpm': '14',
 'C': '16',
 'dimensionless': '17'}

### Load: Assay Subject Class
TEST **Assay Subject Class** type is of great importance. This supports the classification of the subjects under evaluation or under measure. Every measurement needs to be linked to an Assay Subject. It allows consistency,traceability and organization for you data.

within the scope of your **TEST Laboratory**, you can be studying different types of subjects (e.g., Bacterial Strains, Enzyme mutants, Bioreactor Runs, etc.). Assay Subject Class provides a way to classify them or group them accordingly.

Here we are just going to create a single Assay Subject Class called **Run**, which will be the class of al Ambr250 Runs, from HT1 to HT12.

In [15]:
# Assay Subject corresponds to the "ID" of the "object" under evaluation.
assay_subject_class=[{'name':'Reactor', 'description': 'Ambr250 Reactor'}]
assaySubjectClasses = client.test.create_metadata(metadataType='assaySubjectClass', metadataRecord=assay_subject_class)
assaySubjectClassNameToId = {assaySubjectClass['name']: assaySubjectClass['id'] for assaySubjectClass in assaySubjectClasses}
assaySubjectClassNameToId

{'Reactor': '12'}

### Load: Reference Dimension
Measurements are usually taken against a reference dimension. The most common being **Time**. TEST Reference Dimension is just that, you will hardly ever need to create a new Reference Dimension but this allows to associated each of your measurements against a reference.

Think it as a 2D plot, where your measurement corresponds to the Y-Axis, and your reference to the X-Axis.

In [16]:
# Reference Dimensions correspond to an independent variable against which your measurements are taken.
reference_dimensions=[
    {'name':'Elapsed Time', 'description': 'Time represented as elapsed hours.', "unitDimensionId":1}
]
referenceDimensions = client.test.create_metadata(metadataType='referenceDimension', metadataRecord=reference_dimensions)
referenceDimensionNameToId = {referenceDimension['name']: referenceDimension['id'] for referenceDimension in referenceDimensions}
referenceDimensionNameToId

{'Elapsed Time': '1'}

### Load: Measurement Targets
Usually organizational data becomes messy or difficult to align across the many scientist recording their own measurements into sometimes losely-structured spreadsheets with non-standardized measurement column names. 

This last TEST data type tackles just that, and each measurement needs to be mapped to a **Measurement Target**. This way every piece of data that gets imported into TEST Data Lake, will be structured and easily comprehensible by the organization.

In [17]:
# These correspond to the "Targets" being measured in this data. 
# Each of them are assigned a 'name'
# and also a 'description' that can help clarify what the measurement target is to the organization.
measurement_targets = [{"name": measurement_name, "description": "edit"} for measurement_name in column_names[slice(2,None,2)]]
measurementTargets = client.test.create_metadata(metadataType='measurementTarget', metadataRecord=measurement_targets)
measurementTargetNameToId = {measurementTarget['name']: measurementTarget['id'] for measurementTarget in measurementTargets}
measurementTargetNameToId

{'Acid volume pumped': '53',
 'Air flow': '50',
 'Antifoam volume pumped': '45',
 'Base volume pumped': '47',
 'CER': '49',
 'DO': '46',
 'Feed#1 volume pumped': '48',
 'OUR': '51',
 'pH': '59',
 'RQ': '52',
 'Stir speed': '58',
 'Temperature': '56',
 'Volume': '57',
 'Volume - sampled': '55',
 'Volume of inocula': '54'}

## 2. Create Experiment (Study)
Experiments are part of TEST organizational hierarchy. These live under a Laboratory scope and can be used to group many **Assays** together.

For this data, we are going to create an Experiment where we're going to store all our assay files and data we built above.

In [18]:
## This will create a new Experiment. The output will give as the Experiment ID that we'll be using later.
experiment_name="Ambr250"
experiment = client.test.create_experiment(experiment_name=experiment_name)
print(experiment)
experiment_id = experiment['id']

{'id': '24', 'name': 'Ambr250'}


# TEST Data Import
Now that the Laboratory has been prepared, we are ok to begin the data import process.

We are going to import the data we split into assays into TEST Data Lake.

1) Create **mapper** objects for each measurement results. These will map the **metadata** we created with the columns in the data.

2) Import the data using the stored CSV along with the **mapper** object

## Mapper Object
Mapper Objects are always hand in hand with tabular data when importing into TEST Data Lake. These mappers are used to assign a TEST data type (like the ones we created above) to each of the data columns. This way data in TEST Data Lake is always kept structured, comprehensible, traceable and organized.

A mapper object is simply a list or an array of a few key/value properties:
  - name: this is the name of the column
  - class: this is the data type class (e.g., assaySubjectClass, measurementTarget or unit).
  - subClass: this is the data tybe subclass (e.g., "Reactor" for assaySubjectClasses, or "Elapsed Time" for referenceDimensions)

In [19]:
# Since all of the assays have the same tabular format, each .
mapper_objects = {}
for measurement_name in measurements_data.keys():
    mapper_object = [
        {
            "name": "Reactor", # Name of the column containing the Run Names (HT1 to HT12) 
            "class": "assaySubjectClass", # TEST Assay Subject Class data type.
            "subClass": assaySubjectClassNameToId["Reactor"] # ID of the "Reactor" data type record.
        },
        {
            "name": "Time", # Name of the column containing the measurement times.
            "class": "referenceDimension", # TEST Reference Dimension data type.
            "subClass": referenceDimensionNameToId['Elapsed Time'] # ID of the referenceDimension data type record.
        },
        {
            "name": "Time units", # Name of the column containing the unit of time used for each measurement.
            "class": "d-unit",
            # ID of the referenceDimension metadata record.
            # This is in order to assign this "Unit" column to the Time column measurements.
            "subClass": referenceDimensionNameToId['Elapsed Time']
        },
        {
            "name": measurement_name,
            "class": "measurementTarget",
            "subClass": measurementTargetNameToId[measurement_name]
        },
        {
            "name": f"{measurement_name} units",
            "class": "unit",
            "subClass": measurementTargetNameToId[measurement_name]
        }
    ]
    mapper_objects[measurement_name] = mapper_object

In [20]:
# Let's visualize how the Mapper objects look like.
display(mapper_objects[measurement_name])

[{'name': 'Reactor', 'class': 'assaySubjectClass', 'subClass': '12'},
 {'name': 'Time', 'class': 'referenceDimension', 'subClass': '1'},
 {'name': 'Time units', 'class': 'd-unit', 'subClass': '1'},
 {'name': 'Volume of inocula', 'class': 'measurementTarget', 'subClass': '54'},
 {'name': 'Volume of inocula units', 'class': 'unit', 'subClass': '54'}]

## Use the mapper objects together with the measurement files to import its data into TEST Data Lake

In [21]:
# This will loop though the measurement CSVs and submit an import proecss for each of those into the same Assay.
# Each of these submissions returns back an 'importId' that we can use to query the state/progress of the import process.
# Acid volume pumped (170, 5)
# Air flow (1200, 5)
# Base volume pumped (204, 5)
# CER (1212, 5)
# DO (1212, 5)
# Feed#1 volume pumped (105, 5)
# OUR (1217, 5)
# pH (1188, 5)
# RQ (1217, 5)
# Stir speed (1212, 5)
# Temperature (846, 5)
# Volume (1200, 5)
# Volume - sampled (180, 5)
# Volume of inocula (12, 5)
import_responses = []
measurements_dir = Path("./data/measurements")
assay_name = "Amber250 Assay 4"
for measurement_name in measurements_data.keys():
    # if measurement_name not in ['Volume - sampled']:
    #     continue
    assay_result_filepath = measurements_dir / Path(f"{measurement_name}.csv")
    # assay_name = f"{measurement_name} Assay"
    import_response = client.test.import_assay_results(
        filepath=assay_result_filepath,
        assay_name=assay_name,
        experiment_id=experiment_id,
        mapper=mapper_objects[measurement_name]
    )
    print(f"Import ID={import_response['importId']} processing {measurement_name} data file into '{assay_name}' assay.")
    import_responses.append(import_response)

Import ID=25 processing Acid volume pumped data file into 'Amber250 Assay 4' assay.
Import ID=26 processing Air flow data file into 'Amber250 Assay 4' assay.
Import ID=27 processing Base volume pumped data file into 'Amber250 Assay 4' assay.
Import ID=28 processing CER data file into 'Amber250 Assay 4' assay.
Import ID=29 processing DO data file into 'Amber250 Assay 4' assay.
Import ID=30 processing Feed#1 volume pumped data file into 'Amber250 Assay 4' assay.
Import ID=31 processing OUR data file into 'Amber250 Assay 4' assay.
Import ID=32 processing pH data file into 'Amber250 Assay 4' assay.
Import ID=33 processing RQ data file into 'Amber250 Assay 4' assay.
Import ID=34 processing Stir speed data file into 'Amber250 Assay 4' assay.
Import ID=35 processing Temperature data file into 'Amber250 Assay 4' assay.
Import ID=36 processing Volume data file into 'Amber250 Assay 4' assay.
Import ID=37 processing Volume - sampled data file into 'Amber250 Assay 4' assay.
Import ID=38 processing

In [25]:
# Here we show how we can query the state/progress of an import process for one of the submitted ones above.
import_process = client.test.get_assay_results_import_status(importId=38)
display(import_process)

{'url': 'https://pr-app-7840.teselagen.net//test/cli-api/assays/results/import/38',
 'status': True,
 'content': {'importId': '38',
  'assayId': '23',
  'status': {'code': 'FINISHED', 'description': 'Importer job finished'},
  'message': None}}

In [23]:
display(client.test.get_assays())
12*13

[{'id': '1',
  'name': 'Example Assay',
  'experiment': {'id': '1', 'name': 'Example Experiment'}},
 {'id': '17',
  'name': 'Wild Type Optical Density',
  'experiment': {'id': '22', 'name': 'Multiomics data for WT Strain'}},
 {'id': '18',
  'name': 'Wild Type External Metabolites',
  'experiment': {'id': '22', 'name': 'Multiomics data for WT Strain'}},
 {'id': '19',
  'name': 'Wild Type Transcriptomics',
  'experiment': {'id': '22', 'name': 'Multiomics data for WT Strain'}},
 {'id': '20',
  'name': 'Wild Type Proteomics',
  'experiment': {'id': '22', 'name': 'Multiomics data for WT Strain'}},
 {'id': '21',
  'name': 'Wild Type Metabolomics',
  'experiment': {'id': '22', 'name': 'Multiomics data for WT Strain'}},
 {'id': '22',
  'name': 'Isoprenol Production',
  'experiment': {'id': '23', 'name': 'Multiomics BE strains data'}},
 {'id': '23',
  'name': 'Amber250 Assay 4',
  'experiment': {'id': '24', 'name': 'Ambr250'}}]

156

In [26]:
results_with_subject_data=client.test.get_assay_results(assay_id=23, as_dataframe=True, with_subject_data=True)
pd.DataFrame(results_with_subject_data)

Using the 'page_size' argument for pagination is advised (default page_size=200).
Using the 'page_number' argument for pagination is advised (default page_number=1).


Unnamed: 0,assayId,fileId,data
0,23,30,Subject ID Subject Name Subject Class Ela...
1,23,38,Subject ID Subject Name Subject Class Elap...


In [None]:
pd.DataFrame(results_with_subject_data[3]["data"])
