In [1]:
import polars as pl

# Connection string for the image database (read-only account).
# NOTE(review): credentials are embedded in the URI; consider loading it from an environment variable.
db_uri = "postgresql://pharmbio_readonly:readonly@imagedb-pg-postgresql.services.svc.cluster.local/imagedb"

# One row per distinct project name, in alphabetical order.
query = """
        SELECT project
        FROM image_analyses_per_plate
        GROUP BY project
        ORDER BY project 
        """

# Run the query and load the result set into a Polars dataframe.
df_projects = pl.read_database(query, db_uri)

# Preview the first five project names.
df_projects.head(n=5)

project
str
"""160621-Wash-Op…"
"""2020_11_04_CPJ…"
"""24OHC-v1"""
"""A549-VictorChi…"
"""Aish"""


In [2]:
import polars as pl
from collections import Counter

# Connection string for the image database (read-only account).
# NOTE(review): credentials are embedded in the URI; consider loading it from an environment variable.
db_uri = "postgresql://pharmbio_readonly:readonly@imagedb-pg-postgresql.services.svc.cluster.local/imagedb"

# Fragment that the project name must contain (ILIKE makes the match case-insensitive).
NameContains = "AROS-"
query = f"""
        SELECT *
        FROM image_analyses_per_plate
        WHERE project ILIKE '%%{NameContains}%%'
        AND meta->>'type' = 'cp-qc'
        AND analysis_date IS NOT NULL
        ORDER BY plate_barcode 
        """

# Run the query and load the matching cp-qc analyses into a Polars dataframe.
df_cp_results = pl.read_database(query, db_uri)

# Scratch snippets for inspecting analysis ids and repeated barcodes (kept disabled):
# df_cp_results['analysis_id'].to_list()
# counter = Counter(df_cp_results['plate_barcode'].to_list())
# [item for item, count in counter.items() if count > 1]

# List the distinct project names that matched the search fragment.
df_cp_results.unique('project').get_column('project').to_list()

['AROS-CP', 'AROS-Reproducibility-MoA-Full']

In [3]:
from collections import Counter

# Example input with repeated entries
my_list = ['apple', 'banana', 'apple', 'pear', 'banana', 'kiwi']

# Tally how many times each item appears
counter = Counter()
counter.update(my_list)

print(counter)

Counter({'apple': 2, 'banana': 2, 'pear': 1, 'kiwi': 1})


In [4]:
import polars as pl

# Connection string for the image database (read-only account).
# NOTE(review): credentials are embedded in the URI; consider loading it from an environment variable.
db_uri = "postgresql://pharmbio_readonly:readonly@imagedb-pg-postgresql.services.svc.cluster.local/imagedb"

# Prefix that the project name must start with.
NameContains = "AROS-R"
query = f"""
        SELECT *
        FROM image_analyses_per_plate
        WHERE project LIKE '{NameContains}%%'
        AND meta->>'type' = 'cp-qc'
        AND analysis_date IS NOT NULL
        ORDER BY plate_barcode 
        """

# Query database and store result in Polars dataframe
df_cp_results = pl.read_database(query, db_uri)

# Rows whose plate_barcode occurs more than once in the result set
duplicates = df_cp_results.filter(pl.col("plate_barcode").is_duplicated())

if not duplicates.is_empty():
    # Report every repeated barcode together with the analysis ids that share it
    for name, group in duplicates.groupby("plate_barcode"):
        print(
            f"The plate with barcode {name} is replicated {len(group)} times with analysis_id of {group['analysis_id'].to_list()}"
        )

# This count used to be a bare expression in the middle of the cell, so its value was
# silently discarded (only the last expression of a cell is displayed). Print it instead.
print(f"Number of unique plates: {df_cp_results.n_unique('plate_barcode')}")

df_cp_results

The plate with barcode P013726 is replicated 2 times with analysis_id of [3249, 3241]


project,plate_barcode,plate_acq_name,plate_acq_id,analysis_id,analysis_date,analysis_error,meta,pipeline_name,results
str,str,str,i32,i32,str,str,str,str,str
"""AROS-Reproduci…","""P013725""","""P013725""",3072,3248,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"
"""AROS-Reproduci…","""P013726""","""P013726""",3073,3249,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"
"""AROS-Reproduci…","""P013726""","""P013726""",3073,3241,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"


In [5]:
# For plates analysed more than once, keep only the row with the highest analysis_id:
# sort descending so that row comes first, keep the first row per barcode, then restore ascending order.
(
    df_cp_results
    .sort("analysis_id", descending=True)
    .unique('plate_barcode', keep='first')
    .sort("analysis_id")
)

project,plate_barcode,plate_acq_name,plate_acq_id,analysis_id,analysis_date,analysis_error,meta,pipeline_name,results
str,str,str,i32,i32,str,str,str,str,str
"""AROS-Reproduci…","""P013725""","""P013725""",3072,3248,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"
"""AROS-Reproduci…","""P013726""","""P013726""",3073,3249,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"


In [6]:
# Exclude every row whose analysis_id is one of the listed ids
df_cp_results.filter(
    ~pl.col('analysis_id').is_in([475, 471, 479])
)

project,plate_barcode,plate_acq_name,plate_acq_id,analysis_id,analysis_date,analysis_error,meta,pipeline_name,results
str,str,str,i32,i32,str,str,str,str,str
"""AROS-Reproduci…","""P013725""","""P013725""",3072,3248,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"
"""AROS-Reproduci…","""P013726""","""P013726""",3073,3249,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"
"""AROS-Reproduci…","""P013726""","""P013726""",3073,3241,"""2023-03-23""",,"""{""priority"":nu…","""384-96_QC-batc…","""/share/data/ce…"


In [7]:
# Retain only the rows whose analysis_id is one of the listed ids
df_cp_results.filter(
    pl.col('analysis_id').is_in([475, 471, 479])
)

project,plate_barcode,plate_acq_name,plate_acq_id,analysis_id,analysis_date,analysis_error,meta,pipeline_name,results
str,str,str,i32,i32,str,str,str,str,str


In [8]:
import polars as pl
import os

def get_file_extension(filename):
    """Find which supported extension exists on disk for ``filename``.

    The candidates '.parquet', '.csv' and '.tsv' are tried in that order by
    appending each one to ``filename``.

    Returns the first extension for which the resulting path is an existing
    file. If none exists, a warning is printed and None is returned.
    """
    possible_extensions = ['.parquet', '.csv', '.tsv']
    match = next(
        (suffix for suffix in possible_extensions if os.path.isfile(filename + suffix)),
        None,
    )
    if match is None:
        print(f'Warning: File {filename} with extensions {possible_extensions} not found.')
    return match

def read_file(filename, extension):
    """Load ``filename + extension`` into a Polars dataframe.

    '.parquet' is read with ``pl.read_parquet``; '.csv' and '.tsv' are read
    with ``pl.read_csv`` using a comma or a tab separator respectively.
    Any other extension yields None.
    """
    if extension == '.parquet':
        return pl.read_parquet(filename + extension)
    if extension == '.csv':
        return pl.read_csv(filename + extension, separator=',')
    if extension == '.tsv':
        return pl.read_csv(filename + extension, separator='\t')
    return None

# Keep one analysis per plate: when a barcode was analysed more than once,
# retain the row with the highest analysis_id
df_filtered_results = df_cp_results.sort("analysis_id", descending=True).unique('plate_barcode', keep='first').sort("analysis_id")

# Add qc-file column based on 'results' and 'plate_barcode' columns
# (path of the QC file without its extension)
df_filtered_results = df_filtered_results.with_columns(
    (pl.col('results') + 'qcRAW_images_'+ pl.col('plate_barcode')).alias('qc-file')
)

print(f"Quality control data of {df_filtered_results.height} plates imported:\n")

# Read every QC file that exists, tagging its rows with the acquisition id and barcode
dfs = []
for row in df_filtered_results.iter_rows(named=True):
    ext = get_file_extension(row['qc-file'])
    if ext is None:
        # get_file_extension already printed a warning; skip instead of
        # echoing the path with a literal "None" suffix
        continue
    print(f"\t{row['qc-file']}{ext}")
    df = read_file(row['qc-file'], ext)
    df = df.with_columns(
        pl.lit(row['plate_acq_id']).alias('Metadata_AcqID'),
        pl.lit(row['plate_barcode']).alias('Metadata_Barcode')
    )
    dfs.append(df)

# Fail with an explicit message rather than an opaque concat error when nothing was read
if not dfs:
    raise FileNotFoundError('None of the expected quality control files were found.')

# Concatenate all the dataframes at once
df_concatenated_files = pl.concat(dfs, how='vertical')

df_concatenated_files

Quality control data of 2 plates imported:

	/share/data/cellprofiler/automation/results/P013725/3072/3248/qcRAW_images_P013725.parquet
	/share/data/cellprofiler/automation/results/P013726/3073/3249/qcRAW_images_P013726.parquet


AreaOccupied_AreaOccupied_nuclei,AreaOccupied_Perimeter_nuclei,AreaOccupied_TotalArea_nuclei,Count_nuclei,ExecutionTime_01LoadData,ExecutionTime_02MeasureImageQuality,ExecutionTime_03FlagImage,ExecutionTime_04MeasureImageQuality,ExecutionTime_05FlagImage,ExecutionTime_06MeasureImageQuality,ExecutionTime_07FlagImage,ExecutionTime_08MeasureImageQuality,ExecutionTime_09FlagImage,ExecutionTime_10MeasureImageQuality,ExecutionTime_11FlagImage,ExecutionTime_12IdentifyPrimaryObjects,ExecutionTime_13MeasureImageAreaOccupied,ExecutionTime_14MeasureObjectSizeShape,ExecutionTime_15CalculateMath,ExecutionTime_16CalculateMath,FileName_CONC,FileName_HOECHST,FileName_MITO,FileName_PHAandWGA,FileName_SYTO,Group_Index,Group_Length,Group_Number,Height_CONC,Height_HOECHST,Height_MITO,Height_PHAandWGA,Height_SYTO,ImageNumber,ImageQuality_Correlation_CONC_10,ImageQuality_Correlation_CONC_30,ImageQuality_Correlation_HOECHST_200,…,StDev_nuclei_AreaShape_Zernike_8_2,StDev_nuclei_AreaShape_Zernike_8_4,StDev_nuclei_AreaShape_Zernike_8_6,StDev_nuclei_AreaShape_Zernike_8_8,StDev_nuclei_AreaShape_Zernike_9_1,StDev_nuclei_AreaShape_Zernike_9_3,StDev_nuclei_AreaShape_Zernike_9_5,StDev_nuclei_AreaShape_Zernike_9_7,StDev_nuclei_AreaShape_Zernike_9_9,StDev_nuclei_Location_Center_X,StDev_nuclei_Location_Center_Y,StDev_nuclei_Location_Center_Z,Threshold_FinalThreshold_nuclei,Threshold_GuideThreshold_nuclei,Threshold_OrigThreshold_nuclei,Threshold_SumOfEntropies_nuclei,Threshold_WeightedVariance_nuclei,URL_CONC,URL_HOECHST,URL_MITO,URL_PHAandWGA,URL_SYTO,Width_CONC,Width_HOECHST,Width_MITO,Width_PHAandWGA,Width_SYTO,qc_flag_rawCONC_Blurred,qc_flag_rawCONC_Saturated,qc_flag_rawHOECHST_Blurry,qc_flag_rawHOECHST_Saturated,qc_flag_rawMITO_Blurry,qc_flag_rawMITO_Saturated,qc_flag_rawPHAandWGA_Blurry,qc_flag_rawPHAandWGA_Saturated,qc_flag_rawSYTO_Blurred,qc_flag_rawSYTO_Saturated
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
773018.0,56064.0,9e6,261.0,4.65,26.85,0.0,30.790001,0.01,31.780001,0.0,27.940001,0.01,27.879999,0.0,15.26,0.16,15.69,0.01,0.0,"""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654431,0.239239,0.025578,…,0.00371,0.004254,0.003217,0.002771,0.003303,0.00297,0.002973,0.002408,0.001293,930.271423,896.320251,0.0,0.092389,0.122753,0.039247,-11.759879,0.424398,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
420668.0,31480.0,9e6,150.0,3.41,27.52,0.01,29.57,0.0,30.93,0.0,26.34,0.0,26.219999,0.0,14.54,0.12,14.05,0.0,0.0,"""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…",1,9,1,3000,3000,3000,3000,3000,2,0.691455,0.252629,0.00696,…,0.003681,0.004384,0.003087,0.002918,0.003283,0.002957,0.002676,0.002268,0.001368,856.213867,868.443787,0.0,0.083361,0.113877,0.025159,-11.736903,0.324984,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
272083.0,20641.0,9e6,100.0,2.32,26.780001,0.01,28.709999,0.0,30.98,0.0,27.23,0.01,25.73,0.01,14.27,0.1,13.49,0.0,0.0,"""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…",2,9,1,3000,3000,3000,3000,3000,3,0.686241,0.26769,0.003846,…,0.003868,0.004274,0.003138,0.002933,0.003128,0.002984,0.002795,0.002032,0.001511,838.988281,818.611145,0.0,0.083149,0.115437,0.019271,-11.659938,0.267819,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
834700.0,60897.0,9e6,285.0,1.72,27.67,0.01,27.76,0.0,32.25,0.01,27.219999,0.01,26.76,0.0,14.9,0.17,15.6,0.0,0.0,"""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…",3,9,1,3000,3000,3000,3000,3000,4,0.654919,0.241246,0.029194,…,0.003802,0.004404,0.002978,0.002822,0.003251,0.002716,0.002711,0.002284,0.001346,796.049194,881.816711,0.0,0.096945,0.129696,0.040707,-11.841964,0.482744,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
633936.0,48049.0,9e6,234.0,1.61,27.65,0.01,27.41,0.01,31.16,0.01,26.92,0.01,26.9,0.0,14.71,0.14,14.9,0.0,0.0,"""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…",4,9,1,3000,3000,3000,3000,3000,5,0.693417,0.278889,0.037512,…,0.003903,0.004473,0.003061,0.002799,0.003212,0.002751,0.002696,0.002152,0.001421,782.142029,753.385254,0.0,0.09624,0.129909,0.034037,-11.73406,0.46529,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
393420.0,28702.0,9e6,136.0,1.72,29.16,0.0,27.690001,0.0,31.459999,0.0,26.610001,0.01,26.780001,0.0,14.75,0.11,13.92,0.0,0.0,"""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…",5,9,1,3000,3000,3000,3000,3000,6,0.705851,0.316362,0.028071,…,0.003709,0.00422,0.003106,0.00284,0.003284,0.002726,0.002728,0.001929,0.00142,802.107056,754.64325,0.0,0.080099,0.109119,0.023339,-11.75706,0.329638,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
799420.0,59705.0,9e6,287.0,1.68,27.48,0.01,26.879999,0.01,33.619999,0.0,29.309999,0.01,28.860001,0.0,15.48,0.17,16.27,0.0,0.01,"""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…",6,9,1,3000,3000,3000,3000,3000,7,0.666342,0.254533,0.028148,…,0.0039,0.004396,0.003151,0.002918,0.003152,0.002655,0.002814,0.002359,0.001738,828.016357,873.414062,0.0,0.093206,0.122968,0.040391,-11.782599,0.463115,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
893551.0,65238.0,9e6,305.0,1.68,27.91,0.01,27.389999,0.0,33.41,0.01,28.1,0.01,28.360001,0.02,15.4,0.18,16.27,0.0,0.0,"""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…",7,9,1,3000,3000,3000,3000,3000,8,0.614624,0.213272,0.018748,…,0.003939,0.004479,0.002982,0.003046,0.003026,0.002743,0.002651,0.002156,0.001286,842.095398,753.553101,0.0,0.093586,0.122753,0.044136,-11.933735,0.611394,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
790258.0,56820.0,9e6,260.0,1.49,27.639999,0.0,27.08,0.01,33.389999,0.0,28.6,0.0,29.27,0.01,15.3,0.17,15.62,0.0,0.01,"""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…",8,9,1,3000,3000,3000,3000,3000,9,0.636865,0.232516,0.043463,…,0.003935,0.004488,0.003039,0.002905,0.003276,0.002998,0.002643,0.002044,0.001363,822.294617,844.538635,0.0,0.092041,0.122324,0.038842,-11.806087,0.476097,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
779727.0,55718.0,9e6,261.0,3.26,18.25,0.0,18.27,0.01,20.879999,0.0,18.690001,0.0,19.34,0.01,10.13,0.1,10.38,0.0,0.01,"""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654594,0.263271,0.039114,…,0.0039,0.004406,0.00338,0.002791,0.003126,0.002992,0.002635,0.002011,0.001492,793.974121,794.828613,0.0,0.091249,0.121366,0.038946,-11.797982,0.458141,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0


In [9]:
import polars as pl
import os

def get_file_extension(filename):
    """Return the extension under which ``filename`` exists on disk.

    Tries '.parquet', '.csv' and '.tsv' in that order, appending each to
    ``filename``, and returns the first one that names an existing file.
    Prints a warning and returns None when no candidate exists.
    """
    possible_extensions = ['.parquet', '.csv', '.tsv']
    existing = [suffix for suffix in possible_extensions if os.path.isfile(filename + suffix)]
    if existing:
        return existing[0]
    print(f'Warning: File {filename} with extensions {possible_extensions} not found.')
    return None

def read_file(filename, extension):
    """Helper function to read file based on its extension.

    '.parquet' is read with ``pl.read_parquet``; '.csv' and '.tsv' are read
    with ``pl.read_csv`` using a comma or a tab as the field separator.
    Returns None for any other extension.
    """
    if extension == '.parquet':
        return pl.read_parquet(filename + extension)
    elif extension in ['.csv', '.tsv']:
        delimiter = ',' if extension == '.csv' else '\t'
        # The keyword is `separator`, matching the other definition of this
        # helper in the notebook; `delimiter=` is not accepted by pl.read_csv.
        return pl.read_csv(filename + extension, separator=delimiter)
    return None

# Filter out rows with specific analysis_id
# (3241 is listed in this notebook's duplicate-barcode report for plate P013726)
df_filtered_results = df_cp_results.filter(~pl.col('analysis_id').is_in([3241]))

# Add qc-file column based on 'results' and 'plate_barcode' columns
# (path of the QC file without its extension)
df_filtered_results = df_filtered_results.with_columns(
    (pl.col('results') + 'qcRAW_images_'+ pl.col('plate_barcode')).alias('qc-file')
)

print(f'Experiment has {df_filtered_results.height} files in its path.\n')

# Read and process all the files in a list, skipping files not found.
# The extension is looked up once per row; previously each existing file was
# probed on disk twice (once in the filter, once for the read).
dfs = []
for row in df_filtered_results.iter_rows(named=True):
    ext = get_file_extension(row['qc-file'])
    if ext is None:
        continue
    dfs.append(
        read_file(row['qc-file'], ext).with_columns(
            pl.lit(row['plate_acq_id']).alias('Metadata_AcqID'),
            pl.lit(row['plate_barcode']).alias('Metadata_Barcode')
        )
    )

# Concatenate all the dataframes at once
df_concatenated_files = pl.concat(dfs, how='vertical')

df_concatenated_files


Experiment has 2 files in its path.



AreaOccupied_AreaOccupied_nuclei,AreaOccupied_Perimeter_nuclei,AreaOccupied_TotalArea_nuclei,Count_nuclei,ExecutionTime_01LoadData,ExecutionTime_02MeasureImageQuality,ExecutionTime_03FlagImage,ExecutionTime_04MeasureImageQuality,ExecutionTime_05FlagImage,ExecutionTime_06MeasureImageQuality,ExecutionTime_07FlagImage,ExecutionTime_08MeasureImageQuality,ExecutionTime_09FlagImage,ExecutionTime_10MeasureImageQuality,ExecutionTime_11FlagImage,ExecutionTime_12IdentifyPrimaryObjects,ExecutionTime_13MeasureImageAreaOccupied,ExecutionTime_14MeasureObjectSizeShape,ExecutionTime_15CalculateMath,ExecutionTime_16CalculateMath,FileName_CONC,FileName_HOECHST,FileName_MITO,FileName_PHAandWGA,FileName_SYTO,Group_Index,Group_Length,Group_Number,Height_CONC,Height_HOECHST,Height_MITO,Height_PHAandWGA,Height_SYTO,ImageNumber,ImageQuality_Correlation_CONC_10,ImageQuality_Correlation_CONC_30,ImageQuality_Correlation_HOECHST_200,…,StDev_nuclei_AreaShape_Zernike_8_2,StDev_nuclei_AreaShape_Zernike_8_4,StDev_nuclei_AreaShape_Zernike_8_6,StDev_nuclei_AreaShape_Zernike_8_8,StDev_nuclei_AreaShape_Zernike_9_1,StDev_nuclei_AreaShape_Zernike_9_3,StDev_nuclei_AreaShape_Zernike_9_5,StDev_nuclei_AreaShape_Zernike_9_7,StDev_nuclei_AreaShape_Zernike_9_9,StDev_nuclei_Location_Center_X,StDev_nuclei_Location_Center_Y,StDev_nuclei_Location_Center_Z,Threshold_FinalThreshold_nuclei,Threshold_GuideThreshold_nuclei,Threshold_OrigThreshold_nuclei,Threshold_SumOfEntropies_nuclei,Threshold_WeightedVariance_nuclei,URL_CONC,URL_HOECHST,URL_MITO,URL_PHAandWGA,URL_SYTO,Width_CONC,Width_HOECHST,Width_MITO,Width_PHAandWGA,Width_SYTO,qc_flag_rawCONC_Blurred,qc_flag_rawCONC_Saturated,qc_flag_rawHOECHST_Blurry,qc_flag_rawHOECHST_Saturated,qc_flag_rawMITO_Blurry,qc_flag_rawMITO_Saturated,qc_flag_rawPHAandWGA_Blurry,qc_flag_rawPHAandWGA_Saturated,qc_flag_rawSYTO_Blurred,qc_flag_rawSYTO_Saturated
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
773018.0,56064.0,9e6,261.0,4.65,26.85,0.0,30.790001,0.01,31.780001,0.0,27.940001,0.01,27.879999,0.0,15.26,0.16,15.69,0.01,0.0,"""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654431,0.239239,0.025578,…,0.00371,0.004254,0.003217,0.002771,0.003303,0.00297,0.002973,0.002408,0.001293,930.271423,896.320251,0.0,0.092389,0.122753,0.039247,-11.759879,0.424398,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
420668.0,31480.0,9e6,150.0,3.41,27.52,0.01,29.57,0.0,30.93,0.0,26.34,0.0,26.219999,0.0,14.54,0.12,14.05,0.0,0.0,"""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…",1,9,1,3000,3000,3000,3000,3000,2,0.691455,0.252629,0.00696,…,0.003681,0.004384,0.003087,0.002918,0.003283,0.002957,0.002676,0.002268,0.001368,856.213867,868.443787,0.0,0.083361,0.113877,0.025159,-11.736903,0.324984,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
272083.0,20641.0,9e6,100.0,2.32,26.780001,0.01,28.709999,0.0,30.98,0.0,27.23,0.01,25.73,0.01,14.27,0.1,13.49,0.0,0.0,"""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…",2,9,1,3000,3000,3000,3000,3000,3,0.686241,0.26769,0.003846,…,0.003868,0.004274,0.003138,0.002933,0.003128,0.002984,0.002795,0.002032,0.001511,838.988281,818.611145,0.0,0.083149,0.115437,0.019271,-11.659938,0.267819,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
834700.0,60897.0,9e6,285.0,1.72,27.67,0.01,27.76,0.0,32.25,0.01,27.219999,0.01,26.76,0.0,14.9,0.17,15.6,0.0,0.0,"""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…",3,9,1,3000,3000,3000,3000,3000,4,0.654919,0.241246,0.029194,…,0.003802,0.004404,0.002978,0.002822,0.003251,0.002716,0.002711,0.002284,0.001346,796.049194,881.816711,0.0,0.096945,0.129696,0.040707,-11.841964,0.482744,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
633936.0,48049.0,9e6,234.0,1.61,27.65,0.01,27.41,0.01,31.16,0.01,26.92,0.01,26.9,0.0,14.71,0.14,14.9,0.0,0.0,"""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…",4,9,1,3000,3000,3000,3000,3000,5,0.693417,0.278889,0.037512,…,0.003903,0.004473,0.003061,0.002799,0.003212,0.002751,0.002696,0.002152,0.001421,782.142029,753.385254,0.0,0.09624,0.129909,0.034037,-11.73406,0.46529,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
393420.0,28702.0,9e6,136.0,1.72,29.16,0.0,27.690001,0.0,31.459999,0.0,26.610001,0.01,26.780001,0.0,14.75,0.11,13.92,0.0,0.0,"""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…",5,9,1,3000,3000,3000,3000,3000,6,0.705851,0.316362,0.028071,…,0.003709,0.00422,0.003106,0.00284,0.003284,0.002726,0.002728,0.001929,0.00142,802.107056,754.64325,0.0,0.080099,0.109119,0.023339,-11.75706,0.329638,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0
799420.0,59705.0,9e6,287.0,1.68,27.48,0.01,26.879999,0.01,33.619999,0.0,29.309999,0.01,28.860001,0.0,15.48,0.17,16.27,0.0,0.01,"""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…",6,9,1,3000,3000,3000,3000,3000,7,0.666342,0.254533,0.028148,…,0.0039,0.004396,0.003151,0.002918,0.003152,0.002655,0.002814,0.002359,0.001738,828.016357,873.414062,0.0,0.093206,0.122968,0.040391,-11.782599,0.463115,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
893551.0,65238.0,9e6,305.0,1.68,27.91,0.01,27.389999,0.0,33.41,0.01,28.1,0.01,28.360001,0.02,15.4,0.18,16.27,0.0,0.0,"""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…",7,9,1,3000,3000,3000,3000,3000,8,0.614624,0.213272,0.018748,…,0.003939,0.004479,0.002982,0.003046,0.003026,0.002743,0.002651,0.002156,0.001286,842.095398,753.553101,0.0,0.093586,0.122753,0.044136,-11.933735,0.611394,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
790258.0,56820.0,9e6,260.0,1.49,27.639999,0.0,27.08,0.01,33.389999,0.0,28.6,0.0,29.27,0.01,15.3,0.17,15.62,0.0,0.01,"""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…",8,9,1,3000,3000,3000,3000,3000,9,0.636865,0.232516,0.043463,…,0.003935,0.004488,0.003039,0.002905,0.003276,0.002998,0.002643,0.002044,0.001363,822.294617,844.538635,0.0,0.092041,0.122324,0.038842,-11.806087,0.476097,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0
779727.0,55718.0,9e6,261.0,3.26,18.25,0.0,18.27,0.01,20.879999,0.0,18.690001,0.0,19.34,0.01,10.13,0.1,10.38,0.0,0.01,"""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654594,0.263271,0.039114,…,0.0039,0.004406,0.00338,0.002791,0.003126,0.002992,0.002635,0.002011,0.001492,793.974121,794.828613,0.0,0.091249,0.121366,0.038946,-11.797982,0.458141,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0


In [10]:
# Add some columns
# with_columns returns a new dataframe and leaves its input untouched, so the
# result must be assigned; previously it was only displayed and df_data never
# received the ImageID column.
df_data = df_concatenated_files.with_columns(
    (pl.col('Metadata_AcqID').cast(pl.Utf8) + '_' + pl.col('Metadata_Well') + '_' + pl.col('Metadata_Site').cast(pl.Utf8)).alias('ImageID')
)

# pandas-style assignments that have not been ported to polars yet:
# df_data['Metadata_AcqID'] = df_data['Metadata_AcqID'].astype(int).astype(str)
# df_data['Metadata_Site'] = df_data['Metadata_Site'].astype(int).astype(str)
# df_data['ImageID'] = df_data['Metadata_AcqID'] + '_' + df_data['Metadata_Well'] + '_' + df_data['Metadata_Site']
# df_data['barcode'] = df_data['Metadata_Barcode']
# df_data['well_id'] = df_data['Metadata_Well']
# df_data['plate'] = df_data['Metadata_Barcode']
# df_data['plate-name'] = df_data['Metadata_Barcode']
# df_data['plateWell'] = df_data['Metadata_Barcode'] + '_' + df_data['Metadata_Well']
# df_data['site'] = df_data['Metadata_Site']

# display(df_data.tail(2))

df_data

AreaOccupied_AreaOccupied_nuclei,AreaOccupied_Perimeter_nuclei,AreaOccupied_TotalArea_nuclei,Count_nuclei,ExecutionTime_01LoadData,ExecutionTime_02MeasureImageQuality,ExecutionTime_03FlagImage,ExecutionTime_04MeasureImageQuality,ExecutionTime_05FlagImage,ExecutionTime_06MeasureImageQuality,ExecutionTime_07FlagImage,ExecutionTime_08MeasureImageQuality,ExecutionTime_09FlagImage,ExecutionTime_10MeasureImageQuality,ExecutionTime_11FlagImage,ExecutionTime_12IdentifyPrimaryObjects,ExecutionTime_13MeasureImageAreaOccupied,ExecutionTime_14MeasureObjectSizeShape,ExecutionTime_15CalculateMath,ExecutionTime_16CalculateMath,FileName_CONC,FileName_HOECHST,FileName_MITO,FileName_PHAandWGA,FileName_SYTO,Group_Index,Group_Length,Group_Number,Height_CONC,Height_HOECHST,Height_MITO,Height_PHAandWGA,Height_SYTO,ImageNumber,ImageQuality_Correlation_CONC_10,ImageQuality_Correlation_CONC_30,ImageQuality_Correlation_HOECHST_200,…,StDev_nuclei_AreaShape_Zernike_8_4,StDev_nuclei_AreaShape_Zernike_8_6,StDev_nuclei_AreaShape_Zernike_8_8,StDev_nuclei_AreaShape_Zernike_9_1,StDev_nuclei_AreaShape_Zernike_9_3,StDev_nuclei_AreaShape_Zernike_9_5,StDev_nuclei_AreaShape_Zernike_9_7,StDev_nuclei_AreaShape_Zernike_9_9,StDev_nuclei_Location_Center_X,StDev_nuclei_Location_Center_Y,StDev_nuclei_Location_Center_Z,Threshold_FinalThreshold_nuclei,Threshold_GuideThreshold_nuclei,Threshold_OrigThreshold_nuclei,Threshold_SumOfEntropies_nuclei,Threshold_WeightedVariance_nuclei,URL_CONC,URL_HOECHST,URL_MITO,URL_PHAandWGA,URL_SYTO,Width_CONC,Width_HOECHST,Width_MITO,Width_PHAandWGA,Width_SYTO,qc_flag_rawCONC_Blurred,qc_flag_rawCONC_Saturated,qc_flag_rawHOECHST_Blurry,qc_flag_rawHOECHST_Saturated,qc_flag_rawMITO_Blurry,qc_flag_rawMITO_Saturated,qc_flag_rawPHAandWGA_Blurry,qc_flag_rawPHAandWGA_Saturated,qc_flag_rawSYTO_Blurred,qc_flag_rawSYTO_Saturated,ImageID
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,str,str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,str
773018.0,56064.0,9e6,261.0,4.65,26.85,0.0,30.790001,0.01,31.780001,0.0,27.940001,0.01,27.879999,0.0,15.26,0.16,15.69,0.01,0.0,"""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…","""B20_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654431,0.239239,0.025578,…,0.004254,0.003217,0.002771,0.003303,0.00297,0.002973,0.002408,0.001293,930.271423,896.320251,0.0,0.092389,0.122753,0.039247,-11.759879,0.424398,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_1"""
420668.0,31480.0,9e6,150.0,3.41,27.52,0.01,29.57,0.0,30.93,0.0,26.34,0.0,26.219999,0.0,14.54,0.12,14.05,0.0,0.0,"""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…","""B20_s2_x1_y0_F…",1,9,1,3000,3000,3000,3000,3000,2,0.691455,0.252629,0.00696,…,0.004384,0.003087,0.002918,0.003283,0.002957,0.002676,0.002268,0.001368,856.213867,868.443787,0.0,0.083361,0.113877,0.025159,-11.736903,0.324984,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0,"""3072_B20_2"""
272083.0,20641.0,9e6,100.0,2.32,26.780001,0.01,28.709999,0.0,30.98,0.0,27.23,0.01,25.73,0.01,14.27,0.1,13.49,0.0,0.0,"""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…","""B20_s3_x2_y0_F…",2,9,1,3000,3000,3000,3000,3000,3,0.686241,0.26769,0.003846,…,0.004274,0.003138,0.002933,0.003128,0.002984,0.002795,0.002032,0.001511,838.988281,818.611145,0.0,0.083149,0.115437,0.019271,-11.659938,0.267819,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0,"""3072_B20_3"""
834700.0,60897.0,9e6,285.0,1.72,27.67,0.01,27.76,0.0,32.25,0.01,27.219999,0.01,26.76,0.0,14.9,0.17,15.6,0.0,0.0,"""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…","""B20_s4_x0_y1_F…",3,9,1,3000,3000,3000,3000,3000,4,0.654919,0.241246,0.029194,…,0.004404,0.002978,0.002822,0.003251,0.002716,0.002711,0.002284,0.001346,796.049194,881.816711,0.0,0.096945,0.129696,0.040707,-11.841964,0.482744,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_4"""
633936.0,48049.0,9e6,234.0,1.61,27.65,0.01,27.41,0.01,31.16,0.01,26.92,0.01,26.9,0.0,14.71,0.14,14.9,0.0,0.0,"""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…","""B20_s5_x1_y1_F…",4,9,1,3000,3000,3000,3000,3000,5,0.693417,0.278889,0.037512,…,0.004473,0.003061,0.002799,0.003212,0.002751,0.002696,0.002152,0.001421,782.142029,753.385254,0.0,0.09624,0.129909,0.034037,-11.73406,0.46529,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_5"""
393420.0,28702.0,9e6,136.0,1.72,29.16,0.0,27.690001,0.0,31.459999,0.0,26.610001,0.01,26.780001,0.0,14.75,0.11,13.92,0.0,0.0,"""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…","""B20_s6_x2_y1_F…",5,9,1,3000,3000,3000,3000,3000,6,0.705851,0.316362,0.028071,…,0.00422,0.003106,0.00284,0.003284,0.002726,0.002728,0.001929,0.00142,802.107056,754.64325,0.0,0.080099,0.109119,0.023339,-11.75706,0.329638,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,1,0,1,0,0,0,0,0,0,0,"""3072_B20_6"""
799420.0,59705.0,9e6,287.0,1.68,27.48,0.01,26.879999,0.01,33.619999,0.0,29.309999,0.01,28.860001,0.0,15.48,0.17,16.27,0.0,0.01,"""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…","""B20_s7_x0_y2_F…",6,9,1,3000,3000,3000,3000,3000,7,0.666342,0.254533,0.028148,…,0.004396,0.003151,0.002918,0.003152,0.002655,0.002814,0.002359,0.001738,828.016357,873.414062,0.0,0.093206,0.122968,0.040391,-11.782599,0.463115,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_7"""
893551.0,65238.0,9e6,305.0,1.68,27.91,0.01,27.389999,0.0,33.41,0.01,28.1,0.01,28.360001,0.02,15.4,0.18,16.27,0.0,0.0,"""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…","""B20_s8_x1_y2_F…",7,9,1,3000,3000,3000,3000,3000,8,0.614624,0.213272,0.018748,…,0.004479,0.002982,0.003046,0.003026,0.002743,0.002651,0.002156,0.001286,842.095398,753.553101,0.0,0.093586,0.122753,0.044136,-11.933735,0.611394,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_8"""
790258.0,56820.0,9e6,260.0,1.49,27.639999,0.0,27.08,0.01,33.389999,0.0,28.6,0.0,29.27,0.01,15.3,0.17,15.62,0.0,0.01,"""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…","""B20_s9_x2_y2_F…",8,9,1,3000,3000,3000,3000,3000,9,0.636865,0.232516,0.043463,…,0.004488,0.003039,0.002905,0.003276,0.002998,0.002643,0.002044,0.001363,822.294617,844.538635,0.0,0.092041,0.122324,0.038842,-11.806087,0.476097,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_B20_9"""
779727.0,55718.0,9e6,261.0,3.26,18.25,0.0,18.27,0.01,20.879999,0.0,18.690001,0.0,19.34,0.01,10.13,0.1,10.38,0.0,0.01,"""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…","""A08_s1_x0_y0_F…",0,9,1,3000,3000,3000,3000,3000,1,0.654594,0.263271,0.039114,…,0.004406,0.00338,0.002791,0.003126,0.002992,0.002635,0.002011,0.001492,793.974121,794.828613,0.0,0.091249,0.121366,0.038946,-11.797982,0.458141,"""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…","""file:/share/mi…",3000,3000,3000,3000,3000,0,0,1,0,0,0,0,0,0,0,"""3072_A08_1"""


In [11]:
# Take the working copy outside the try block: if it fails, the error surfaces here
# instead of being swallowed and resurfacing later as a NameError on `data`.
data = df_concatenated_files.clone()

# Sorted list of distinct plate barcodes; falls back to an empty list if they cannot be read
try:
    plate_names = data.select('Metadata_Barcode').unique().sort(by='Metadata_Barcode').to_series().to_list()
    print(plate_names)
except Exception:
    print('Plate names not specified')
    plate_names = []

data = data.sort(['Metadata_Barcode','Metadata_Well', 'Metadata_Site'])

# Distinct well ids across all plates
wells = data.select('Metadata_Well').unique().sort(by='Metadata_Well').to_series().to_list()
number_of_wells = len(wells)
print(f'Number of wells: {number_of_wells}')

# The first character of a well id is taken as its row
rows = sorted({w[0] for w in wells})
number_of_rows = len(rows)
print(*rows)

# The remainder of a well id is taken as its column
columns = sorted({w[1:] for w in wells})
number_of_columns = len(columns)
print(*columns)

# Every row/column combination, whether or not that well is present in the data
all_wells = [x + y for x in rows for y in columns]

sites = data.select('Metadata_Site').unique().sort(by='Metadata_Site').to_series().to_list()
number_of_sites = len(sites)
print(f'Number of sites: {number_of_sites}')

# Compare the number of rows read with plates x wells x sites
total_images = data.shape[0]
expected_images = len(plate_names) * number_of_wells * number_of_sites

print(f'Processed {total_images} of {expected_images} images')

['P013725', 'P013726']
Number of wells: 380
A B C D E F G H I J K L M N O P
01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
Number of sites: 9
Processed 6840 of 6840 images


In [12]:
import re

# Every column whose name mentions the ImageQuality module
image_quality_cols = list(filter(lambda name: "ImageQuality_" in name, data.columns))

# Same names with the 'ImageQuality_' marker stripped
image_quality_module = [name.replace('ImageQuality_', '') for name in image_quality_cols]

# A measure is whatever precedes the first underscore of the stripped name;
# collect the distinct ones in alphabetical order.
image_quality_measures = sorted(
    set(map(lambda stripped: re.sub('_.*', '', stripped), image_quality_module))
)
count_measures = len(image_quality_measures)

print(f'Image Quality module has measured {count_measures} parameters: {", ".join(image_quality_measures)}')

Image Quality module has measured 22 parameters: Correlation, FocusScore, LocalFocusScore, MADIntensity, MaxIntensity, MeanIntensity, MedianIntensity, MinIntensity, PercentMaximal, PercentMinimal, PowerLogLogSlope, Scaling, StdIntensity, ThresholdBackground, ThresholdKapur, ThresholdMCT, ThresholdMoG, ThresholdOtsu, ThresholdRidlerCalvard, ThresholdRobustBackground, TotalArea, TotalIntensity


In [14]:
# Measures that are present in the data but excluded from the QC below
not_so_useful = [
    'TotalArea',
    'Scaling',
    'TotalIntensity',
    'Correlation',
    'PercentMinimal',
    'LocalFocusScore',
    'MinIntensity',
    'MedianIntensity',
    'MADIntensity',
    'ThresholdMoG',
    'ThresholdBackground',
    'ThresholdKapur',
    'ThresholdMCT',
    'ThresholdOtsu',
    'ThresholdRidlerCalvard',
    'ThresholdRobustBackground',
    'PercentMaximal',
]

# Keep the remaining measures, preserving their current order
image_quality_measures = list(
    filter(lambda name: name not in not_so_useful, image_quality_measures)
)
count_measures = len(image_quality_measures)

print(f'I will use {count_measures} parameters: {", ".join(image_quality_measures)}')

# One sub-frame per kept measure, holding the ImageQuality columns whose name
# contains '_<measure>'
data_frame_dictionary = {
    measure: data[[col for col in image_quality_cols if f'_{measure}' in col]]
    for measure in image_quality_measures
}
# Alphabetical list of the measures that have a sub-frame
data_frame_list = sorted(data_frame_dictionary)

I will use 5 parameters: FocusScore, MaxIntensity, MeanIntensity, PowerLogLogSlope, StdIntensity


In [15]:
# Print 1-based position, measure name and column count for every measure
# whose sub-frame has more than five columns.
# Correlation, LocalFocusScore, ThresholdMoG, ThresholdOtsu
for position, measure_name in enumerate(data_frame_list, start=1):
    column_count = len(data_frame_dictionary[measure_name].columns)
    if column_count > 5:
        print(position, measure_name, column_count)

In [16]:
# Channel label = the part of each column name after its last underscore,
# taken from the sub-frame of the first entry in data_frame_list.
channel_names = list(
    map(
        lambda column_name: re.sub('.*_', '', column_name),
        data_frame_dictionary[data_frame_list[0]].columns,
    )
)
channel_names

['CONC', 'HOECHST', 'MITO', 'PHAandWGA', 'SYTO']

In [17]:
# Polars import
import polars as pl

# Measures that are present in the data but excluded from the QC below
not_so_useful_set = {
    'TotalArea',
    'Scaling',
    'TotalIntensity',
    'Correlation',
    'PercentMinimal',
    'LocalFocusScore',
    'MinIntensity',
    'MedianIntensity',
    'MADIntensity',
    'ThresholdMoG',
    'ThresholdBackground',
    'ThresholdKapur',
    'ThresholdMCT',
    'ThresholdOtsu',
    'ThresholdRidlerCalvard',
    'ThresholdRobustBackground',
    'PercentMaximal',
}

# Group the ImageQuality columns by measure. The measure is the first
# underscore-separated token once 'ImageQuality_' is removed. Grouping on that
# exact token (instead of a "'_<measure>' in column" substring test) guarantees
# a column can only ever land in the frame of its own measure.
image_quality_cols = [col for col in data.columns if col.startswith("ImageQuality_")]
columns_by_measure = {}
for col in image_quality_cols:
    columns_by_measure.setdefault(col.replace('ImageQuality_', '').split('_')[0], []).append(col)
image_quality_measures_all = set(columns_by_measure)

# Sets have no stable iteration order, so sort before joining to get the same
# printed output on every run.
print(f'Image Quality module has measured {len(image_quality_measures_all)} parameters: {", ".join(sorted(image_quality_measures_all))}')

# Drop the measures listed in the deny-list
image_quality_measures_filtered = {measure for measure in image_quality_measures_all if measure not in not_so_useful_set}
print(f'I will use {len(image_quality_measures_filtered)} parameters: {", ".join(sorted(image_quality_measures_filtered))}')

# One sub-frame per kept measure, built in alphabetical order
data_frame_dictionary = {
    measure: data.select(columns_by_measure[measure])
    for measure in sorted(image_quality_measures_filtered)
}
data_frame_list = sorted(data_frame_dictionary.keys())


Image Quality module has measured 22 parameters: MaxIntensity, ThresholdBackground, TotalIntensity, Scaling, ThresholdRidlerCalvard, PercentMaximal, MedianIntensity, TotalArea, PowerLogLogSlope, ThresholdOtsu, StdIntensity, PercentMinimal, MADIntensity, MinIntensity, MeanIntensity, Correlation, ThresholdMoG, ThresholdRobustBackground, LocalFocusScore, FocusScore, ThresholdKapur, ThresholdMCT
I will use 5 parameters: FocusScore, PowerLogLogSlope, MeanIntensity, MaxIntensity, StdIntensity


In [18]:
# Measures used for the QC below; everything else is ignored
useful_measures = {
    'FocusScore',
    'MaxIntensity',
    'MeanIntensity',
    'PowerLogLogSlope',
    'StdIntensity',
}

# Group the ImageQuality columns by measure. The measure is the first
# underscore-separated token once 'ImageQuality_' is removed. Grouping on that
# exact token (instead of a "'_<measure>' in column" substring test) guarantees
# a column can only ever land in the frame of its own measure.
image_quality_cols = [col for col in data.columns if col.startswith("ImageQuality_")]
columns_by_measure = {}
for col in image_quality_cols:
    columns_by_measure.setdefault(col.replace('ImageQuality_', '').split('_')[0], []).append(col)
image_quality_measures_all = set(columns_by_measure)

# Sets have no stable iteration order, so sort before joining to get the same
# printed output on every run.
print(f'Image Quality module has measured {len(image_quality_measures_all)} parameters: {", ".join(sorted(image_quality_measures_all))}')

# Keep only the measures named in the allow-list
image_quality_measures_filtered = {measure for measure in image_quality_measures_all if measure in useful_measures}
print(f'I will use {len(image_quality_measures_filtered)} parameters: {", ".join(sorted(image_quality_measures_filtered))}')

# One sub-frame per kept measure, built in alphabetical order
data_frame_dictionary = {
    measure: data.select(columns_by_measure[measure])
    for measure in sorted(image_quality_measures_filtered)
}
data_frame_list = sorted(data_frame_dictionary.keys())

Image Quality module has measured 22 parameters: MaxIntensity, ThresholdBackground, TotalIntensity, Scaling, ThresholdRidlerCalvard, PercentMaximal, MedianIntensity, TotalArea, PowerLogLogSlope, ThresholdOtsu, StdIntensity, PercentMinimal, MADIntensity, MinIntensity, MeanIntensity, Correlation, ThresholdMoG, ThresholdRobustBackground, LocalFocusScore, FocusScore, ThresholdKapur, ThresholdMCT
I will use 5 parameters: FocusScore, PowerLogLogSlope, MeanIntensity, MaxIntensity, StdIntensity


In [19]:
# Correlation, LocalFocusScore, ThresholdMoG, ThresholdOtsu

from pharmbio.qc import get_qc_data_dict

# Look up the 'Correlation' entry of the QC dictionary. The value is not
# displayed: a cell only shows its last expression.
get_qc_data_dict(data, module_to_keep={'Correlation'})['Correlation']

# For the 'PowerLogLogSlope' entry, list its column names with everything up to
# and including the second underscore removed.
list(
    map(
        lambda column_name: re.sub('^.*?_.*?_', '', column_name),
        get_qc_data_dict(data, module_to_keep={'PowerLogLogSlope'})['PowerLogLogSlope'].columns,
    )
)

['CONC', 'HOECHST', 'MITO', 'PHAandWGA', 'SYTO']

In [None]:
def norm_std_df(df: pl.DataFrame, method="standardize"):
    """Rescale the numeric columns of ``df`` and return them in a new frame.

    Args:
        df: Frame to rescale.
        method: ``"standardize"`` applies ``(x - mean) / std`` with ``ddof=1``;
            ``"normalize"`` applies ``(x - min) / (max - min)``.

    Returns:
        A frame with the same column names in the same order. Columns of dtype
        Float32, Float64, Int32 or Int64 are rescaled, all others are passed
        through unchanged.

    Raises:
        KeyError: ``method`` is not one of the two names above and a numeric
            column has to be rescaled.
    """
    numeric_dtypes = [pl.Float32, pl.Float64, pl.Int32, pl.Int64]

    def _min_max(series):
        return (series - series.min()) / (series.max() - series.min())

    def _z_score(series):
        return (series - series.mean()) / series.std(ddof=1)

    scalers = {"normalize": _min_max, "standardize": _z_score}

    rescaled_columns = []
    for name in df.columns:
        series = df[name]
        if series.dtype in numeric_dtypes:
            # The method is only looked up once a numeric column is reached.
            series = scalers[method](series)
        rescaled_columns.append(series.alias(name))
    return df.select(rescaled_columns)

# Z-score limits: an image is flagged for a measure when any of that measure's
# standardized columns falls outside [lower, upper].
lower_limit_scaled = -4.5
upper_limit_scaled = 4.5

# Flag columns created by THIS run. `total` is built from this list rather than
# from every column of `data` that starts with the prefix, so flags left over
# from earlier runs with other limits cannot leak into the summary.
outlier_flaged_columns = []

for image_quality_name in data_frame_list:
    # Sub-frame with the columns of the current measure
    current_dataframe = data_frame_dictionary[image_quality_name]

    # Standardize each column of the sub-frame
    current_dataframe_scaled = norm_std_df(current_dataframe, method="standardize")

    # The flag name records the measure and the limits that were used
    new_flag_scaled = (
        f"OutlierScaled_{image_quality_name}_{lower_limit_scaled}_{upper_limit_scaled}"
    )

    # 1 when any value of the row is outside the limits, else 0
    outliers = [
        int(is_outlier)
        for is_outlier in current_dataframe_scaled.apply(
            lambda row: any(
                val < lower_limit_scaled or val > upper_limit_scaled for val in row
            )
        ).to_series()
    ]

    # Attach the flags as a named Series (one value per row) instead of wrapping
    # the Python list in pl.lit, whose handling of lists is version dependent.
    data = data.with_columns(pl.Series(new_flag_scaled, outliers))
    outlier_flaged_columns.append(new_flag_scaled)

# Row-wise maximum over the 0/1 flags: 1 when the image is flagged by any measure
data = data.with_columns(pl.max(pl.col(outlier_flaged_columns)).alias('total'))

# Number of flagged images per measure and overall
print(data.select(outlier_flaged_columns + ['total']).sum())

In [38]:
import polars as pl
from collections import defaultdict


# Per-measure (lower, upper) z-score limits; measures not listed here fall back
# to `default_sd_step`.
treshold_dict = {"MaxIntensity": (-5, 5), "StdIntensity": (-3, 3)}

# Default (lower, upper) z-score limits
default_sd_step = (-4.5, 4.5)

# Lookup that returns the override when there is one and the default otherwise
sd_step_dict = defaultdict(lambda: default_sd_step, treshold_dict)

# Flag columns created by THIS run. `total` is built from this list rather than
# from every column of `data` that starts with the prefix, so flags left over
# from earlier runs with other limits cannot leak into the summary.
outlier_flaged_columns = []

for image_quality_name in data_frame_list:
    # Sub-frame with the columns of the current measure
    current_dataframe = data_frame_dictionary[image_quality_name]

    # Standardize each column of the sub-frame
    current_dataframe_scaled = norm_std_df(current_dataframe, method="standardize")

    # Limits that apply to the current measure
    lower_limit_scaled, upper_limit_scaled = sd_step_dict[image_quality_name]

    # The flag name records the measure and the limits that were used
    new_flag_scaled = (
        f"OutlierZscore_{image_quality_name}_{lower_limit_scaled}_{upper_limit_scaled}"
    )

    # 1 when any value of the row is outside the limits, else 0
    outliers = [
        int(is_outlier)
        for is_outlier in current_dataframe_scaled.apply(
            lambda row: any(
                val < lower_limit_scaled or val > upper_limit_scaled for val in row
            )
        ).to_series()
    ]

    # Attach the flags as a named Series (one value per row) instead of wrapping
    # the Python list in pl.lit, whose handling of lists is version dependent.
    data = data.with_columns(pl.Series(new_flag_scaled, outliers))
    outlier_flaged_columns.append(new_flag_scaled)

# Row-wise maximum over the 0/1 flags: 1 when the image is flagged by any measure
data = data.with_columns(pl.max(pl.col(outlier_flaged_columns)).alias("total"))

# Number of flagged images per measure and overall
print(data.select(outlier_flaged_columns + ["total"]).sum())

shape: (1, 6)
┌──────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬───────┐
│ OutlierScaled_Fo ┆ OutlierScaled_M ┆ OutlierScaled_M ┆ OutlierScaled_P ┆ OutlierScaled_S ┆ total │
│ cusScore_-4.5_4. ┆ axIntensity_-5_ ┆ eanIntensity_-4 ┆ owerLogLogSlope ┆ tdIntensity_-3_ ┆ ---   │
│ …                ┆ 5               ┆ .5…             ┆ _-…             ┆ 3               ┆ i64   │
│ ---              ┆ ---             ┆ ---             ┆ ---             ┆ ---             ┆       │
│ i64              ┆ i64             ┆ i64             ┆ i64             ┆ i64             ┆       │
╞══════════════════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╪═══════╡
│ 101              ┆ 146             ┆ 28              ┆ 65              ┆ 292             ┆ 412   │
└──────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴───────┘


In [19]:
def norm_std_df(df: pl.DataFrame, method="standardize"):
    """Return a copy of ``df`` in which the numeric columns are rescaled.

    Args:
        df: Input frame.
        method: ``"normalize"`` for min-max scaling, ``(x - min) / (max - min)``,
            or ``"standardize"`` for z-scores, ``(x - mean) / std`` with ``ddof=1``.

    Returns:
        Frame with identical column names and order; Float32, Float64, Int32
        and Int64 columns are rescaled, any other column is returned as is.

    Raises:
        KeyError: a numeric column is present and ``method`` is unknown.
    """
    formulas = {
        "normalize": lambda x: (x - x.min()) / (x.max() - x.min()),
        "standardize": lambda x: (x - x.mean()) / x.std(ddof=1),
    }

    def _rescaled(series):
        # Non-numeric columns are handed back untouched.
        if series.dtype in [pl.Float32, pl.Float64, pl.Int32, pl.Int64]:
            return formulas[method](series)
        return series

    return df.select([_rescaled(df[name]).alias(name) for name in df.columns])


# Z-score limits: an image is flagged for a measure when any of that measure's
# standardized columns falls outside [lower, upper].
lower_limit_scaled = -4.5
upper_limit_scaled = 4.5
outlier_prefix = "OutlierZscore_"

# Flag columns created by THIS run. `total` is built from this list rather than
# from every column of `data` that starts with the prefix, so flags left over
# from earlier runs with other limits cannot leak into the summary.
outlier_flaged_columns = []

for image_quality_name in data_frame_list:
    # Sub-frame with the columns of the current measure
    current_dataframe = data_frame_dictionary[image_quality_name]

    # Standardize each column of the sub-frame
    current_dataframe_scaled = norm_std_df(current_dataframe, method="standardize")

    # The flag name records the measure and the limits that were used
    new_flag_scaled = f"{outlier_prefix}{image_quality_name}_{lower_limit_scaled}_{upper_limit_scaled}"

    # 1 when any value of the row is outside the limits, else 0
    outliers = [
        int(is_outlier)
        for is_outlier in current_dataframe_scaled.apply(
            lambda row: any(
                val < lower_limit_scaled or val > upper_limit_scaled for val in row
            )
        ).to_series()
    ]

    # Attach the flags as a named Series (one value per row) instead of wrapping
    # the Python list in pl.lit, whose handling of lists is version dependent.
    data = data.with_columns(pl.Series(new_flag_scaled, outliers))
    outlier_flaged_columns.append(new_flag_scaled)

# Row-wise maximum over the 0/1 flags: 1 when the image is flagged by any measure
data = data.with_columns(pl.max(pl.col(outlier_flaged_columns)).alias("total"))

# Number of flagged images per measure and overall
print(data.select(outlier_flaged_columns + ["total"]).sum())

shape: (1, 6)
┌──────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬───────┐
│ OutlierZscore_Fo ┆ OutlierZscore_M ┆ OutlierZscore_M ┆ OutlierZscore_P ┆ OutlierZscore_S ┆ total │
│ cusScore_-4.5_4. ┆ axIntensity_-4. ┆ eanIntensity_-4 ┆ owerLogLogSlope ┆ tdIntensity_-4. ┆ ---   │
│ …                ┆ 5_…             ┆ .5…             ┆ _-…             ┆ 5_…             ┆ i64   │
│ ---              ┆ ---             ┆ ---             ┆ ---             ┆ ---             ┆       │
│ i64              ┆ i64             ┆ i64             ┆ i64             ┆ i64             ┆       │
╞══════════════════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╪═══════╡
│ 101              ┆ 176             ┆ 28              ┆ 65              ┆ 41              ┆ 305   │
└──────────────────┴─────────────────┴─────────────────┴─────────────────┴─────────────────┴───────┘


The Interquartile Range (IQR) method can be applied to either raw or scaled data, and the choice largely depends on the context and objectives of your analysis.

Raw Data: Applying the IQR method to raw data can be beneficial when your data is not skewed and you have a good understanding of the data's distribution and scales. In this case, the outliers identified by the IQR method will directly correspond to extreme values in your original data.

Scaled Data: If the scales of your columns vary significantly, it may be beneficial to standardize or normalize the data before applying the IQR method. Scaling puts every column on a common scale, so a single set of IQR-based thresholds can be applied across columns and each column contributes equally to the identification of outliers. This is particularly useful when you're working with high-dimensional data, where you want to avoid one or two features with large scales dominating the outlier detection process.

In [18]:
outlier_prefix = "OutlierIQR_"
quantile_limit = 0.25  # this could be any value between 0 and 0.5
multiplier = 1.5  # by decreasing the multiplier, the criteria become more strict

# Flag columns created by THIS run. `total` is built from this list rather than
# from every column of `data` that starts with the prefix, so flags left over
# from earlier runs with other settings cannot leak into the summary.
outlier_flaged_columns = []

for image_quality_name in data_frame_list:
    # Sub-frame with the columns of the current measure
    current_dataframe = data_frame_dictionary[image_quality_name]

    # Lower and upper quantile of every column
    lower_quantile = current_dataframe.quantile(quantile_limit)
    upper_quantile = current_dataframe.quantile(1 - quantile_limit)

    # Per-column bounds are quantile -/+ multiplier * IQR; a single pair of
    # thresholds is then used for the whole measure: the smallest lower bound
    # and the largest upper bound over all of its columns.
    IQR = upper_quantile - lower_quantile
    lower_threshold = (lower_quantile - multiplier * IQR).to_numpy().min()
    upper_threshold = (upper_quantile + multiplier * IQR).to_numpy().max()
    print(lower_threshold, upper_threshold)

    # The flag name records the measure and the thresholds that were used
    new_flag_iqr = f"{outlier_prefix}{image_quality_name}_{lower_threshold}_{upper_threshold}"

    # 1 when any value of the row is outside the thresholds, else 0
    outliers = [
        int(is_outlier)
        for is_outlier in current_dataframe.apply(
            lambda row: any(
                val < lower_threshold or val > upper_threshold for val in row
            )
        ).to_series()
    ]

    # Attach the flags as a named Series (one value per row) instead of wrapping
    # the Python list in pl.lit, whose handling of lists is version dependent.
    data = data.with_columns(pl.Series(new_flag_iqr, outliers))
    outlier_flaged_columns.append(new_flag_iqr)

# Row-wise maximum over the 0/1 flags: 1 when the image is flagged by any measure
data = data.with_columns(pl.max(pl.col(outlier_flaged_columns)).alias("total"))

# Number of flagged images per measure and overall
print(data.select(outlier_flaged_columns + ["total"]).sum())


-0.0030352994799613953 0.22936414927244186
0.42883947491645813 1.3419394791126251
-0.006121504120528698 0.1665138453245163
-2.4562695026397705 -1.2748094201087952
0.00234079547226429 0.11933193355798721
shape: (1, 6)
┌──────────────────┬─────────────────┬─────────────────┬─────────────────┬─────────────────┬───────┐
│ OutlierIQR_Focus ┆ OutlierIQR_MaxI ┆ OutlierIQR_Mean ┆ OutlierIQR_Powe ┆ OutlierIQR_StdI ┆ total │
│ Score_-0.0030352 ┆ ntensity_0.4288 ┆ Intensity_-0.00 ┆ rLogLogSlope_-2 ┆ ntensity_0.0023 ┆ ---   │
│ …                ┆ 39…             ┆ 61…             ┆ .4…             ┆ 40…             ┆ i64   │
│ ---              ┆ ---             ┆ ---             ┆ ---             ┆ ---             ┆       │
│ i64              ┆ i64             ┆ i64             ┆ i64             ┆ i64             ┆       │
╞══════════════════╪═════════════════╪═════════════════╪═════════════════╪═════════════════╪═══════╡
│ 73               ┆ 99              ┆ 13              ┆ 290             ┆ 6

In [58]:
# Display the flag-column names collected by the most recently executed outlier cell.
outlier_flaged_columns

[]

In [None]:
import plotly.figure_factory as ff
import numpy as np

# Take the x-axis labels from the plate columns found in the data instead of a
# hard-coded 1..24, so the axis always has exactly one label per value in a
# heatmap row (e.g. also for plates with 12 or 48 columns).
if all(label.isdigit() for label in columns):
    x_labels = [int(label) for label in columns]  # '01' -> 1
else:
    x_labels = list(columns)

# One heatmap per plate showing the mean nuclei count of every well
for plate in plate_names:
    plate_data = data.filter(pl.col("Metadata_Barcode") == plate)
    heatmap_data = []
    heatmap_data_annot = []
    for row in rows:
        heatmap_row = []
        heatmap_row_annot = []
        for column in columns:
            well = row + column
            count_nuclei = plate_data.filter(pl.col("Metadata_Well") == well)[
                "Count_nuclei"
            ].to_numpy()

            if count_nuclei.size == 0:
                # No image for this well on this plate: use 0 as a placeholder
                well_nuclei_count = 0
            else:
                # Mean over the images of the well, rounded to an integer
                well_nuclei_count = np.mean(count_nuclei).round(decimals=0).astype(int)

            heatmap_row.append(well_nuclei_count)
            heatmap_row_annot.append(f'{well}: {well_nuclei_count}')
        heatmap_data.append(heatmap_row)
        heatmap_data_annot.append(heatmap_row_annot)

    # Empty cell annotations: the values are shown on hover only
    annotation_text = [[""] * len(cells) for cells in heatmap_data]
    fig = ff.create_annotated_heatmap(
        heatmap_data,
        x=x_labels,
        y=rows,
        annotation_text=annotation_text,
        colorscale="OrRd",
        hovertext=heatmap_data_annot,
        hoverinfo="text",
    )
    fig.update_layout(title_text=f"Plate: {plate}", width=700)
    fig.update_xaxes(side="bottom")
    # Row A at the top, as on the physical plate
    fig["layout"]["yaxis"]["autorange"] = "reversed"
    fig.show()

In [None]:
import plotly.figure_factory as ff
import plotly.subplots as sp
import numpy as np

# Grid layout: size of one subplot (px), font scaling relative to a 400 px
# subplot, and number of subplots per grid row
plot_size = 400
font_ratio = plot_size/400
num_columns = 2
num_rows = -(-len(plate_names) // num_columns)  # Ceiling division to get number of rows needed

# Take the x-axis labels from the plate columns found in the data instead of a
# hard-coded 1..24, so the axis always has exactly one label per value in a
# heatmap row (e.g. also for plates with 12 or 48 columns).
if all(label.isdigit() for label in columns):
    x_labels = [str(int(label)) for label in columns]  # '01' -> '1'
else:
    x_labels = list(columns)

# Create a subplot with num_rows rows and num_columns columns, one cell per plate
fig = sp.make_subplots(rows=num_rows, cols=num_columns, subplot_titles=plate_names)

for index, plate in enumerate(plate_names):
    plate_data = data.filter(pl.col('Metadata_Barcode') == plate)
    heatmap_data = []
    heatmap_data_annot = []
    for row in rows:
        heatmap_row = []
        heatmap_row_annot = []
        for column in columns:
            well = row + column
            count_nuclei = plate_data.filter(pl.col('Metadata_Well') == well)['Count_nuclei'].to_numpy()

            if count_nuclei.size == 0:
                # No image for this well on this plate: use 0 as a placeholder
                well_nuclei_count = 0
            else:
                # Mean over the images of the well, rounded to an integer
                well_nuclei_count = np.mean(count_nuclei).round(decimals = 0).astype(int)

            heatmap_row.append(well_nuclei_count)
            heatmap_row_annot.append(f'{well}: {well_nuclei_count}')
        heatmap_data.append(heatmap_row)
        heatmap_data_annot.append(heatmap_row_annot)

    # Calculate the subplot row and column indices (1-based, filled row by row)
    subplot_row = index // num_columns + 1
    subplot_col = index % num_columns + 1

    heatmap = ff.create_annotated_heatmap(
        heatmap_data,
        x=x_labels,
        y=rows,
        annotation_text=heatmap_data,
        colorscale='OrRd',
        hovertext=heatmap_data_annot,
        hoverinfo='text'
    )

    # Add the heatmap trace to the subplot
    fig.add_trace(heatmap.data[0], row=subplot_row, col=subplot_col)

# Scale the subplot-title and tick fonts with the plot size (both axes alike)
for i in fig['layout']['annotations']:
    i['font'] = dict(size=12*font_ratio)
fig.update_xaxes(tickfont=dict(size=10*font_ratio), nticks=48, side='bottom')
# Row A at the top, as on the physical plate
fig.update_yaxes(autorange="reversed", tickfont=dict(size=10*font_ratio))
fig.update_layout(height=plot_size*num_rows, width=plot_size*1.425*num_columns)
fig.show()

In [None]:
import plotly.subplots as sp
import plotly.graph_objects as go

# One line colour per channel (cycled if there are more channels than colours)
colors = ['blue', 'green', 'red', 'purple', 'orange']

# Row indices at which a new plate starts. `data` is sorted by barcode and the
# measure sub-frames were selected from it, so the same indices apply to them.
# Used for the separator lines instead of assuming two plates of equal size.
plate_barcodes = data['Metadata_Barcode'].to_list()
plate_boundaries = [
    idx for idx in range(1, len(plate_barcodes))
    if plate_barcodes[idx] != plate_barcodes[idx - 1]
]

# One subplot per measure. Titles and data both come from data_frame_list so a
# title can never end up above another measure's data.
fig = sp.make_subplots(rows=len(data_frame_list), cols=1, subplot_titles=data_frame_list, x_title='Plates')

for x, measure_name in enumerate(data_frame_list):
    CurrentDataFrame = data_frame_dictionary[measure_name]

    min_val = CurrentDataFrame.min().to_numpy().min()  # minimum of all columns
    max_val = CurrentDataFrame.max().to_numpy().max()  # maximum of all columns

    # x positions are the image indices, shared by all channels of the measure
    x_positions = [str(j) for j in range(CurrentDataFrame.height)]

    for i, column in enumerate(CurrentDataFrame.columns):
        channel_name = channel_names[i]

        fig.add_trace(
            go.Scatter(
                x=x_positions,
                y=CurrentDataFrame[column],
                mode='lines',
                line=dict(width=0.5, color=colors[i % len(colors)]),
                # Legend entries come from the dummy traces added below
                showlegend=False,
                # Always name the trace so every subplot shows the channel on hover
                name=channel_name,
                legendgroup=channel_name,
            ),
            row=x + 1,
            col=1,
        )

    fig.update_xaxes(range=[0, CurrentDataFrame.height], showticklabels=False, row=x+1, col=1)

    # Dash-dotted vertical line at every plate change, spanning the data range
    for boundary in plate_boundaries:
        fig.add_shape(type="line",
            xref="x", yref="paper",
            x0=boundary, y0=min_val, x1=boundary, y1=max_val,
            line=dict(
                color="Black",
                width=1,
                dash="dashdot",
            ),
            row=x + 1,
            col=1
        )

# Dummy traces for the legend
for i, channel_name in enumerate(channel_names):
    fig.add_trace(
        go.Scatter(
            x=[None],  # these traces won't appear
            y=[None],
            mode='lines',
            line=dict(width=3, color=colors[i % len(colors)]),  # this will be the width in the legend
            legendgroup=channel_name,
            name=channel_name,  # this will be the name in the legend
        ),
    )

# Add main title; the height grows with the number of subplots
fig.update_layout(height=1.8*len(data_frame_list)*100, title_text=NameContains, title_x=0.1, width=1400)

fig.show()


In [None]:
import polars as pl

def norm_std_df(df: pl.DataFrame, method='standardize'):
    """Scale every numeric column of ``df``; leave the other columns alone.

    Args:
        df: Frame whose columns are to be scaled.
        method: ``'standardize'`` computes ``(x - mean) / std`` (``ddof=1``),
            ``'normalize'`` computes ``(x - min) / (max - min)``.

    Returns:
        A new frame with the same columns in the same order. Only columns of
        dtype Float32, Float64, Int32 or Int64 are scaled.

    Raises:
        KeyError: ``method`` is unknown and at least one numeric column exists.
    """
    scalable_dtypes = [pl.Float32, pl.Float64, pl.Int32, pl.Int64]
    transforms = {
        'normalize': lambda x: (x - x.min()) / (x.max() - x.min()),
        'standardize': lambda x: (x - x.mean()) / x.std(ddof=1)
    }

    output_columns = []
    for column_name in df.columns:
        if df[column_name].dtype in scalable_dtypes:
            scaled = transforms[method](df[column_name])
        else:
            scaled = df[column_name]
        output_columns.append(scaled.alias(column_name))

    df = df.select(output_columns)
    return df


import polars as pl
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np

def skl_norm_std_df(df: pl.DataFrame, method='standardize'):
    """Scale the float columns of ``df`` with a scikit-learn scaler.

    Args:
        df: Frame whose columns are to be scaled.
        method: ``'standardize'`` uses ``StandardScaler``, ``'normalize'`` uses
            ``MinMaxScaler``.

    Returns:
        A frame with the same columns in which every Float32/Float64 column is
        replaced by its scaled values; all other columns are left unchanged.

    Raises:
        ValueError: ``method`` is neither ``'standardize'`` nor ``'normalize'``.

    Note:
        ``StandardScaler`` divides by the population standard deviation
        (``ddof=0``), so standardized values differ slightly from a z-score
        computed with the sample standard deviation (``ddof=1``).
    """
    if method == 'standardize':
        scaler = StandardScaler()
    elif method == 'normalize':
        scaler = MinMaxScaler()
    else:
        raise ValueError(f"Invalid method {method}, expected 'standardize' or 'normalize'.")

    # Identify float columns
    numeric_cols = [col for col in df.columns if df[col].dtype in [pl.Float32, pl.Float64]]

    # Each column is fitted and scaled on its own. scikit-learn expects a 2-D
    # (n_samples, n_features) array, so the column is handed over as a single
    # feature taken straight from NumPy (no pandas round trip needed).
    for col in numeric_cols:
        column_values = df[col].to_numpy().reshape(-1, 1)
        transformed_col = scaler.fit_transform(column_values)
        df = df.with_columns(pl.Series(col, transformed_col.ravel()))

    return df


# Run both scaling implementations on the sub-frame of the first measure and
# display the two results together as a tuple for comparison.
norm_std_df(data_frame_dictionary.get(data_frame_list[0]), method='standardize'), skl_norm_std_df(data_frame_dictionary.get(data_frame_list[0]), method='standardize')

In [None]:
import plotly.subplots as sp
import plotly.graph_objects as go

# One line colour per channel (cycled if there are more channels than colours)
colors = ['blue', 'green', 'red', 'purple', 'orange']

# Row indices at which a new plate starts. `data` is sorted by barcode and the
# measure sub-frames were selected from it, so the same indices apply to them.
# Used for the separator lines instead of assuming two plates of equal size.
plate_barcodes = data['Metadata_Barcode'].to_list()
plate_boundaries = [
    idx for idx in range(1, len(plate_barcodes))
    if plate_barcodes[idx] != plate_barcodes[idx - 1]
]

# One subplot per measure. Titles and data both come from data_frame_list so a
# title can never end up above another measure's data.
fig = sp.make_subplots(rows=len(data_frame_list), cols=1, subplot_titles=data_frame_list, x_title='Plates')

for x, measure_name in enumerate(data_frame_list):
    CurrentDataFrame = data_frame_dictionary[measure_name]
    CurrentDataFrame = skl_norm_std_df(CurrentDataFrame)   # scaled df
    min_val = CurrentDataFrame.min().to_numpy().min()  # minimum of all columns
    max_val = CurrentDataFrame.max().to_numpy().max()  # maximum of all columns

    # x positions are the image indices, shared by all channels of the measure
    x_positions = [str(j) for j in range(CurrentDataFrame.height)]

    for i, column in enumerate(CurrentDataFrame.columns):
        channel_name = channel_names[i]

        fig.add_trace(
            go.Scatter(
                x=x_positions,
                y=CurrentDataFrame[column],
                mode='lines',
                line=dict(width=0.5, color=colors[i % len(colors)]),
                # Legend entries come from the dummy traces added below
                showlegend=False,
                # Always name the trace so every subplot shows the channel on hover
                name=channel_name,
                legendgroup=channel_name,
            ),
            row=x + 1,
            col=1,
        )

    fig.update_xaxes(range=[0, CurrentDataFrame.height], showticklabels=False, row=x+1, col=1)
    # Same fixed y range for every scaled measure so the subplots are comparable
    fig.update_yaxes(range=[-5, 5], row=x+1, col=1)

    # Dash-dotted vertical line at every plate change, spanning the data range
    for boundary in plate_boundaries:
        fig.add_shape(type="line",
            xref="x", yref="paper",
            x0=boundary, y0=min_val, x1=boundary, y1=max_val,
            line=dict(
                color="Black",
                width=1,
                dash="dashdot",
            ),
            row=x + 1,
            col=1
        )

# Dummy traces for the legend
for i, channel_name in enumerate(channel_names):
    fig.add_trace(
        go.Scatter(
            x=[None],  # these traces won't appear
            y=[None],
            mode='lines',
            line=dict(width=3, color=colors[i % len(colors)]),  # this will be the width in the legend
            legendgroup=channel_name,
            name=channel_name,  # this will be the name in the legend
        ),
    )

# Add main title; the height grows with the number of subplots
fig.update_layout(height=1.8*len(data_frame_list)*100, title_text=NameContains, title_x=0.1, width=1400)

fig.show()
