In [None]:
import pandas as pd
import os
import glob

In [None]:
# Folder that holds the QC results; Processing_Status.csv is read from here.
# The location can be overridden with the ECBL_QC_RESULTS_PATH environment variable
# so the notebook can run on other machines; when the variable is not set, the
# original local path is used unchanged.
results_path = os.environ.get(
    "ECBL_QC_RESULTS_PATH",
    "/home/schmied.christopher/FMP_Docs/Projects/eu_os_ecbl_qc/results/",
)

In [None]:
# Read the processing-status table stored in the results folder
status_csv_path = os.path.join(results_path, "Processing_Status.csv")
status_file = pd.read_csv(status_csv_path)

In [None]:
def check_plate_id_match(array_1, array_2):
    """Compare two collections of plate IDs as sets and print the outcome.

    Duplicates and ordering are ignored because both inputs are converted to
    sets before the comparison.

    Parameters
    ----------
    array_1 : iterable of hashable
        Reference IDs (callers in this notebook pass the expected plate IDs).
    array_2 : iterable of hashable
        IDs compared against the reference; called "arr2" in the printed output.

    Returns
    -------
    None
        The result is only printed: a single line when both sets are equal,
        otherwise a mismatch line followed by the IDs found only in
        ``array_1`` ("Missing in arr2") and only in ``array_2`` ("Extra in arr2").
    """

    def _ordered(ids):
        # Natural ordering is used whenever the IDs are mutually comparable.
        # A mix of types (e.g. string IDs plus a NaN/None for a missing value)
        # makes sorted() raise TypeError, so fall back to ordering by string
        # form instead of crashing in the middle of the report.
        try:
            return sorted(ids)
        except TypeError:
            return sorted(ids, key=str)

    set1 = set(array_1)
    set2 = set(array_2)

    if set1 == set2:
        print("Plate IDs of processed plates match expected IDs exactly")
    else:
        print("Plate IDs of processed plates do NOT match expected IDs")

        print("Missing in arr2:", _ordered(set1 - set2))
        print("Extra in arr2:", _ordered(set2 - set1))

# Check USC

In [None]:
# Select the USC rows of the status table, then keep only those flagged for processing
status_file_usc = status_file.loc[status_file["source"].eq("USC")]
status_file_usc_processed = status_file_usc.loc[status_file_usc["process_folder"].eq(True)]

In [None]:
# Compare the plate IDs found in the processed USC rows with the expected ID range.
# Expected USC plates run from C1157 to C1229 (range() excludes its end, hence 1230).
expected_plates_usc = ["C" + str(plate_number) for plate_number in range(1157, 1230)]
print(f"USC number of expected plates: {len(expected_plates_usc)}")

processed_plates_usc = status_file_usc_processed["plate_name"].unique()

# These IDs are dropped before the comparison with the expected range
remove_control_plates = {"USC01", "USC02", "USC03", "USC04", "USC05", "USC06"}
processed_plates_usc_filtered = [
    plate for plate in processed_plates_usc if plate not in remove_control_plates
]

print(f"USC number of processed plates: {len(processed_plates_usc_filtered)}")

check_plate_id_match(expected_plates_usc, processed_plates_usc_filtered)


In [None]:
# Count the distinct replicate numbers recorded for every processed USC plate
replicate_counts = status_file_usc_processed.groupby("plate_name")["replicate_number"].nunique()

# Plates with exactly 4 distinct replicates
plates_with_4_replicates = replicate_counts.loc[replicate_counts.eq(4)].index.tolist()
print(f"Number of plates with 4 replicates: {len(plates_with_4_replicates)}")

print("Checking if plates with 4 replicates match expected plates...")
check_plate_id_match(expected_plates_usc, plates_with_4_replicates)

# Plates with fewer than 4 distinct replicates
plates_with_less_than_4_replicates = replicate_counts.loc[replicate_counts.lt(4)].index.tolist()
print(f"Plates with less than 4 replicates: {plates_with_less_than_4_replicates}")

In [None]:
# Cross-check of the comparison logic: the plates with fewer than 4 replicates are
# compared against the full expected list, so a mismatch is reported unless that
# list happens to equal the expected set.
print("Checking if plates with less than 4 replicates match expected plates...")
check_plate_id_match(
    array_1=expected_plates_usc,
    array_2=plates_with_less_than_4_replicates,
)

# Check MEDINA

In [None]:
# Select the MEDINA rows of the status table, then keep only those flagged for processing
status_file_medina = status_file.loc[status_file["source"].eq("MEDINA")]
status_file_medina_processed = status_file_medina.loc[
    status_file_medina["process_folder"].eq(True)
]

In [None]:
# Check if all expected plate IDs are present in the processed files
# MEDINA plate ID range: C1011 - C1083 (range() excludes its end, hence 1084)
expected_plates_medina = [f"C{i}" for i in range(1011, 1084)]
print("MEDINA number of expected plates: " + str(len(expected_plates_medina)))

# Use only the rows flagged for processing (same as the USC check), so that the IDs
# and the count printed below really describe processed plates rather than every
# MEDINA row in the status file.
processed_plates_medina = status_file_medina_processed["plate_name"].unique()

# These IDs (control plates, going by the variable name) are dropped before the
# comparison with the expected range
remove_control_plates = {"MED02", "MED03"}
processed_plates_medina_filtered = [x for x in processed_plates_medina if x not in remove_control_plates]

print("MEDINA number of processed plates: " + str(len(processed_plates_medina_filtered)))

check_plate_id_match(expected_plates_medina, processed_plates_medina_filtered)

In [None]:
# Count the distinct replicate numbers recorded for every processed MEDINA plate
replicate_counts_medina = (
    status_file_medina_processed.groupby("plate_name")["replicate_number"].nunique()
)

# Plates with exactly 4 distinct replicates
plates_with_4_replicates_medina = replicate_counts_medina.loc[
    replicate_counts_medina.eq(4)
].index.tolist()
print(f"Number of plates with 4 replicates: {len(plates_with_4_replicates_medina)}")

print("Checking if plates with 4 replicates match expected plates...")
check_plate_id_match(expected_plates_medina, plates_with_4_replicates_medina)

# Plates with fewer than 4 distinct replicates
plates_with_less_than_4_replicates_medina = replicate_counts_medina.loc[
    replicate_counts_medina.lt(4)
].index.tolist()
print(f"Plates with less than 4 replicates: {plates_with_less_than_4_replicates_medina}")

In [None]:
# Logic check using the positive control plates, which are expected to have fewer
# than 4 replicates: the under-replicated plates are compared against the full
# expected list.
print("Checking if plates with less than 4 replicates match expected plates...")
check_plate_id_match(
    array_1=expected_plates_medina,
    array_2=plates_with_less_than_4_replicates_medina,
)