In [26]:
import pandas as pd
import os
import glob

In [27]:
# Folder that holds the QC results read by this notebook.
# The location can be overridden through the EU_OS_ECBL_QC_RESULTS environment
# variable so the notebook also runs on machines with a different layout; when
# the variable is unset, the original path is used unchanged.
results_path = os.environ.get(
    "EU_OS_ECBL_QC_RESULTS",
    "/home/schmied.christopher/FMP_Docs/Projects/eu_os_ecbl_qc/results/",
)

In [28]:
# Load the processing-status table that the checks in this notebook run against.
status_csv = os.path.join(results_path, "Processing_Status.csv")
status_file = pd.read_csv(status_csv)

In [29]:
def check_plate_id_match(array_1, array_2):
    """Report whether two collections hold the same set of plate IDs.

    Both inputs are reduced to sets, so ordering and duplicates are ignored.
    The outcome is printed; nothing is returned.

    Parameters
    ----------
    array_1 : iterable of hashable
        Reference IDs (the callers in this notebook pass the expected plates).
    array_2 : iterable of hashable
        IDs compared against the reference ("arr2" in the printed report).
    """
    def _ordered(ids):
        # Plain sorting raises TypeError when the IDs mix types (for instance
        # a missing value next to strings). Fall back to ordering by string
        # form so the mismatch report is still printed instead of crashing.
        try:
            return sorted(ids)
        except TypeError:
            return sorted(ids, key=str)

    reference_ids = set(array_1)
    candidate_ids = set(array_2)

    if reference_ids == candidate_ids:
        print("Plate IDs of processed plates match expected IDs exactly")
        return

    print("Plate IDs of processed plates do NOT match expected IDs")

    # IDs only in the reference, then IDs only in the second collection.
    print("Missing in arr2:", _ordered(reference_ids - candidate_ids))
    print("Extra in arr2:", _ordered(candidate_ids - reference_ids))

# Check USC

In [30]:
# Narrow the status table to the USC rows, then keep only the rows whose
# process_folder flag is True (i.e. marked for processing).
status_file_usc = status_file.loc[status_file["source"].eq("USC")]
status_file_usc_processed = status_file_usc.loc[
    status_file_usc["process_folder"].eq(True)
]

In [31]:
# Verify that every expected USC plate ID shows up among the processed rows.
# Expected USC plates carry the IDs C1157 through C1229 (inclusive).
expected_plates_usc = ["C" + str(plate_number) for plate_number in range(1157, 1230)]
print("USC number of expected plates: " + str(len(expected_plates_usc)))

# Distinct plate names among the USC rows marked for processing.
processed_plates_usc = status_file_usc_processed["plate_name"].unique()

# The control plates lie outside the expected ID range, so drop them before comparing.
remove_control_plates = {"USC01", "USC02", "USC03", "USC04", "USC05", "USC06"}
processed_plates_usc_filtered = [
    plate for plate in processed_plates_usc if plate not in remove_control_plates
]

print("USC number of processed plates: " + str(len(processed_plates_usc_filtered)))

check_plate_id_match(array_1=expected_plates_usc, array_2=processed_plates_usc_filtered)


USC number of expected plates: 73
USC number of processed plates: 73
Plate IDs of processed plates match expected IDs exactly


In [32]:
# Count the distinct replicate numbers recorded for each processed USC plate.
replicate_counts = (
    status_file_usc_processed
    .groupby("plate_name")["replicate_number"]
    .nunique()
)

# Plates with exactly four replicates are compared against the expected IDs.
plates_with_4_replicates = replicate_counts.loc[replicate_counts.eq(4)].index.tolist()
print("Number of plates with 4 replicates: " + str(len(plates_with_4_replicates)))

print("Checking if plates with 4 replicates match expected plates...")
check_plate_id_match(array_1=expected_plates_usc, array_2=plates_with_4_replicates)

# Plates with fewer than four replicates are listed for inspection.
plates_with_less_than_4_replicates = replicate_counts.loc[replicate_counts.lt(4)].index.tolist()
print("Plates with less than 4 replicates: " + str(plates_with_less_than_4_replicates))

Number of plates with 4 replicates: 73
Checking if plates with 4 replicates match expected plates...
Plate IDs of processed plates match expected IDs exactly
Plates with less than 4 replicates: ['USC01', 'USC02', 'USC03', 'USC04', 'USC05', 'USC06']


In [33]:
# Sanity check of the comparison logic: the plates with fewer than 4 replicates
# are compared against the expected plate IDs.
print("Checking if plates with less than 4 replicates match expected plates...")
check_plate_id_match(
    array_1=expected_plates_usc,
    array_2=plates_with_less_than_4_replicates,
)

Checking if plates with less than 4 replicates match expected plates...
Plate IDs of processed plates do NOT match expected IDs
Missing in arr2: ['C1157', 'C1158', 'C1159', 'C1160', 'C1161', 'C1162', 'C1163', 'C1164', 'C1165', 'C1166', 'C1167', 'C1168', 'C1169', 'C1170', 'C1171', 'C1172', 'C1173', 'C1174', 'C1175', 'C1176', 'C1177', 'C1178', 'C1179', 'C1180', 'C1181', 'C1182', 'C1183', 'C1184', 'C1185', 'C1186', 'C1187', 'C1188', 'C1189', 'C1190', 'C1191', 'C1192', 'C1193', 'C1194', 'C1195', 'C1196', 'C1197', 'C1198', 'C1199', 'C1200', 'C1201', 'C1202', 'C1203', 'C1204', 'C1205', 'C1206', 'C1207', 'C1208', 'C1209', 'C1210', 'C1211', 'C1212', 'C1213', 'C1214', 'C1215', 'C1216', 'C1217', 'C1218', 'C1219', 'C1220', 'C1221', 'C1222', 'C1223', 'C1224', 'C1225', 'C1226', 'C1227', 'C1228', 'C1229']
Extra in arr2: ['USC01', 'USC02', 'USC03', 'USC04', 'USC05', 'USC06']


# Check MEDINA

In [34]:
# Narrow the status table to the MEDINA rows, then keep only the rows whose
# process_folder flag is True (i.e. marked for processing).
status_file_medina = status_file.loc[status_file["source"].eq("MEDINA")]
status_file_medina_processed = status_file_medina.loc[
    status_file_medina["process_folder"].eq(True)
]

In [35]:
# Check if all expected plate IDs are present in the processed files
# MEDINA Plate ID range: C1011 - C1083
expected_plates_medina = [f"C{i}" for i in range(1011, 1084)]
print("MEDINA number of expected plates: " + str(len(expected_plates_medina)))

# Use only the rows marked for processing (process_folder == True), matching
# the USC check; the unfiltered MEDINA table would also count plates that were
# never flagged for processing.
processed_plates_medina = status_file_medina_processed["plate_name"].unique()

# Control plates are outside the expected ID range, so drop them before comparing.
remove_control_plates = {"MED02", "MED03"}
processed_plates_medina_filtered = [x for x in processed_plates_medina if x not in remove_control_plates]

print("MEDINA number of processed plates: " + str(len(processed_plates_medina_filtered)))

check_plate_id_match(expected_plates_medina, processed_plates_medina_filtered)

MEDINA number of expected plates: 73
MEDINA number of processed plates: 73
Plate IDs of processed plates match expected IDs exactly


In [None]:
# Count the distinct replicate numbers recorded for each processed MEDINA plate.
replicate_counts_medina = (
    status_file_medina_processed
    .groupby("plate_name")["replicate_number"]
    .nunique()
)

# Plates with exactly four replicates are compared against the expected IDs.
plates_with_4_replicates_medina = replicate_counts_medina.loc[
    replicate_counts_medina.eq(4)
].index.tolist()
print("Number of plates with 4 replicates: " + str(len(plates_with_4_replicates_medina)))

print("Checking if plates with 4 replicates match expected plates...")
check_plate_id_match(array_1=expected_plates_medina, array_2=plates_with_4_replicates_medina)

# Plates with fewer than four replicates are listed for inspection.
plates_with_less_than_4_replicates_medina = replicate_counts_medina.loc[
    replicate_counts_medina.lt(4)
].index.tolist()
print("Plates with less than 4 replicates: " + str(plates_with_less_than_4_replicates_medina))

Number of plates with 4 replicates: 73
Checking if plates with 4 replicates match expected plates...
Plate IDs of processed plates match expected IDs exactly
Plates with less than 4 replicates: ['MED02', 'MED03']


In [None]:
# Sanity check of the comparison logic using the positive control plates,
# which should have less than 4 replicates.
print("Checking if plates with less than 4 replicates match expected plates...")
check_plate_id_match(
    array_1=expected_plates_medina,
    array_2=plates_with_less_than_4_replicates_medina,
)

Checking if plates with less than 4 replicates match expected plates...
Plate IDs of processed plates do NOT match expected IDs
Missing in arr2: ['C1011', 'C1012', 'C1013', 'C1014', 'C1015', 'C1016', 'C1017', 'C1018', 'C1019', 'C1020', 'C1021', 'C1022', 'C1023', 'C1024', 'C1025', 'C1026', 'C1027', 'C1028', 'C1029', 'C1030', 'C1031', 'C1032', 'C1033', 'C1034', 'C1035', 'C1036', 'C1037', 'C1038', 'C1039', 'C1040', 'C1041', 'C1042', 'C1043', 'C1044', 'C1045', 'C1046', 'C1047', 'C1048', 'C1049', 'C1050', 'C1051', 'C1052', 'C1053', 'C1054', 'C1055', 'C1056', 'C1057', 'C1058', 'C1059', 'C1060', 'C1061', 'C1062', 'C1063', 'C1064', 'C1065', 'C1066', 'C1067', 'C1068', 'C1069', 'C1070', 'C1071', 'C1072', 'C1073', 'C1074', 'C1075', 'C1076', 'C1077', 'C1078', 'C1079', 'C1080', 'C1081', 'C1082', 'C1083']
Extra in arr2: ['MED02', 'MED03']
