In [90]:
import pandas as pd
import re
# Load the specimen records. The counting loop further down reads columns by
# position -- 0 (subject ID), 2 (visit name) and 5 (treatment arm) -- so the
# column order of this CSV matters.
dataset = pd.read_csv('specimen_data.csv')

In [86]:
# Exclusion groups of expected specimens. Once any one visit of a group has
# been received for a patient, the remaining visits of that group are no
# longer expected. The ABC / AB / C part of each name matches the treatment
# arms whose counting branch uses the group.

# Screening through cycle 2
specimen_group_ABC_scr = [
    'Screening',
    'Cycle 01 Day 01',
    'Cycle 02 Day 01 (1)',
    'Cycle 02 Day 02 (1)',
    'Cycle 02 Day 08 (1)',
    'Cycle 02 Day 15 (1)',
]

# Cycles 3 through 5
specimen_group_ABC_c3 = [
    'Cycle 03 Day 01 (1)',
    'Cycle 03 Day 02 (1)',
    'Cycle 03 Day 03 (1)',
    'Cycle 03 Day 08 (1)',
    'Cycle 03 Day 15 (1)',
    'Cycle 03 Day 22 (1)',
    'Cycle 04 Day 01 (1)',
    'Cycle 04 Day 02 (1)',
    'Cycle 04 Day 03 (1)',
    'Cycle 05 Day 01 (1)',
    'Cycle 05 Day 02 (1)',
    'Cycle 05 Day 03 (1)',
]

# Cycles 6 through 8
specimen_group_AB_c6 = [
    'Cycle 06 Day 01 (1)',
    'Cycle 06 Day 02 (1)',
    'Cycle 06 Day 03 (1)',
    'Cycle 07 Day 01 (1)',
    'Cycle 08 Day 01 (1)',
]

# Cycle 6 only
specimen_group_C_c6 = [
    'Cycle 06 Day 01 (1)',
    'Cycle 06 Day 02 (1)',
    'Cycle 06 Day 03 (1)',
]

# Cycles 9 through 11
specimen_group_AB_c9 = [
    'Cycle 09 Day 01 (1)',
    'Cycle 10 Day 01 (1)',
    'Cycle 11 Day 01 (1)',
]

# Cycles 12 and 13
specimen_group_AB_c12 = [
    'Cycle 12 Day 01 (1)',
    'Cycle 13 Day 01 (1)',
]

# Cycle 14
specimen_group_AB_c14 = [
    'Cycle 14 Day 01 (1)',
]

# Safety follow-up
specimen_group_ABC_sfu = [
    'Safety Follow-up Visit (1)',
]

In [87]:
# Start with no subjects recorded, no per-subject tallies, and a running
# tally of zero for the patient currently being processed
subj_list, expected_samples_list, sample_counter = [], [], 0
# Compiled matcher for any numbered Post-Treatment Follow-up visit label
pattern = re.compile(r'Post-Treatment Follow-up \(\d*\)')

In [88]:
# Count the expected specimens for every subject in `dataset`.
#
# Each row is one visit; columns are read by position: 0 = subject ID,
# 2 = visit name, 5 = treatment arm. A visit adds one expected specimen the
# first time any visit of its exclusion group is seen for that subject, after
# which the whole group is suspended for that subject. Each distinct
# Post-Treatment Follow-up visit counts once on its own.

# Exclusion groups applied to arms other than C.
_groups_arm_ab = (
    specimen_group_ABC_scr,
    specimen_group_ABC_c3,
    specimen_group_AB_c6,
    specimen_group_AB_c9,
    specimen_group_AB_c12,
    specimen_group_AB_c14,
    specimen_group_ABC_sfu,
)
# Exclusion groups applied to arm C: a shorter cycle 6 group and no
# cycle 7-14 groups.
_groups_arm_c = (
    specimen_group_ABC_scr,
    specimen_group_ABC_c3,
    specimen_group_C_c6,
    specimen_group_ABC_sfu,
)


def _counts_as_expected(visit, groups, suspended):
    """Decide whether `visit` adds one expected specimen for a subject.

    Parameters
    ----------
    visit : object
        Visit label taken from the dataset row. Non-string values (for
        example NaN from a blank cell) never count.
    groups : iterable of list of str
        Exclusion groups that apply to the subject's arm.
    suspended : set of str
        Visits already excluded for this subject. Updated in place when the
        visit counts.

    Returns
    -------
    bool
        True if the visit should increment the subject's tally.
    """
    # re.search would raise TypeError on a NaN visit, so reject non-strings
    # before any matching is attempted.
    if not isinstance(visit, str) or visit in suspended:
        return False
    for group in groups:
        if visit in group:
            # One specimen received from a group suspends the whole group.
            suspended.update(group)
            return True
    if pattern.search(visit):
        # Follow-up visits are not grouped; only this exact label is suspended.
        suspended.add(visit)
        return True
    return False


# Tally and suspended visits are kept per subject, so the result does not
# depend on a subject's rows being contiguous, on a trailing sentinel row, or
# on state left over from an earlier run of this cell.
_expected_by_subject = {}
_suspended_by_subject = {}

for _, row in dataset.iterrows():
    # .iloc makes the positional access explicit; plain row[0] relies on a
    # deprecated positional fallback when the columns have string labels.
    subject_id = row.iloc[0]
    visit = row.iloc[2]
    arm = row.iloc[5]

    # A row without a subject ID cannot be attributed to any patient.
    if pd.isna(subject_id):
        continue

    if subject_id not in _expected_by_subject:
        _expected_by_subject[subject_id] = 0
        _suspended_by_subject[subject_id] = set()

    groups = _groups_arm_c if arm == 'C' else _groups_arm_ab
    if _counts_as_expected(visit, groups, _suspended_by_subject[subject_id]):
        _expected_by_subject[subject_id] += 1

# Dicts preserve insertion order, so subjects stay in first-seen order and the
# two lists are aligned without any shifting.
subj_list = list(_expected_by_subject)
expected_samples_list = list(_expected_by_subject.values())

df = pd.DataFrame({
    'Subject ID-mod': subj_list,
    'Expected Samples': expected_samples_list,
})

In [89]:
# Display the per-subject expected-specimen counts
df

Unnamed: 0,Subject ID-mod,Expected Samples
0,0025-001,10
1,0025-002,10
2,0025-003,9
3,0025-004,9
4,0025-005,9
5,0025-006,9
6,0025-007,9
7,0025-008,7
8,0025-009,6
9,0029-001,1
