In [5]:
# Only relative paths are specified, for reasons of data privacy.

import csv

def read_barcodes_from_sample_sheet(sample_sheet_path):
    """Collect the 'barcode' column of a CSV sample sheet.

    Args:
        sample_sheet_path: Path of a CSV file whose header row contains a
            'barcode' column.

    Returns:
        A list with the 'barcode' value of every data row, in file order.

    Raises:
        KeyError: If a row has no 'barcode' field.
    """
    collected = []
    # newline='' lets the csv module handle line endings itself.
    with open(sample_sheet_path, newline='') as sheet:
        for record in csv.DictReader(sheet):
            collected.append(record['barcode'])
    return collected

def find_common_part(barcode1, barcode2):
    """Return a key describing what two barcodes share, or None.

    Each barcode is split at its last '-' into a prefix and a replicate
    label. Both prefixes are split on '-' into tokens and compared position
    by position in two windows: every token except the last ("before") and
    every token except the first ("after"). Tokens that are equal at the
    same position are concatenated, without a separator, per window.

    Args:
        barcode1: First barcode string.
        barcode2: Second barcode string.

    Returns:
        The string "<before>-<after>" when the replicate labels differ and
        both concatenations are non-empty; otherwise None. None is also
        returned when either barcode contains no '-' (e.g. an empty cell),
        because no replicate label can be split off.

    NOTE(review): the match is loose. Any two barcodes with different
    replicate labels that share at least one token in each window produce a
    key, even when other tokens differ. Confirm this is strict enough for
    the naming scheme in use.
    """
    prefix1, sep1, replicate1 = barcode1.rpartition('-')
    prefix2, sep2, replicate2 = barcode2.rpartition('-')

    # A barcode without any '-' has no replicate label; treat it as
    # unmatched instead of failing while unpacking the split result.
    if not (sep1 and sep2):
        return None

    # Tokenise the prefixes (everything before the replicate label)
    parts1 = prefix1.split('-')
    parts2 = prefix2.split('-')

    # Concatenate the tokens that agree position by position; zip stops at
    # the shorter prefix, so surplus tokens of the longer one are ignored.
    common_part_before = ''.join(c1 for c1, c2 in zip(parts1[:-1], parts2[:-1]) if c1 == c2)
    common_part_after = ''.join(c1 for c1, c2 in zip(parts1[1:], parts2[1:]) if c1 == c2)

    # Only barcodes with different replicate labels and something shared in
    # both windows yield a key.
    if replicate1 != replicate2 and common_part_before and common_part_after:
        return f"{common_part_before}-{common_part_after}"
    return None

def find_replicate_pairs(barcodes):
    """Group barcodes into replicate pairs keyed by their common part.

    Barcodes are processed in order. Each one is paired with the first later
    barcode for which find_common_part() returns a key; both are then
    removed from further consideration. Barcodes that find no partner are
    dropped.

    Args:
        barcodes: Iterable of barcode strings. It is copied, so the caller's
            sequence is left unmodified.

    Returns:
        A dict mapping each common-part key to a list of barcodes. A list
        normally holds one pair, but it grows by two whenever another pair
        yields the same key.
    """
    # Work on a private copy: the pairing below consumes the list.
    remaining = list(barcodes)
    replicates = {}
    while remaining:
        barcode1 = remaining.pop(0)
        for barcode2 in remaining:
            if barcode1 == barcode2:
                continue
            common_part = find_common_part(barcode1, barcode2)
            if not common_part:
                continue
            group = replicates.setdefault(common_part, [])
            if barcode1 not in group and barcode2 not in group:
                group.extend([barcode1, barcode2])
                # Removing during iteration is safe only because we leave
                # the loop immediately afterwards.
                remaining.remove(barcode2)
                break
    return replicates

# Input: location of the sample sheet
barcode_input_path = '*/sample_sheet.csv'

# Output: destination directory and file name
output_directory = '*/pipeline_out/'

output_file = 'replicate_pairs.csv'

# Load the barcodes listed in the sample sheet and echo them
barcodes = read_barcodes_from_sample_sheet(barcode_input_path)
print(f"Barcodes read from sample sheet: {barcodes}")

# Group the barcodes into replicate pairs, keyed by their common part
replicate_pairs = find_replicate_pairs(barcodes)

# Report how many groups were found
num_replicate_pairs = len(replicate_pairs)
print(f"Number of replicate pairs found: {num_replicate_pairs}")

# Save one CSV row per group, using the first two barcodes of each group
with open(f'{output_directory}/{output_file}', 'w', newline='') as csvfile:
    fieldnames = ['replicate1', 'replicate2']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        {'replicate1': pair[0], 'replicate2': pair[1]}
        for pair in replicate_pairs.values()
    )

# Show every group together with its key
for common_part, replicates in replicate_pairs.items():
    print(f"Common part: {common_part}, Replicate pair: {replicates}")

Barcodes read from sample sheet: ['MZ-CR140-Ex203-2-929-001-H3K9ac-R1', 'MZ-CR141-Ex203-3-929-001-H3K27ac-R1', 'MZ-CR142-Ex203-4-929-001-H3K4me3-R1', 'MZ-CR143-Ex203-5-929-001-KAT2A-R1', 'MZ-CR144-Ex203-6-929-001-Pol-II-S2P-R1', 'MZ-CR146-Ex203-8-929-001-H3K9ac-R2', 'MZ-CR147-Ex203-9-929-001-H3K27ac-R2', 'MZ-CR148-Ex203-10-929-001-H3K4me3-R2', 'MZ-CR149-Ex203-11-929-001-KAT2A-R2', 'MZ-CR150-Ex203-12-929-001-Pol-II-S2P-R2', 'MZ-CR152-Ex203-14-928-010-H3K9ac-R1', 'MZ-CR153-Ex203-15-928-010-H3K27ac-R1', 'MZ-CR154-Ex203-16-928-010-H3K4me3-R1', 'MZ-CR155-Ex203-17-928-010-KAT2A-R1', 'MZ-CR156-Ex203-18-928-010-Pol-II-S2P-R1', 'MZ-CR158-Ex203-20-928-010-H3K9ac-R2', 'MZ-CR159-Ex203-21-928-010-H3K27ac-R2', 'MZ-CR160-Ex203-22-928-010-H3K4me3-R2', 'MZ-CR161-Ex203-23-928-010-KAT2A-R2', 'MZ-CR162-Ex203-24-928-010-Pol-II-S2P-R2', 'MZ-CR164-Ex206-50-13B-24-H3K9ac-R1', 'MZ-CR165-Ex206-51-13B-24-H3K27ac-R1', 'MZ-CR166-Ex206-52-13B-24-H3K4me3-R1', 'MZ-CR167-Ex206-53-13B-24-KAT2A-R1', 'MZ-CR168-Ex206-54-1