Generate random barcodes to attach to each recent H3 strain that we're synthesizing. Note that each strain will be generated with pools of ~30-40x unique barcodes, but for the stock genes in the plasmid log we want to attach a single known barcode to each.

In [4]:
import random
import csv

In [2]:
def gen_barcode(length=16):
    """Return a random lowercase DNA barcode with no identical adjacent bases.

    Each position is drawn from 'a', 'g', 'c', 't'. After the first base,
    every base is guaranteed to differ from the one immediately before it,
    so the barcode never contains a run such as 'aa' or 'ttt'. Longer
    repeated motifs (e.g. 'gcgc') are NOT excluded.

    Args:
        length: Number of bases in the barcode. Defaults to 16. A length of
            0 (or any value for which ``range(length)`` is empty) yields ''.

    Returns:
        The barcode as a ``str`` of ``length`` characters.
    """
    nucleotides = ['a', 'g', 'c', 't']

    bases = []
    for _ in range(length):
        if bases:
            # Draw two distinct candidates: if the first would repeat the
            # previous base, fall back to the second, which by construction
            # is different from the first and therefore from the previous.
            first, second = random.sample(nucleotides, 2)
            bases.append(second if first == bases[-1] else first)
        else:
            # The first base has no predecessor, so any nucleotide is fine.
            bases.append(random.choice(nucleotides))

    return ''.join(bases)

In [3]:
def gen_bc_list(ha_strains):
    """Pair every strain in ``ha_strains`` with its own random barcode.

    Barcodes come from ``gen_barcode()``. A barcode that has already been
    handed out during this call is redrawn, so no two strains in the
    returned list share a barcode. Uniqueness is only enforced within a
    single call, not across separate calls.

    Args:
        ha_strains: Iterable of strain identifiers.

    Returns:
        A list of ``(strain, barcode)`` tuples in the order of ``ha_strains``.
    """
    bc_list = []
    used = set()
    for strain in ha_strains:
        bc = gen_barcode()
        # Collisions are very unlikely but would make two strains
        # indistinguishable by barcode, so redraw until the barcode is new.
        while bc in used:
            bc = gen_barcode()
        used.add(bc)
        bc_list.append((strain, bc))

    return bc_list

In [25]:
# Strain identifiers 3114 through 3125 inclusive (a contiguous run).
ha_strains = list(range(3114, 3126))

# Assign one random barcode per strain, then display the result in the cell.
bc_list = gen_bc_list(ha_strains)
bc_list

[(3114, 'gcacgtgctgcgtcgc'),
 (3115, 'gcacatacatgtacta'),
 (3116, 'acgcgtactagcagca'),
 (3117, 'atctagactcagctag'),
 (3118, 'cgcgatctatactatc'),
 (3119, 'tatctcatgatcgtcg'),
 (3120, 'gctcatacacagagca'),
 (3121, 'gagatatgatagtcgt'),
 (3122, 'gatgtacgtacgatgt'),
 (3123, 'tgcgatgtcactcaga'),
 (3124, 'actgtgcgtcgtcaga'),
 (3125, 'tgcatcactgtcagag')]

In [11]:
# Second batch of strain identifiers: 3174 through 3180 inclusive.
ha_strains_hensley = list(range(3174, 3181))

# Assign one random barcode per strain, then display the result in the cell.
bc_list_hensley = gen_bc_list(ha_strains_hensley)
bc_list_hensley

[(3174, 'ctgctcatcacgcgct'),
 (3175, 'gtagatgatgtagcag'),
 (3176, 'cagtagtagtctgtat'),
 (3177, 'atcacgcagcagagct'),
 (3178, 'catcgtctgcacgtag'),
 (3179, 'gtatcagtgactcgta'),
 (3180, 'cgtctcagcagcatga')]

In [14]:
# Combine both batches of (strain, barcode) pairs into one table.
bc_list_full = bc_list + bc_list_hensley

# newline='' is required by the csv module: the writer emits its own line
# terminators, and without it platforms that translate '\n' end up with
# '\r\r\n' row endings (blank lines between rows).
with open('recent_h3_bc.csv', 'w', newline='') as out:
    csv_out = csv.writer(out)
    csv_out.writerow(['strain', 'barcode'])
    csv_out.writerows(bc_list_full)