# align plasmid sequencing with target library. 
20230703
sarahfong

input 

    - plasmid sequencing genbank files for primordium. 
    - library .fa file
    
my plan

    - make a bowtie index file for library
    - get plasmid sequencing genbank files 

In [1]:
from Bio import SeqIO
from Bio.Align import PairwiseAligner
import config_readwrite as crw
import glob
import os
import sys
import pandas as pd

In [2]:
# Load the project config. The file is named "config" and is looked up in the
# parent directory of the current working directory.
config_tag = "config"
# read_config returns the config object plus a second value kept as `cfn`
# (presumably the config file name -- confirm in config_readwrite); both are
# handed back to crw.write in the last cell of this notebook.
config, cfn = crw.read_config(os.path.join(os.path.dirname(os.getcwd()), config_tag))

# write config

## colony sequencing data

In [22]:
# Config section that stores the colony-sequencing paths.
section = "col_seq"
# NOTE(review): presumably makes sure the section exists in the config --
# confirm against config_readwrite.check.
crw.check(config, section)

# Hard-coded locations of the colony sequencing run, its GenBank files, and
# the text file that collects the bowtie alignments against the library.
PATH = "/wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/"
GB_PATH = os.path.join(PATH, "99L8BQ_genbank_files")
BOWTIE_OUT = os.path.join(GB_PATH, "library_bowtie_alignments.txt")

# Record the paths in the in-memory config (written to disk in the last cell).
config[section]["path"] = PATH
config[section]["path_gb"] = GB_PATH
config[section]["lib_alignment"] = BOWTIE_OUT

## bowtie paths

In [5]:
# Config section that stores where bowtie is installed.
section = "bowtie"
# NOTE(review): presumably makes sure the section exists in the config --
# confirm against config_readwrite.check.
crw.check(config, section)

# Install directory and the bowtie executable inside it. The "-build" suffix
# is appended to BIN_BOWTIE later to get the indexer command.
PATH_BOWTIE = "/wynton/home/ahituv/fongsl/bin/bowtie"
BIN_BOWTIE = os.path.join(PATH_BOWTIE, "bowtie")

# Record the paths in the in-memory config.
config[section]["path"] = PATH_BOWTIE
config[section]["bin"] = BIN_BOWTIE

## target library

In [6]:
# Config section for the target library; it must already exist in the config
# because the "library_twist" entry is read from it below.
section = "common.15mer.2mut"

# read: path to the library table (loaded as a tab-separated file later on)
TARGET = config[section]["library_twist"]

# write: fasta version of the library, same path with the extension swapped to .fa
TARGET_FA = os.path.splitext(TARGET)[0]+".fa"

# Record the fasta path in the in-memory config.
config[section]["library_twist_FA"] = TARGET_FA

# target file .fa

In [7]:
def fastaWriter(outfile, df):
    """
    Write a two-column dataframe to a fasta file, unless the file already exists.

    input
        outfile (str) - path to outfile
        df (pd dataframe) - dataframe whose FIRST column (by position) is the
            seq_id and whose SECOND column (by position) is the sequence.
            Column labels are ignored; any further columns are not used.

    method

        1. if outfile already exists, report it and return without touching it
        2. pair up the first two columns of the dataframe, row by row
        3. write each (seq_id, seq) pair as a fasta record

    return
        None. Prints whether the file was written or was already present.
    """

    #1 never overwrite: an existing file is reused as-is, even if df differs
    if os.path.exists(outfile):
        print("already wrote fasta", outfile)
        return

    #2 select columns by position with .iloc so the lookup does not depend on
    #  whether the column labels are integers or strings
    seq_ids, seqs = df.iloc[:, 0], df.iloc[:, 1]

    #3 the with-block closes the file; no explicit close() is needed
    with open(outfile, "w") as writer:
        writer.writelines(
            f">{seq_id}\n{seq}\n" for seq_id, seq in zip(seq_ids, seqs)
        )

    print("wrote fasta", outfile)

## turn TSV into fa

In [8]:
# read tsv
df = pd.read_csv(TARGET, sep='\t')
fastaWriter(TARGET_FA, df)

wrote fasta /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.fa


## index fa w/ bowtie

In [9]:
import subprocess

# Index prefix: the library fasta path with the extension swapped to ".index".
TARGET_INDEX = os.path.splitext(TARGET_FA)[0] + ".index"

# bowtie-build sits next to the bowtie binary, hence BIN_BOWTIE + "-build".
# The command stays a list so every path reaches the program as a single
# argument, even if it contains spaces or shell metacharacters.
cmd = [
    BIN_BOWTIE + "-build",
    TARGET_FA,
    TARGET_INDEX,
]

print(" ".join(cmd))
# subprocess.call returns the exit code, which the notebook displays as the
# cell's value (0 on success).
subprocess.call(cmd)

/wynton/home/ahituv/fongsl/bin/bowtie/bowtie-build /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.fa /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index
Settings:
  Output files: "/wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index.*.ebwt"
  Line rate: 6 (line is 64 bytes)
  Lines per side: 1 (side is 64 bytes)
  Offset rate: 5 (one in 32)
  FTable chars: 10
  Strings: unpacked
  Max bucket size: default
  Max bucket size, sqrt multiplier: default
  Max bucket size, len divisor: 4
  Difference-cover sample period: 1024
  Endianness: little
  Actual local endianness: little
  Sanity checking: disabled
  Assertions: disabled
  Random seed: 0
  Sizeofs: void*:8, int:4, long:8, size_t:8
Input files DNA, FASTA:
  /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.lib

  Sorting block time: 00:00:01
Returning block of 4257907 for bucket 1
Getting block 2 of 7
  Reserving size (4723265) for bucket 2
  Calculating Z arrays for bucket 2
  Entering block accumulator loop for bucket 2:
  bucket 2: 10%
  bucket 2: 20%
  bucket 2: 30%
  bucket 2: 40%
  bucket 2: 50%
  bucket 2: 60%
  bucket 2: 70%
  bucket 2: 80%
  bucket 2: 90%
  bucket 2: 100%
  Sorting block of length 4332955 for bucket 2
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 4332956 for bucket 2
Getting block 3 of 7
  Reserving size (4723265) for bucket 3
  Calculating Z arrays for bucket 3
  Entering block accumulator loop for bucket 3:
  bucket 3: 10%
  bucket 3: 20%
  bucket 3: 30%
  bucket 3: 40%
  bucket 3: 50%
  bucket 3: 60%
  bucket 3: 70%
  bucket 3: 80%
  bucket 3: 90%
  bucket 3: 100%
  Sorting block of length 2631949 for bucket 3
  (Using difference cover)
  Sorting block time: 00:00:01
Returning block of 2631950 for bucket 3
Getting block 4 of 7
  Rese

0

# get genbank files

In [13]:
def makeWindows(sequence, windowsize):
    """
    Cut a sequence into every overlapping window of a fixed size.

    input
        sequence (str) - sequence to slide along
        windowsize (int) - size of sequence windows

    method
        1. compute the number of windows from the sequence length and the
           window size (len(sequence) - windowsize + 1)
        2. slide along the sequence one bp at a time
        3. store each upper-cased window under the key "window.<start>"

    return
        collection (dict) - keys are "window.<start index>", values are the
            upper-cased window sequences. Empty when the sequence is shorter
            than windowsize.
    """

    #1 a non-positive count makes range() empty, so short sequences yield {}
    n_kmers = len(sequence) - windowsize + 1

    #2, 3 built-in range keeps the function free of any numpy dependency
    collection = {
        f"window.{start}": sequence[start:start + windowsize].upper()
        for start in range(n_kmers)
    }

    return collection

In [27]:
def runBowtie(bin_bowtie, target_index, input_fa, outfile):
    """
    Align a fasta file against a bowtie index and append the hits to outfile.

    input
        bin_bowtie (str) - path to the bowtie executable
        target_index (str) - bowtie index prefix, passed with -x
        input_fa (str) - fasta file of query sequences, passed with -f
        outfile (str) - file that bowtie's standard output is appended to

    method
        1. build the bowtie command as an argument list
        2. run it with stdout appended to outfile
        3. print the equivalent shell command

    return
        None. Prints a message when bowtie exits with a non-zero status.

    raises
        OSError - if bin_bowtie cannot be executed or outfile cannot be opened.
    """
    # local import so the function does not rely on a file-level import
    import subprocess

    #1 "-n 3" is passed as two arguments; per the bowtie manual -n is the
    #  maximum number of mismatches allowed in the seed.
    cmd = [
        bin_bowtie,
        "-x",
        target_index,
        "-f",
        input_fa,
        "-n",
        "3",
    ]

    #2 append mode mirrors the shell's ">>": hits from successive calls
    #  accumulate in one file. No shell is involved, so paths containing
    #  spaces or shell metacharacters are passed through untouched.
    with open(outfile, "a") as out_handle:
        result = subprocess.run(cmd, stdout=out_handle)

    if result.returncode != 0:
        print("bowtie exited with status", result.returncode)

    #3
    print(" ".join(cmd + [">>", outfile]))

## parse genbank files

In [31]:
# Length (bp) of the sliding windows cut from each plasmid sequence.
WINDOWSIZE = 230

# Collect every GenBank file (*.gbk) in the colony sequencing directory.
GB_FILE_QUERY = os.path.join(GB_PATH, "*.gbk")
GB_FILES = glob.glob(GB_FILE_QUERY)

# For each GenBank file: window the sequence, write the windows to a fasta
# file next to the .gbk, and align that fasta against the library index.
# NOTE(review): runBowtie appends to BOWTIE_OUT, so re-running this cell adds
# the same alignments again; remove the file first for a clean result.
for input_handle in GB_FILES:
    
    # NOTE(review): file_fa depends only on the file name and fastaWriter skips
    # existing files, so for a multi-record GenBank file only the first
    # record's windows are written while bowtie still runs once per record.
    for record in SeqIO.parse(input_handle, "genbank"):
        sequence = record.seq  # sequence of this GenBank record
        sequence_id = os.path.split(input_handle)[1]  # file name, used as the id
        
        # break up sequence into sliding windows, WINDOWSIZE bp long
        windows_dict = makeWindows("".join(sequence), WINDOWSIZE)

        # make a two-column dataframe (window id, window sequence) and
        # append "|<file name>" to every window id
        windows_df = pd.DataFrame(windows_dict.items())
        windows_df[0] = windows_df[0]+"|"+sequence_id

        # fasta path: same location as the GenBank file, extension swapped to .fa
        file_fa = os.path.splitext(input_handle)[0]+".fa" 

        # write fa (skipped when the file already exists)
        fastaWriter(file_fa, windows_df)
        
        # run bowtie alignment, appending hits to BOWTIE_OUT
        runBowtie(BIN_BOWTIE, TARGET_INDEX, file_fa, BOWTIE_OUT)


wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/15_15.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/15_15.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/2_02.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/2_02.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/9_09.fa


# reads processed: 7864
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7863 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/9_09.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/11_11.fa


# reads processed: 7882
# reads with at least one alignment: 0 (0.00%)
# reads that failed to align: 7882 (100.00%)
No alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/11_11.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/4_04.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/4_04.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/13_13.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/13_13.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/6_06.fa


# reads processed: 7863
# reads with at least one alignment: 0 (0.00%)
# reads that failed to align: 7863 (100.00%)
No alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/6_06.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/14_14.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/14_14.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/1_01.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/1_01.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/8_08.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/8_08.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/16_16.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/16_16.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/3_03.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/3_03.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/5_05.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/5_05.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/10_10.fa


# reads processed: 7863
# reads with at least one alignment: 0 (0.00%)
# reads that failed to align: 7863 (100.00%)
No alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/10_10.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/7_07.fa


# reads processed: 7865
# reads with at least one alignment: 1 (0.01%)
# reads that failed to align: 7864 (99.99%)
Reported 1 alignments


/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/7_07.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt
already wrote fasta /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/12_12.fa
/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/12_12.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt


# reads processed: 7858
# reads with at least one alignment: 0 (0.00%)
# reads that failed to align: 7858 (100.00%)
No alignments


In [29]:
%%bash
# Count the lines in the combined bowtie alignment file.
wc -l /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt

12 /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230630/99L8BQ_genbank_files/library_bowtie_alignments.txt


In [32]:
# Save the config entries set in this notebook.
# NOTE(review): presumably writes to the file identified by cfn -- confirm in
# config_readwrite.write.
crw.write(config, cfn)