# align plasmid sequencing with target library. 
20230703
sarahfong

input 

    - plasmid.fa file from Yelena
    - library .fa file
    
my plan

    - make a bowtie index file for library
    - get plasmid sequencing genbank files 

In [1]:
import glob
import os
import subprocess
import sys

import pandas as pd
from Bio import SeqIO
from Bio.Align import PairwiseAligner

import config_readwrite as crw

In [2]:
# the config file is looked up one directory above the current working directory
config_tag = "config"
parent_dir = os.path.dirname(os.getcwd())
config, cfn = crw.read_config(os.path.join(parent_dir, config_tag))

# write config

## colony sequencing data

In [5]:
section = "col_seq"
crw.check(config, section)

# colony sequencing directory, its input FASTA, and the alignment output file
PATH = "/wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/"
FILE_FA = os.path.join(PATH, "QC_071023.fa")
BOWTIE_OUT = os.path.join(PATH, "library_bowtie_alignments.txt")

# record each path under its config key
for key, value in (
    ("path", PATH),
    ("path_fa", FILE_FA),
    ("lib_alignment", BOWTIE_OUT),
):
    config[section][key] = value

## bowtie paths

In [6]:
section = "bowtie"
crw.check(config, section)

# bowtie install directory and the executable inside it
PATH_BOWTIE = "/wynton/home/ahituv/fongsl/bin/bowtie"
BIN_BOWTIE = os.path.join(PATH_BOWTIE, "bowtie")

# record each path under its config key
for key, value in (("path", PATH_BOWTIE), ("bin", BIN_BOWTIE)):
    config[section][key] = value

## target library

In [7]:
section = "common.15mer.2mut"

# read: library path already stored in the config
TARGET = config[section]["library_twist"]

# write: FASTA path is the library path with its extension swapped for .fa
target_stem, _target_ext = os.path.splitext(TARGET)
TARGET_FA = f"{target_stem}.fa"

config[section]["library_twist_FA"] = TARGET_FA

# target file .fa

In [8]:
def fastaWriter(outfile, df):
    """
    Write a dataframe of (seq_id, sequence) rows as a FASTA file, unless the
    file already exists.

    input
        outfile (str) - path to outfile
        df (pd dataframe) - first column holds the seq_id, second column holds
            the sequence. Columns are taken by position; extra columns are ignored.

    method

        1. do nothing (besides printing) if outfile already exists
        2. pair the first two columns row by row
        3. write each pair as ">seq_id" then the sequence, one per line

    return
        None. Prints whether the fasta was written or was already present.

    raises
        IndexError - if df has fewer than two columns. This is raised before
            outfile is opened, so no empty file is left behind.
    """

    #1 an existing fasta is never overwritten
    if os.path.exists(outfile):
        print("already wrote fasta", outfile)
        return

    #2 select columns with .iloc so the lookup is explicitly positional and
    #  does not depend on the column labels; doing this before open() means
    #  a malformed dataframe cannot leave an empty outfile that would block
    #  every later attempt to write it
    records = zip(df.iloc[:, 0], df.iloc[:, 1])

    #3 the context manager closes the file, no explicit close() is needed
    with open(outfile, "w") as writer:
        writer.writelines(f">{seq_id}\n{seq}\n" for seq_id, seq in records)

    print("wrote fasta", outfile)

## turn target TSV into fa

In [9]:
# load the tab-separated library table, then write it out as fasta
df = pd.read_table(TARGET)
fastaWriter(TARGET_FA, df)

already wrote fasta /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.fa


## index fa w/ bowtie

In [18]:
# index prefix: the target fasta path with ".index" in place of its extension
TARGET_INDEX = os.path.splitext(TARGET_FA)[0] + ".index"

# argument list for the "-build" companion of the bowtie binary
cmd = [BIN_BOWTIE + "-build", TARGET_FA, TARGET_INDEX]

# any file starting with the index prefix counts as an existing index
if not glob.glob(TARGET_INDEX + "*"):
    print('make index\n'," ".join(cmd))

    # pass the arguments as a list (no shell) so paths containing spaces or
    # shell metacharacters reach bowtie-build intact
    build = subprocess.run(cmd)

    # surface failures instead of silently discarding the exit status
    if build.returncode != 0:
        print(f"bowtie-build exited with status {build.returncode}")
else:
    print("already indexed target file for bowtie")



already indexed target file for bowtie


In [11]:
def runBowtie(bin_bowtie, target_index, input_fa, outfile):
    """
    Run bowtie on a fasta file of reads and append its stdout to outfile.

    input
        bin_bowtie (str) - path to the bowtie executable
        target_index (str) - index prefix passed to bowtie with -x
        input_fa (str) - fasta file of reads, passed with -f
        outfile (str) - file that receives bowtie's stdout (appended to,
            so repeated calls accumulate output)

    method

        1. build the bowtie argument list (-x index, -f reads, -n 3)
        2. run it without a shell, with stdout appended to outfile
        3. print the equivalent shell command and report a nonzero exit status

    return
        None

    raises
        OSError (e.g. FileNotFoundError) - if bin_bowtie cannot be executed
            or outfile cannot be opened.
    """

    #1 one list element per argument, so paths with spaces stay intact
    cmd = [
        bin_bowtie,
        "-x",
        target_index,
        "-f",
        input_fa,
        "-n",
        "3",
    ]

    #2 append mode reproduces the shell's ">>" redirection
    with open(outfile, "a") as handle:
        result = subprocess.run(cmd, stdout=handle)

    #3 echo the command in its shell form for the notebook record
    print(" ".join(cmd + [">>", outfile]))

    # report failures instead of silently discarding the exit status
    if result.returncode != 0:
        print(f"bowtie exited with status {result.returncode}")

## run bowtie alignment

In [12]:
# align the colony sequencing fasta against the indexed target library
runBowtie(
    bin_bowtie=BIN_BOWTIE,
    target_index=TARGET_INDEX,
    input_fa=FILE_FA,
    outfile=BOWTIE_OUT,
)

/wynton/home/ahituv/fongsl/bin/bowtie/bowtie -x /wynton/home/ahituv/fongsl/nullomers/results/lock/library_design/15mer.fo.pam.scaffold.ext200.library.TWIST.index -f /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/QC_071023.fa -n 3 >> /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/library_bowtie_alignments.txt


# reads processed: 16
# reads with at least one alignment: 16 (100.00%)
# reads that failed to align: 0 (0.00%)
Reported 16 alignments


In [13]:
%%bash
# count the lines in the bowtie alignment output file
wc -l /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/library_bowtie_alignments.txt

16 /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/library_bowtie_alignments.txt


In [15]:
%%bash
# print the bowtie alignment output file for visual inspection
cat /wynton/home/ahituv/fongsl/nullomers/data/lock/colony_seq_16_20230710/library_bowtie_alignments.txt

SF_1	+	15-firstorder.1628184|K562_peak26939|active	15	AGCGCCCTGACAAAACAGGCCACTCGGCTCTACCAATCAGCAAGATGTGGGTGGGGCCAGATAAGAGAATAAAAGCAGGCTGCCCGAGCTTGCATTAGTAACTCGCTCGGGTCCCCTTCCGGATTGTGGAAACTTTGTTCTTTCGCTGTATGTAATAACTCTTGCTACTGTAGCTCGCTTTTTGGGCTCGCGATTTACGG	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	0	
SF_2	+	15-firstorder.312772|WTC11_seq8098_F|inactive	15	CAGGCCAATAATTTTCCAAGTATTCAAACAAACCCATTAACCCACAATCTTTTGTTTTCCCTGGAAACCTCCTGCTGTGGGCCTCTCCCCATAGCTCGTTTGATCGGCTTGCTCCTGATGTTGCTGCACAGCTATTGTTCTGGGACTTCCCACTCTCTAGCCCCAGTGTTCGAAATCCTTGGACTTAAATCCCAGGGTGT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	0	
SF_3	+	15-firstorder.394438|K562_peak18899|active	15	TCAGGAGTGAAGCTGCAGACCTTCGTG

In [14]:
# pass the updated config object and its file name to config_readwrite
# (presumably saves the keys set in the cells above to cfn - confirm against crw.write)
crw.write(config, cfn)