In [1]:
from Bio import SeqIO, Restriction as RE

In [2]:
# Feed all SNP sequences into a dict keyed by FASTA record id
# (ids look like 'snp:ID:<n>;allele_<1|2>', see compare()).
#
# The path is handed straight to SeqIO.parse, which opens and closes the file
# itself. No explicit open(..., "rU") is used: the "U" mode flag was removed
# in Python 3.11 and raises ValueError there.
d = {}
for record in SeqIO.parse('data/larson/cjfas-2013-0502suppli.fa', "fasta"):
    d[record.id] = record.seq

In [3]:
def visualize(seq, enz):
    """Digest ``seq`` with ``enz`` and print a staircase view of the fragments.

    Parameters
    ----------
    seq : sequence object
        The sequence to digest; it is passed unchanged to ``enz.catalyze``
        and rendered with ``str()``.
    enz : restriction enzyme
        Any object exposing ``site`` (the recognition site, printed as the
        header) and ``catalyze(seq)`` (returning the fragments in order).

    Returns
    -------
    None
        Everything is written to stdout.

    Notes
    -----
    Each fragment is printed on its own line, indented so that its bases sit
    in the same columns they occupy in the ``Before:`` line.  Every fragment,
    including the last one (or the only one when nothing was cut), is
    terminated with ``/``.

    Only single-argument ``print(...)`` calls are used so the function runs
    unchanged on Python 2 and Python 3.
    """
    fragments = enz.catalyze(seq)

    print('Cut site: %s' % enz.site)
    print('Resulting Segments: %d' % len(fragments))
    print('------------------------')
    print('Before:')
    print(str(seq) + '\n')
    print('After:')

    # Print the fragments on separate lines.  ``offset`` is the number of
    # bases already printed, i.e. the column where the next fragment starts.
    offset = 0
    for fragment in fragments:
        text = str(fragment)
        if offset == 0:
            # Leading fragment: flush left, no opening slash.
            print(text + '/')
        else:
            # The opening '/' goes one column left of the fragment's first
            # base, so the bases themselves stay aligned with ``Before:``.
            print(' ' * (offset - 1) + '/' + text + '/')
        offset += len(fragment)

In [4]:
def compare(snp_id, seqs, enz):
    """Show how ``enz`` digests each of the two alleles of one SNP.

    Parameters
    ----------
    snp_id : int or str
        SNP number; it is substituted into ``'snp:ID:{snp};allele_{a}'`` to
        build the two lookup keys (``a`` = 1 and 2).
    seqs : mapping
        Maps those keys to the allele sequences.
    enz : restriction enzyme
        Passed through to ``visualize`` for each allele.

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    KeyError
        If either allele key is missing from ``seqs``.
    ValueError
        If the two alleles do not differ at any compared position, so there
        is no SNP position to mark.

    Notes
    -----
    Cut sites may contain IUPAC ambiguity codes; helpful link:
    http://www.bioinformatics.org/sms/iupac.html

    Only single-argument ``print(...)`` calls are used so the function runs
    unchanged on Python 2 and Python 3.
    """
    # Extract the two alleles of the SNP.
    id_template = 'snp:ID:{snp};allele_{a}'
    alleles = [seqs[id_template.format(snp=snp_id, a=number)]
               for number in (1, 2)]
    first, second = str(alleles[0]), str(alleles[1])

    # Identify where in the sequence the SNP occurs: the first position at
    # which the two alleles disagree.
    snp_pos = next(
        (pos for pos, (x, y) in enumerate(zip(first, second)) if x != y),
        None)
    if snp_pos is None:
        raise ValueError(
            'alleles of SNP %s do not differ at any position' % snp_id)

    print('Cutting SNP: `%s`' % id_template.format(snp=snp_id, a=1))
    print('Visualizing the two alleles')
    print('%s (Allele 1)' % first)
    print('%s (Allele 2)' % second)
    # Caret under the differing base.
    print(' ' * snp_pos + '^')

    for number, allele in enumerate(alleles, 1):
        print('Cutting Allele %s' % number)
        visualize(allele, enz)
        if number == 1:
            # Separator between the two digests.
            print('- - - - - - - - - - - - - - - - - - - - - - - - - ')

In [5]:
# SNP 1372 vs. EcoT22I (reported cut site ATGCAT): in the recorded output
# below, allele 1 stays in one segment while allele 2 is cut into 2.
compare(1372, d, RE.EcoT22I)

Cutting SNP: `snp:ID:1372;allele_1`
Visualizing the two alleles
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCAAGCATTTTGGGCTTGAAAACACCCATAC (Allele 1)
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCATGCATTTTGGGCTTGAAAACACCCATAC (Allele 2)
                                                                  ^
Cutting Allele 1
Cut site: ATGCAT
Resulting Segments: 1
------------------------
Before:
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCAAGCATTTTGGGCTTGAAAACACCCATAC

After:
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCAAGCATTTTGGGCTTGAAAACACCCATAC/
- - - - - - - - - - - - - - - - - - - - - - - - - 
Cutting Allele 2
Cut site: ATGCAT
Resulting Segments: 2
------------------------
Before:
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCATGCATTTTGGGCTTGAAAACACCCATAC

After:
TGCAGGGGACACAAATGTACATGGTGTAACCAGTTTCTATTCTTTGTTTCTTTGGTGTAGCAAGCATGCA/
                                                     

In [6]:
# SNP 3769 vs. BaeI (reported cut site ACNNNNGTAYC): in the recorded output
# below, allele 1 stays in one segment while allele 2 is cut into 3.
compare(3769, d, RE.BaeI)

Cutting SNP: `snp:ID:3769;allele_1`
Visualizing the two alleles
TGCAGGATTGTCTGACACTAGTCACCCGGACAACTGATGAACCTGAGTTTGCACAACCGAAGAATTTCTGCACAAACTGTCAAACAGTCTCAGG (Allele 1)
TGCAGGATTGTCTGACACTAGTCACCCGGACAACTGATGTACCTGAGTTTGCACAACCGAAGAATTTCTGCACAAACTGTCAAACAGTCTCAGG (Allele 2)
                                       ^
Cutting Allele 1
Cut site: ACNNNNGTAYC
Resulting Segments: 1
------------------------
Before:
TGCAGGATTGTCTGACACTAGTCACCCGGACAACTGATGAACCTGAGTTTGCACAACCGAAGAATTTCTGCACAAACTGTCAAACAGTCTCAGG

After:
TGCAGGATTGTCTGACACTAGTCACCCGGACAACTGATGAACCTGAGTTTGCACAACCGAAGAATTTCTGCACAAACTGTCAAACAGTCTCAGG/
- - - - - - - - - - - - - - - - - - - - - - - - - 
Cutting Allele 2
Cut site: ACNNNNGTAYC
Resulting Segments: 3
------------------------
Before:
TGCAGGATTGTCTGACACTAGTCACCCGGACAACTGATGTACCTGAGTTTGCACAACCGAAGAATTTCTGCACAAACTGTCAAACAGTCTCAGG

After:
TGCAGGATTGTCTGACACTAGT/
                     /CACCCGGACAACTGATGTACCTGAGTTTGCACA/
                                                      /ACCGAA

In [7]:
# SNP 4369 vs. AspBHI (reported cut site YSCNS): the recorded output below
# reports 4 segments for both alleles, so the segment count alone does not
# separate them here.
compare(4369, d, RE.AspBHI)

Cutting SNP: `snp:ID:4369;allele_1`
Visualizing the two alleles
TGCAGGCTTCACACGTGTTCACAATGTTAACTCCACCTGTAGTTTCTCACCGCAGGTTTGCTTATATGCAAAAGAGTCACTAAAATATGCACTA (Allele 1)
TGCAGGCTTCACACGTGTTCACAATGTTAACTCCACCTGTAGTTTCTCATCGCAGGTTTGCTTATATGCAAAAGAGTCACTAAAATATGCACTA (Allele 2)
                                                 ^
Cutting Allele 1
Cut site: YSCNS
Resulting Segments: 4
------------------------
Before:
TGCAGGCTTCACACGTGTTCACAATGTTAACTCCACCTGTAGTTTCTCACCGCAGGTTTGCTTATATGCAAAAGAGTCACTAAAATATGCACTA

After:
TGCAGGCTTCACA/
            /CGTGTTCACAATGTTAACTCCACC/
                                    /TGTAGTT/
                                           /TCTCACCGCAGGTTTGCTTATATGCAAAAGAGTCACTAAAATATGCACTA/
- - - - - - - - - - - - - - - - - - - - - - - - - 
Cutting Allele 2
Cut site: YSCNS
Resulting Segments: 4
------------------------
Before:
TGCAGGCTTCACACGTGTTCACAATGTTAACTCCACCTGTAGTTTCTCATCGCAGGTTTGCTTATATGCAAAAGAGTCACTAAAATATGCACTA

After:
TGCAGGCTTCACA/
            /CGTGTTCACAATGTTA

In [8]:
# SNP 2687 vs. TstI (reported cut site CACNNNNNNTCC): in the recorded output
# below, allele 1 is cut into 3 segments while allele 2 stays in one.
compare(2687, d, RE.TstI)

Cutting SNP: `snp:ID:2687;allele_1`
Visualizing the two alleles
TGCAGGTGTAGAAGCCGGATGTGGAGGTCCTAGGAGGTAACGTGGTCTGCGGTTGTGATAAGAGAATTATGTTCTCCAGGTACATAACCCAATT (Allele 1)
TGCAGGTGTAGAAGCCGGATGTGGAGGTCCTAGGTGGTAACGTGGTCTGCGGTTGTGATAAGAGAATTATGTTCTCCAGGTACATAACCCAATT (Allele 2)
                                  ^
Cutting Allele 1
Cut site: CACNNNNNNTCC
Resulting Segments: 3
------------------------
Before:
TGCAGGTGTAGAAGCCGGATGTGGAGGTCCTAGGAGGTAACGTGGTCTGCGGTTGTGATAAGAGAATTATGTTCTCCAGGTACATAACCCAATT

After:
TGCAGGTGTAGAAGCCGGATGTGGA/
                        /GGTCCTAGGAGGTAACGTGGTCTGCGGTTGTG/
                                                        /ATAAGAGAATTATGTTCTCCAGGTACATAACCCAATT/
- - - - - - - - - - - - - - - - - - - - - - - - - 
Cutting Allele 2
Cut site: CACNNNNNNTCC
Resulting Segments: 1
------------------------
Before:
TGCAGGTGTAGAAGCCGGATGTGGAGGTCCTAGGTGGTAACGTGGTCTGCGGTTGTGATAAGAGAATTATGTTCTCCAGGTACATAACCCAATT

After:
TGCAGGTGTAGAAGCCGGATGTGGAGGTCCTAGGTGGTAACGTGGTCTGCGGTTGTGAT

In [9]:
# SNP 4043 vs. YkrI: the reported cut site is the single base C, and the
# recorded output below shows allele 1 falling into 37 segments.
compare(4043, d, RE.YkrI)

Cutting SNP: `snp:ID:4043;allele_1`
Visualizing the two alleles
TGCAGGATTCTTTTCCAGCTGCAGCAAACTGGCTCAAATTAAGATTCTACATCGGTGAAACCTGAATCTATGCCCCCCGACGAACCATCAAACA (Allele 1)
TGCAGGATTCTTTTCCAGCTGCAGCAAACTGGCTCAAATTAAGATTCTACATCGGTGAAACCTGAATGTATGCCCCCCGACGAACCATCAAACA (Allele 2)
                                                                   ^
Cutting Allele 1
Cut site: C
Resulting Segments: 37
------------------------
Before:
TGCAGGATTCTTTTCCAGCTGCAGCAAACTGGCTCAAATTAAGATTCTACATCGGTGAAACCTGAATCTATGCCCCCCGACGAACCATCAAACA

After:
TGCAGGAT/
       /TCT/
          /TT/
            /T/
             /CCAGCT/
                   /G/
                    /C/
                     /AGC/
                        /A/
                         /AAC/
                            /TGG/
                               /C/
                                /TC/
                                  /AAAT/
                                      /TAAG/
                                          /A/
                    