In [33]:
import re, doctest

In [None]:
def validate1(s):
    """
    Checks whether the string is a valid employee ID using a single regular expression.
    An employee ID is valid if and only if it consists
    only of 6-10 alphabetic characters (letters), followed by 2 numeric digits.

    (Assumes s is a string without any non-ASCII characters.
    Otherwise, does not make any assumptions about the string.)

    Returns True when the whole string is a valid ID, and False otherwise.
    In particular, a string with anything after the two digits
    (including a trailing newline) is rejected.

    The lines below give example inputs and correct outputs using doctest notation,
    and can be run to test the code. Passing these tests is NOT sufficient
    to guarantee your implementation is correct. You may add additional test cases.

    >>> validate1('AbCdEf00')
    True
    >>> validate1('$0RQLpCHz49')
    False
    >>> validate1('AbCdE00')
    False
    >>> validate1('AbCdEfGhIjK00')
    False
    """
    # 6 to 10 ASCII letters followed by exactly 2 ASCII digits.
    # [0-9] is used instead of \d because \d also matches non-ASCII
    # Unicode digits when the pattern is a str.
    EMPLOYEE_RE = r'[a-zA-Z]{6,10}[0-9]{2}'
    # fullmatch requires the pattern to cover the entire string. A trailing
    # '$' anchor would not be enough: '$' also matches just before a final
    # newline, so 'AbCdEf00\n' would be accepted.
    return re.fullmatch(EMPLOYEE_RE, s) is not None

In [35]:
validate1('AbCdEf00')

True

In [36]:
validate1('$0RQLpCHz49')

False

In [37]:
validate1('abdfesas01')
validate1('aCDFRa00')

True

In [None]:
def validate2(s):
    """
    Checks whether the string is a valid employee ID using plain string
    operations (no regular expression).
    An employee ID is valid if and only if it consists
    only of 6-10 ASCII letters, followed by 2 ASCII digits.

    Returns True when the string is a valid ID, and False otherwise.

    >>> validate2('AbCdEf00')
    True
    >>> validate2('$0RQLpCHz49')
    False
    >>> validate2('av12')
    False
    """
    # 6-10 letters plus 2 digits means the total length must be 8-12.
    if not 8 <= len(s) <= 12:
        return False

    letters, digits = s[:-2], s[-2:]

    # Explicit ASCII range checks are used instead of str.isalpha(), which
    # also accepts non-ASCII letters.
    if not all('a' <= c <= 'z' or 'A' <= c <= 'Z' for c in letters):
        return False

    # Likewise str.isdigit() accepts characters such as superscript digits,
    # so compare against '0'..'9' directly.
    return all('0' <= c <= '9' for c in digits)

In [39]:
validate2('aCDFRa00')

True

In [40]:
validate2('$0RQLpCHz49')

False

In [41]:
validate2('av12')  # total length < 8, so the expected output is False

False

In [None]:
def dna_prob(seq):
    """
    Given a sequence of the DNA bases {A, C, G, T},
    stored as a string, returns a conditional probability table
    in a data structure such that one base (b1) can be looked up,
    and then a second (b2), to get the probability p(b2 | b1)
    of the second base occurring immediately after the first.
    (Assumes the length of seq is >= 3, and that the probability of
    any b1 and b2 which have never been seen together is 0.
    Ignores the probability that b1 will be followed by the
    end of the string.)

    The result is a dict of dicts: tbl[b1][b2] is a float. Every pair
    of bases from {A, C, G, T} (plus any other symbol that appears in
    seq) has an entry, so pairs that never occur look up as 0.0
    instead of raising KeyError.

    >>> tbl = dna_prob('ATCGATTGAGCTCTAGCG')
    >>> tbl['T']['T']
    0.2
    >>> tbl['G']['A']
    0.5
    >>> tbl['C']['G']
    0.5
    >>> tbl['A']['A']
    0.0
    """
    # The four DNA bases always get a row and a column. Any other symbol
    # found in seq is kept as well, so such input is still tabulated.
    # dict.fromkeys de-duplicates while preserving order.
    bases = list(dict.fromkeys(list('ACGT') + list(seq)))

    # Start every (b1, b2) pair at zero so unseen pairs are represented.
    counts = {b1: {b2: 0 for b2 in bases} for b1 in bases}
    for b1, b2 in zip(seq, seq[1:]):
        counts[b1][b2] += 1

    tbl = {}
    for b1, row in counts.items():
        total = sum(row.values())
        # A base that is never followed by anything (absent from seq, or
        # only appearing as the last symbol) has total == 0; report 0.0
        # for its whole row rather than dividing by zero.
        tbl[b1] = {b2: (n / total if total else 0.0) for b2, n in row.items()}

    return tbl

tbl = dna_prob('AATCGGCTTA')
tbl['T']['T']
tbl['T']['A']

0.3333333333333333

In [None]:
def dna_bp(seq):
    """
    Builds the complementary strand for a DNA sequence.

    Takes a string of DNA bases and returns a new string of the same
    length in which each base is replaced by its partner:
    A <-> T and C <-> G.

    Raises KeyError if seq contains a symbol other than A, T, C or G.

    >>> dna_bp('ATCGATTGAGCTCTAGCG')
    'TAGCTAACTCGAGATCGC'
    """
    # Position i of the first string pairs with position i of the second.
    complement = dict(zip('ATCG', 'TAGC'))

    paired = []
    for base in seq:
        paired.append(complement[base])
    return ''.join(paired)

In [52]:
dna_bp('ATCGATTGAGCTCTAGCG')

'TAGCTAACTCGAGATCGC'

In [53]:
dna_bp('ATCGCCTTAA')

'TAGCGGAATT'

In [None]:
if __name__ == "__main__":
    # Execute the docstring examples in this module; failures are printed.
    doctest.testmod()