## How to find the complements of nucleotide sequences ##

- Procedure
   - obtain your sequence(s)
   - create a dictionary object to store each of the four bases and their respective complements
   - use loops to iterate over the bases in the sequence, find their complements and combine them.

#### Working with a single sequence

In [1]:
#obtain your sequence

DNA='ATCTAGAGGATATAC'

In [2]:
print(DNA)

ATCTAGAGGATATAC


In [3]:
#length of sequence
len(DNA)

15

In [4]:
# lookup table: each of the four DNA bases maps to the base it pairs with
# (A pairs with T, C pairs with G)

base_complements = {
    'A': 'T',
    'T': 'A',
    'C': 'G',
    'G': 'C',
}

In [5]:
print(base_complements)

{'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}


In [6]:
#what is the complement of A
base_complements['A']

'T'

In [7]:
#what is the complement of C
base_complements['C']

'G'

#### Find the complement of the sequence "ATCTAGAGGATATAC"

In [8]:
#APPROACH 1:  USE THE TRADITIONAL 'FOR' LOOP

In [9]:
# start from an empty string, then walk along the sequence and append the
# complement of each base in turn
sequence_complement = ''

for base in DNA:
    sequence_complement = sequence_complement + base_complements[base]


In [10]:
print(sequence_complement)

TAGATCTCCTATATG


In [11]:
print('original sequence:  ',DNA)
print('sequence complement:',sequence_complement)

original sequence:   ATCTAGAGGATATAC
sequence complement: TAGATCTCCTATATG


In [12]:
# APPROACH 2: USE A LIST COMPREHENSION
# look up every base of the sequence in one expression; the result is a list
# with one complementary base per position

sequence_complement2 = [base_complements[base] for base in DNA]

In [13]:
print(sequence_complement2)

['T', 'A', 'G', 'A', 'T', 'C', 'T', 'C', 'C', 'T', 'A', 'T', 'A', 'T', 'G']


In [14]:
# glue the individual bases back together (no separator) to get a string
sequence_complement2 = ''.join(sequence_complement2)

In [15]:
print(sequence_complement2)

TAGATCTCCTATATG


In [16]:
print(sequence_complement)
print(sequence_complement2)

TAGATCTCCTATATG
TAGATCTCCTATATG


In [17]:
print('original sequence:',DNA)
print('approach 1:       ',sequence_complement)
print('approach 2:       ',sequence_complement2)

original sequence: ATCTAGAGGATATAC
approach 1:        TAGATCTCCTATATG
approach 2:        TAGATCTCCTATATG


### Find the complements of multiple nucleotide sequences

In [None]:
#APPROACH: COMBINE FUNCTIONS AND LOOPS

In [18]:
DNASEQUENCES=['ATCGA','TTAGC','ACCTAG']

In [19]:
len(DNASEQUENCES)

3

In [21]:
# create a function that will find the complement of a nucleotide sequence
# then reuse the function for all your sequences

def sequence_complement_finder(sequence):
    """Return the complement of a DNA sequence.

    Every base is replaced by the base it pairs with (A<->T, C<->G). The
    bases stay in their original order, so the result is the complement,
    not the reverse complement.

    Both uppercase and lowercase bases are accepted, and the case of each
    base is preserved in the result ('atCG' -> 'taGC').

    Parameters
    ----------
    sequence : str
        The nucleotide sequence, made up of the characters A, T, C and G
        (in either case). An empty string is allowed.

    Returns
    -------
    str
        The complementary sequence, the same length as the input.

    Raises
    ------
    KeyError
        If the sequence contains any character other than A, T, C or G
        (in either case).
    """
    base_complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # mirror the table for lowercase input so soft-masked sequences work too;
    # the comprehension is fully built before update() modifies the dict
    base_complements.update(
        {base.lower(): partner.lower()
         for base, partner in base_complements.items()}
    )
    # a plain [] lookup (not .get) keeps unknown characters a loud error
    return ''.join(base_complements[base] for base in sequence)


In [22]:
# visit each sequence in the list, compute its complement with the function
# defined above, and print the two one above the other

for sequence in DNASEQUENCES:
    sequence_complement = sequence_complement_finder(sequence)
    print('original sequence:  ', sequence)
    print('sequence complement:', sequence_complement)


original sequence:   ATCGA
sequence complement: TAGCT
original sequence:   TTAGC
sequence complement: AATCG
original sequence:   ACCTAG
sequence complement: TGGATC


In [23]:
# collect the complement of every sequence into a new list, keeping the same
# order as DNASEQUENCES
complement_sequences = [
    sequence_complement_finder(sequence) for sequence in DNASEQUENCES
]

In [24]:
print(complement_sequences)

['TAGCT', 'AATCG', 'TGGATC']


In [25]:
print('original:  ',DNASEQUENCES)
print('complement:',complement_sequences)

original:   ['ATCGA', 'TTAGC', 'ACCTAG']
complement: ['TAGCT', 'AATCG', 'TGGATC']
