# *Lists*

In [1]:
gene_expression = ['gene', 5.16e-08, 0.00138511, 7.33e-08]

In [2]:
# Access the last element of the list
print(gene_expression[3])
print(gene_expression[-1])

# Both are eqaully likely

7.33e-08
7.33e-08


In [3]:
# Changing individual list element
gene_expression[0] = 'Lif'
print(gene_expression)

['Lif', 5.16e-08, 0.00138511, 7.33e-08]


In [4]:
# !Don't Change an element in a string

motif = 'nacgggtca'
motif[0] = 'a'

TypeError: 'str' object does not support item assignment

## *Slicing Lists*

In [5]:
# Can slice a list [it will create a new list]
gene_expression[-3:]

[5.16e-08, 0.00138511, 7.33e-08]

In [6]:
# Copying a list
new_list = gene_expression[:]
print(new_list)

['Lif', 5.16e-08, 0.00138511, 7.33e-08]


In [7]:
# Clearing a list
new_list[:] = []
print(new_list)

[]


In [8]:
# Concatenation
gene_expression += [5.16e-08, 0.011123]
gene_expression

['Lif', 5.16e-08, 0.00138511, 7.33e-08, 5.16e-08, 0.011123]

In [9]:
# deleting a list or list element/s
del gene_expression[-2]
gene_expression

['Lif', 5.16e-08, 0.00138511, 7.33e-08, 0.011123]

### *List as objects*

In [10]:
gene_expression.extend([0.13,0.1244])
gene_expression

['Lif', 5.16e-08, 0.00138511, 7.33e-08, 0.011123, 0.13, 0.1244]

In [11]:
# Count the number of times an element appears in a list
print(gene_expression.count('Lif'), gene_expression.count('gene'))

1 0


In [16]:
# Reverse all the elements in a list
gene_expression.reverse()
gene_expression
#gene_expression.reverse()

In [17]:
# append a element in list
gene_expression.append('gene')
gene_expression

['Lif', 5.16e-08, 0.00138511, 7.33e-08, 0.011123, 0.13, 0.1244, 'gene']

#####  ```Append()``` is for element wise including, ```Extends()``` is for many element wise inlcluding case.

In [19]:
# Removing last element
last_elem = gene_expression.pop()
last_elem

'gene'

### *Sorting a list*

In [22]:
mylist = [2,31,4,45]
sorted(mylist)

[2, 4, 31, 45]

In [23]:
mylist
# actual variable hasn't changed. In that case we can use sort()

[2, 31, 4, 45]

In [25]:
mylist.sort()
mylist

[2, 4, 31, 45]

In [28]:
dna = ['A','t', 'c', 'g','a', 'T']

In [29]:
sorted(dna)

['A', 'T', 'a', 'c', 'g', 't']

In [31]:
dna.sort()
dna

['A', 'T', 'a', 'c', 'g', 't']

# *Tuple*
### A tuple consists of a number of values separated by commas, and is another standard sequence data type, like strings and lists.

In [33]:
t = 1,2,3
print(t)
t_ = (1,2,3)
print(t_)
# both are same

(1, 2, 3)
(1, 2, 3)


# *Sets*
#### A set is an unordered collection with no duplicate elements. Set Objects support mathematical operations like union, intersections and difference.

In [43]:
brca1 = {'DNA Repair','zinc ion binding','DNA building','ubiquitin-protein transferase activity',
         'DNA Repair', 'protein ubiquitination'}
# all duplicates will be removed
brca1

{'DNA Repair',
 'DNA building',
 'protein ubiquitination',
 'ubiquitin-protein transferase activity',
 'zinc ion binding'}

In [44]:
brca2 = {'protein binding', 'H4 histone acetyltransferase activity', 'nucleoplasm', 'DNA Repair'
        , 'double-strand break repair', 'double-strand break repair via homologous recombination'}


### Operations with Sets

In [45]:
brca1 | brca2
# | operator will return union

{'DNA Repair',
 'DNA building',
 'H4 histone acetyltransferase activity',
 'double-strand break repair',
 'double-strand break repair via homologous recombination',
 'nucleoplasm',
 'protein binding',
 'protein ubiquitination',
 'ubiquitin-protein transferase activity',
 'zinc ion binding'}

- ```a | b``` ~ ```UNION```
- ```a & n ``` ~ ```INTERSECTION```
- ```a - b ``` ~ ```DIFFERENCE```

In [46]:
brca1 & brca2

{'DNA Repair'}

In [47]:
brca1 - brca2

{'DNA building',
 'protein ubiquitination',
 'ubiquitin-protein transferase activity',
 'zinc ion binding'}

# *Dictionary*
#### *A dictionary is an unordered set of key and value pairs, with the requirements that the keys are unique(within one dictionary)*

```keys```: can be any immutable type e.g strings, numbers<br>
```values```: can be any type

In [55]:
TF_motif = {
    'SP1' : 'gggcgg',
    'C/EBP': 'attgcgcaat',
    'ATF': 'tgacgtca',
    'c-Myc': 'cacgtg',
    'oct-1': 'atgcaaat'
}

In [56]:
TF_motif

{'SP1': 'gggcgg',
 'C/EBP': 'attgcgcaat',
 'ATF': 'tgacgtca',
 'c-Myc': 'cacgtg',
 'oct-1': 'atgcaaat'}

In [57]:
TF_motif['ATF']

'tgacgtca'

In [58]:
# searching a key that is not present
TF_motif['ag']

KeyError: 'ag'

In [60]:
'ag' in TF_motif
## it will in binary format

False

In [61]:
TF_motif['AP-1'] = 'tgagtca'
TF_motif

{'SP1': 'gggcgg',
 'C/EBP': 'attgcgcaat',
 'ATF': 'tgacgtca',
 'c-Myc': 'cacgtg',
 'oct-1': 'atgcaaat',
 'AP-1': 'tgagtca'}

In [62]:
# modifying value in a key
TF_motif['AP-1'] = 'tgagtca/cgat'
TF_motif

{'SP1': 'gggcgg',
 'C/EBP': 'attgcgcaat',
 'ATF': 'tgacgtca',
 'c-Myc': 'cacgtg',
 'oct-1': 'atgcaaat',
 'AP-1': 'tgagtca/cgat'}

In [63]:
# deleting a key-value pair
del TF_motif['SP1']
TF_motif

{'C/EBP': 'attgcgcaat',
 'ATF': 'tgacgtca',
 'c-Myc': 'cacgtg',
 'oct-1': 'atgcaaat',
 'AP-1': 'tgagtca/cgat'}

In [64]:
# Add nother dictionary to the current one
up_dict = {'SP1': 'gggcgg',
          'C/EBP' : 'attgcgcaat',
           'Oct-1' : 'atgcaaa'
          }

TF_motif.update(up_dict)

In [65]:
TF_motif

{'C/EBP': 'attgcgcaat',
 'ATF': 'tgacgtca',
 'c-Myc': 'cacgtg',
 'oct-1': 'atgcaaat',
 'AP-1': 'tgagtca/cgat',
 'SP1': 'gggcgg',
 'Oct-1': 'atgcaaa'}

In [66]:
# len of a dict, key-value pair
len(TF_motif)

7

In [67]:
# getting all keys
TF_motif.keys()

dict_keys(['C/EBP', 'ATF', 'c-Myc', 'oct-1', 'AP-1', 'SP1', 'Oct-1'])

In [68]:
# getting all values
TF_motif.values()

dict_values(['attgcgcaat', 'tgacgtca', 'cacgtg', 'atgcaaat', 'tgagtca/cgat', 'gggcgg', 'atgcaaa'])

In [70]:
list(TF_motif.values())

['attgcgcaat',
 'tgacgtca',
 'cacgtg',
 'atgcaaat',
 'tgagtca/cgat',
 'gggcgg',
 'atgcaaa']

In [71]:
sorted(TF_motif.keys())

['AP-1', 'ATF', 'C/EBP', 'Oct-1', 'SP1', 'c-Myc', 'oct-1']

In [72]:
sorted(TF_motif.values())

['atgcaaa',
 'atgcaaat',
 'attgcgcaat',
 'cacgtg',
 'gggcgg',
 'tgacgtca',
 'tgagtca/cgat']

# *Ifs and Loops*

In [73]:
dna = input('Enter DNA sequence:' )

Enter DNA sequence:agcgcgggtatatatgcnccann


In [74]:
if 'n' in dna:
    nbases = dna.count('n')
    print('dna sequence has %d undefined bases ' % nbases)

dna sequence has 3 undefined bases 


In [75]:
'a' == 'A'

False

In [76]:
'GT' != 'AG'

True

In [78]:
'A' < 'C'

True

```Membership Operators``` - ```in, not in```

In [81]:
motif = 'gtccc'
dna = 'atatattgtcccattt'

motif in dna

True

```Identity Operators``` - ```is , is not``` ~ True if the variables on either side of the operator point to the same object and false otherwise and is not is opposite.

In [84]:
alphabet = ['a', 'c','g', 't']
new_alphabet = alphabet[:]

In [85]:
alphabet == new_alphabet

True

In [87]:
alphabet is new_alphabet

# it is because both are carried in different memory location although their elements are same

False

#### *Logical Operator*
```and``` ~ True if both condition are true <br>
```or``` ~ True if at least one condition is true <br>
```not``` ~ True if condition is false

### *Loop*
```for``` ```while```

In [90]:
dna = input('Enter DNA sequence: ')
pos = dna.find('gt',0) # position of donor splice site

while pos > -1:
    print('Donor splice site candidate at position %d' %pos)
    pos = dna.find('gt', pos+1)

Enter DNA sequence: gtcggatcgttaacgttaacccccgggtttaacccttgg
Donor splice site candidate at position 0
Donor splice site candidate at position 8
Donor splice site candidate at position 14
Donor splice site candidate at position 26


### The for Loop
##### Python's for loop iterates over the items of any sequence (a list or a string), in the order that they appear in the sequence.

In [92]:
motifs = ['attcccgt', 'agggggtttttcg', 'gtagc']

for m in motifs:
    print(m, len(m))

attcccgt 8
agggggtttttcg 13
gtagc 5


In [93]:
for i in range(4):
    print(i)

0
1
2
3


In [94]:
for i in range(100,200,13):
    print(i)

100
113
126
139
152
165
178
191


In [95]:
protein = 'SDVIHRYKUUPAKSHGWYVCJRSRFTWMVWWRFRSCRA'
for i in range(len(protein)):
    if protein[i] not in 'ABCDEFGHJKLMNPQRSTVWXYZ':
        print('protein contains invalid amino acid %s at position %d' %(protein[i],i))

protein contains invalid amino acid I at position 3
protein contains invalid amino acid U at position 8
protein contains invalid amino acid U at position 9


In [96]:
protein = 'SDVIHRYKUUPAKSHGWYVCJRSRFTWMVWWRFRSCRA'
for i in range(len(protein)):
    if protein[i] not in 'ABCDEFGHJKLMNPQRSTVWXYZ':
        print('protein contains invalid amino acid %s at position %d' %(protein[i],i))
        break
# break where is false

protein contains invalid amino acid I at position 3


In [97]:
protein = 'SDVIHRYKUUPAKSHGWYVCJRSRFTWMVWWRFRSCRA'
corrected_protein = []
for i in range(len(protein)):
    if protein[i] not in 'ABCDEFGHJKLMNPQRSTVWXYZ':
        continue
    corrected_protein += protein[i]
    
print(corrected_protein)
        

['S', 'D', 'V', 'H', 'R', 'Y', 'K', 'P', 'A', 'K', 'S', 'H', 'G', 'W', 'Y', 'V', 'C', 'J', 'R', 'S', 'R', 'F', 'T', 'W', 'M', 'V', 'W', 'W', 'R', 'F', 'R', 'S', 'C', 'R', 'A']


# *Functions*