# Protein Synthesis

In [58]:
pip install biopython



In [59]:
import Bio

In [60]:
from Bio.Seq import Seq

In [61]:
dna_seq = Seq('ATGATCTGTAA')

In [62]:
len(dna_seq)

11

In [63]:
dna_seq

Seq('ATGATCTGTAA')

Seq('ATGATCTGTAA')

In [64]:
# Calculate DNA sequence complement
# Be mind that AT=2 hydrogen bonds GC=3 hydrogen bonds

dna_seq.complement()
Seq('TACTAGAGCATT')

Seq('TACTAGAGCATT')

In [65]:
# Calculate DNA sequence reverse complement
dna_seq.reverse_complement()

Seq('TTACGAGATCAT')

Seq('TTACGAGATCAT')

In [66]:
# Custom function to calculate DNA sequence complement
dna_seq.complement()[::-1]

Seq('TTACAGATCAT')

# Transcription & translation

In [67]:
dna_seq

Seq('ATGATCTGTAA')

In [68]:
# Transcription DNA to mRNA (writing the message)
dna_seq.transcribe()

Seq('AUGAUCUGUAA')

In [69]:
mRNA = dna_seq.transcribe()

In [70]:
# Translation : mRNA to protein/amino acid
# Method 1
mRNA.translate()

Seq('MIS*')



Seq('MIS*')

In [71]:
# Method 2
# Direct translation of DNA to amino acid
dna_seq.translate()

Seq('MIC')

In [72]:
# Create our custom stop codon symbol
mRNA.translate(stop_symbol = "@")

Seq('MIS@')

Seq('MIS@')

In [73]:
# Back transcription : mRNA to DNA
mRNA.back_transcribe()

Seq('ATGATCTGTAA')

In [74]:
mRNA.back_transcribe() == dna_seq

True

In [75]:
# Join the steps
dna_seq.transcribe().translate()

Seq('MIC')

In [76]:
# Convert amino acid to 3 letter codon
from Bio.SeqUtils import seq3, seq1

In [77]:
aa = dna_seq.translate()
aa

Seq('MIC')

In [78]:
seq3(aa)

'MetIleCys'

In [79]:
# Convert from 3 letter to 1 letter
seq1('MetIleSerTer')

'MIS*'

# Amino Acids Codon Table

In [80]:
# View our codon table
from Bio.Data import CodonTable

In [81]:
# check methods & attributes
dir(CodonTable)

['AmbiguousCodonTable',
 'AmbiguousForwardTable',
 'CodonTable',
 'Dict',
 'IUPACData',
 'List',
 'NCBICodonTable',
 'NCBICodonTableDNA',
 'NCBICodonTableRNA',
 'Optional',
 'TranslationError',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'ambiguous_dna_by_id',
 'ambiguous_dna_by_name',
 'ambiguous_generic_by_id',
 'ambiguous_generic_by_name',
 'ambiguous_rna_by_id',
 'ambiguous_rna_by_name',
 'generic_by_id',
 'generic_by_name',
 'list_ambiguous_codons',
 'list_possible_proteins',
 'make_back_table',
 'register_ncbi_table',
 'standard_dna_table',
 'standard_rna_table',
 'unambiguous_dna_by_id',
 'unambiguous_dna_by_name',
 'unambiguous_rna_by_id',
 'unambiguous_rna_by_name']

In [82]:
# DNA codon table
print(CodonTable.unambiguous_dna_by_name['Standard'])

Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V   | GCG A   | GAG E   | GGG G   | G
--+---------

In [83]:
0 # RNA codon table
print(CodonTable.unambiguous_rna_by_name['Standard'])

Table 1 Standard, SGC0

  |  U      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
U | UUU F   | UCU S   | UAU Y   | UGU C   | U
U | UUC F   | UCC S   | UAC Y   | UGC C   | C
U | UUA L   | UCA S   | UAA Stop| UGA Stop| A
U | UUG L(s)| UCG S   | UAG Stop| UGG W   | G
--+---------+---------+---------+---------+--
C | CUU L   | CCU P   | CAU H   | CGU R   | U
C | CUC L   | CCC P   | CAC H   | CGC R   | C
C | CUA L   | CCA P   | CAA Q   | CGA R   | A
C | CUG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | AUU I   | ACU T   | AAU N   | AGU S   | U
A | AUC I   | ACC T   | AAC N   | AGC S   | C
A | AUA I   | ACA T   | AAA K   | AGA R   | A
A | AUG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GUU V   | GCU A   | GAU D   | GGU G   | U
G | GUC V   | GCC A   | GAC D   | GGC G   | C
G | GUA V   | GCA A   | GAA E   | GGA G   | A
G | GUG V   | GCG A   | GAG E   | GGG G   | G
--+---------

# 3D Structure of protein

+ File Format
 - pdb :PDBParser() legacy
 - cif :MMCIFParser() recent

+ links
 - https://www.ncbi.nlm.nih.gov/Structure/pdb/6LU7
 - https://www.rcsb.org/search
 - Protein Data Bank

+ Packages
 - pip install nglview
 - jupyter-nbextension enable nglview --py --sys-prefix
 - nglview enable
 - jupyter-labextension install @jupyter-
    widget/jupyterlab-manager
 - jupyter-labextension install nglview-js-widgets


In [90]:
import os
from Bio.PDB.PDBParser import PDBParser

parser = PDBParser()
pdb_file = os.path.abspath("6lu7.pdb")

In [91]:
structure = parser.get_structure("6LU7", pdb_file)



In [92]:
structure

<Structure id=6LU7>

In [93]:
model = structure [0]

In [95]:
for chain in model:
  print(f'Chain{chain},Chain_ID{chain.id}')

Chain<Chain id=A>,Chain_IDA
Chain<Chain id=C>,Chain_IDC


In [97]:
# Check residue
for chain in model:
  print(f'Chain{chain},Chain_ID{chain.id}')
  for residue in chain:
    print(residue)

Chain<Chain id=A>,Chain_IDA
<Residue SER het=  resseq=1 icode= >
<Residue GLY het=  resseq=2 icode= >
<Residue PHE het=  resseq=3 icode= >
<Residue ARG het=  resseq=4 icode= >
<Residue LYS het=  resseq=5 icode= >
<Residue MET het=  resseq=6 icode= >
<Residue ALA het=  resseq=7 icode= >
<Residue PHE het=  resseq=8 icode= >
<Residue PRO het=  resseq=9 icode= >
<Residue SER het=  resseq=10 icode= >
<Residue GLY het=  resseq=11 icode= >
<Residue LYS het=  resseq=12 icode= >
<Residue VAL het=  resseq=13 icode= >
<Residue GLU het=  resseq=14 icode= >
<Residue GLY het=  resseq=15 icode= >
<Residue CYS het=  resseq=16 icode= >
<Residue MET het=  resseq=17 icode= >
<Residue VAL het=  resseq=18 icode= >
<Residue GLN het=  resseq=19 icode= >
<Residue VAL het=  resseq=20 icode= >
<Residue THR het=  resseq=21 icode= >
<Residue CYS het=  resseq=22 icode= >
<Residue GLY het=  resseq=23 icode= >
<Residue THR het=  resseq=24 icode= >
<Residue THR het=  resseq=25 icode= >
<Residue THR het=  resseq=26 ic

In [98]:
!pip install py3dmol

Collecting py3dmol
  Downloading py3Dmol-2.1.0-py2.py3-none-any.whl (12 kB)
Installing collected packages: py3dmol
Successfully installed py3dmol-2.1.0


In [99]:
!pip install --upgrade pip

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.0


In [100]:
import py3Dmol
view1 = py3Dmol.view(query='pdb:6LU7')
view1.setStyle({'cartoon':{'color':'spectrum'}})

<py3Dmol.view at 0x7cc126b0ff40>

In [101]:
view2 = py3Dmol.view(query='pdb:4ZS6')
view2.setStyle({'cartoon':{'color':'spectrum'}})

<py3Dmol.view at 0x7cc126b0f610>