### What the info output should look like

In [None]:
:I
 PDB File: 3AYU.pdb
Title: CRYSTAL STRUCTURE OF MMP-2 ACTIVE SITE MUTANT IN COMPLEX WITH APP-DRIVED
DECAPEPTIDE INHIBITOR
CHAINS: A and B
- Chain A
Number of amino acids: 167
Number of helix:           3
Number of sheet:           9
Sequence: YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRF
SRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDD
DELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRL
SQDDIKGIQELYGASPD
- Chain B
Number of amino acids: 10
Number of helix:           0
Number of sheet:           1
Sequence: ISYGNDALMP

### The functions for each task

In [1]:
def print_menu():
    print(
"""
********************************************************************************
* PDB FILE ANALYZER                                                            *
********************************************************************************
* Select an option from below:                                                 *
*                                                                              *
*      1) Open a PDB File                      (O)                             *
*      2) Information                          (I)                             *
*      3) Show histogram of amino acids        (H)                             *
*      4) Display Secondary Structure          (S)                             *
*      5) Export PDB File                      (X)                             *
*      6) Exit                                 (Q)                             *
*                                                                              *
*                                                            Current PDB: None *
********************************************************************************
""")
    option = input(": ")
    return option

In [25]:
def open_file(file):
    """open file and extract lines"""
    with open(file) as myfile:
        lines = myfile.readlines()
    return lines



In [26]:
lines = open_file("3AYU.pdb")

In [27]:
def title_print(lines):
    """print title of the protein"""
    title_string = ""
    for line in lines:
        if line.startswith("TITLE"):
            title_string = title_string + line[9:].strip()
    print("Title : " , title_string)

In [28]:
title_print(lines)

Title :  CRYSTAL STRUCTURE OF MMP-2 ACTIVE SITE MUTANT IN COMPLEX WITH APP-2 DRIVED DECAPEPTIDE INHIBITOR


In [29]:
def sheet_num(lines):
    """counts number of sheets (all chains)"""
    sheet = []
    for line in lines:
        if line.startswith('SHEET'):
            sheet.append(line[0:])
    num = len(sheet)
    print("Number of sheet: ", num)

In [30]:
sheet_num(lines)

Number of sheet:  10


In [31]:
def helix_num(lines):
    """count number of helices(all chains)"""
    helix = []
    for line in lines:
        if line.startswith('HELIX'):
            helix.append(line[0:])
    num = len(helix)
    print("Number of helix: ", num)

In [32]:
helix_num(lines)

Number of helix:  3


In [36]:
def extract_chain_sequences(lines):
    """extract all the sequence residue lines from the file"""
    seq = []
    for line in lines:
        if line.startswith('SEQRES'):
            seq.append(line[0:])
    return seq
all_sequences = extract_chain_sequences(lines)

In [37]:
extract_chain_sequences(lines)

['SEQRES   1 A  167  TYR ASN PHE PHE PRO ARG LYS PRO LYS TRP ASP LYS ASN          \n',
 'SEQRES   2 A  167  GLN ILE THR TYR ARG ILE ILE GLY TYR THR PRO ASP LEU          \n',
 'SEQRES   3 A  167  ASP PRO GLU THR VAL ASP ASP ALA PHE ALA ARG ALA PHE          \n',
 'SEQRES   4 A  167  GLN VAL TRP SER ASP VAL THR PRO LEU ARG PHE SER ARG          \n',
 'SEQRES   5 A  167  ILE HIS ASP GLY GLU ALA ASP ILE MET ILE ASN PHE GLY          \n',
 'SEQRES   6 A  167  ARG TRP GLU HIS GLY ASP GLY TYR PRO PHE ASP GLY LYS          \n',
 'SEQRES   7 A  167  ASP GLY LEU LEU ALA HIS ALA PHE ALA PRO GLY THR GLY          \n',
 'SEQRES   8 A  167  VAL GLY GLY ASP SER HIS PHE ASP ASP ASP GLU LEU TRP          \n',
 'SEQRES   9 A  167  THR LEU GLY LYS GLY VAL GLY TYR SER LEU PHE LEU VAL          \n',
 'SEQRES  10 A  167  ALA ALA HIS ALA PHE GLY HIS ALA MET GLY LEU GLU HIS          \n',
 'SEQRES  11 A  167  SER GLN ASP PRO GLY ALA LEU MET ALA PRO ILE TYR THR          \n',
 'SEQRES  12 A  167  TYR THR LYS ASN PHE AR

In [38]:
def collect_chain_ids(all_sequences):
    """identify chains in protein"""
    chains = []
    for line in all_sequences:
        if line[11] not in chains:
            chains.append(line[11])
    return chains
chains_in_prot = collect_chain_ids(all_sequences)

In [39]:
collect_chain_ids(all_sequences)

['A', 'B']

In [40]:
def print_chains(chains_in_prot):
    """print chains in protein"""
    x = '' .join(chains_in_prot)
    print("- Chains:", x[0], "and", x[1])
    

In [41]:
print_chains(chains_in_prot)

- Chains: A and B


In [42]:
def chain_sequence(all_sequences):
    """general print sequences in all chains"""
    residues = []
    for line in all_sequences:
        one_letter_code = {'GLY':'G', 'ALA':'A', 'VAL':'V', 'CYS':'C', 'PRO':'P', 'LEU':'L', 'ILE':'I',\
                           'MET':'M', 'TRP':'W', 'PHE':'F', 'SER':'S', 'THR':'T', 'TYR':'Y', 'ASN':'N', \
                           'GLN':'Q', 'LYS':'K', 'ARG':'R', 'HIS':'H', 'ASP':'D', 'GLU':'E'}
        residues.extend(line[18:].split()) #splits the string into a list of residues after appending to the list of residues
        chain_seq = '' .join([one_letter_code[i] for i in residues]) #converts the 3 code residues to their corresponding 1 letter denotation
    return chain_seq
sequence = chain_sequence(all_sequences)

In [43]:
chain_sequence(all_sequences)

'YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQELYGASPDISYGNDALMP'

In [44]:
def print_aa_num(sequence):
    """print number of amino acids"""
    chain_seq = sequence
    print("Number of amino acids:", len(chain_seq))

In [45]:
print_aa_num(sequence)

Number of amino acids: 177


In [46]:
def print_seq(sequence):
    """print the amino acid sequence"""
    chain_seq = sequence
    print("Sequence:", chain_seq)

In [47]:
print_seq(sequence)

Sequence: YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQELYGASPDISYGNDALMP


In [48]:
def caller(all_sequences, chains_in_prot):
    """boss function to call the rest into action, for evry chain in the chain list"""
    title_print(lines)
    for chain in chains_in_prot:
        print("- Chain ", chain)
        chain_sequence(all_sequences)
        helix_num(lines)
        sheet_num(lines)
        print_seq(sequence)     

In [50]:
def main_info():
    """main function"""
    caller(all_sequences, chains_in_prot)
main_info() 

Title :  CRYSTAL STRUCTURE OF MMP-2 ACTIVE SITE MUTANT IN COMPLEX WITH APP-2 DRIVED DECAPEPTIDE INHIBITOR
- Chain  A
Number of helix:  3
Number of sheet:  10
Sequence: YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQELYGASPDISYGNDALMP
- Chain  B
Number of helix:  3
Number of sheet:  10
Sequence: YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQELYGASPDISYGNDALMP


### Stand alone information summary function

In [54]:
def pdb_info(all_sequences, chains_in_prot):
    title_print(lines)
    for chain in chains_in_prot:
        print("- Chain ", chain)
        
        residues = []
        for line in all_sequences:
            if line[11] == chain:
                one_letter_code = {'GLY':'G', 'ALA':'A', 'VAL':'V', 'CYS':'C', 'PRO':'P', 'LEU':'L',\
                                   'ILE':'I', 'MET':'M', 'TRP':'W', 'PHE':'F', 'SER':'S', 'THR':'T',\
                                   'TYR':'Y', 'ASN':'N', 'GLN':'Q', 'LYS':'K', 'ARG':'R', 'HIS':'H',\
                                   'ASP':'D', 'GLU':'E'}
                residues.extend(line[18:].split()) #splits the string into a list of residues after appending to the list of residues
                chain_seq = '' .join([one_letter_code[i] for i in residues])#converts the 3 code residues to their corresponding 1 letter denotation
                
                helix = []
                for line in lines:
                    if line.startswith('HELIX') and line[19] == chain:
                        helix.append(line[0:])
                numb = len(helix)
                
                sheet = []
                for line in lines:
                    if line.startswith('SHEET') and line[21] == chain:
                        sheet.append(line[0:])
                num = len(sheet)
                
        print("Number of amino acids:", len(chain_seq))
        print("Number of helix: ", numb)
        print("Number of sheet: ", num)
        print("Sequence:", '\n'.join(''.join(chain_seq[i:i+50]) for i in range(0, len(chain_seq), 50)))

In [55]:
def main_info_function():
    """The information summary boss function"""
    pdb_info(all_sequences, chains_in_prot)
    #print_menu()
main_info_function() 

Title :  CRYSTAL STRUCTURE OF MMP-2 ACTIVE SITE MUTANT IN COMPLEX WITH APP-2 DRIVED DECAPEPTIDE INHIBITOR
- Chain  A
Number of amino acids: 167
Number of helix:  3
Number of sheet:  9
Sequence: YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRF
SRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDD
DELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRL
SQDDIKGIQELYGASPD
- Chain  B
Number of amino acids: 10
Number of helix:  0
Number of sheet:  1
Sequence: ISYGNDALMP


### Pending stuff

#### Done