# Option4
## Display Secondary Structure:
For each chain in the loaded PDB file, print a representation of the secondary structure, using the character ‘/’ for an amino acid that is part of a helix, ‘|’ for one that is part of a sheet, and ‘-’ for any other.

Each line should have a maximum of 80 characters. 

The amino-acid sequence should be displayed above the representation, and below it a tag with the identifier of each substructure should be aligned with that substructure.

Execution Example:

In [3]:
# Notebook-level inputs read as globals by printSecStr.
# The PDB file is opened once; several functions below call seek(0) on this
# handle before re-reading it. NOTE(review): no close() appears in the visible
# cells -- confirm the handle is released elsewhere.
my_pdb_file = open ("../data/3AYU.pdb", "r")
# Chain identifiers to process, in display order.
chainsInProtein=['A','B']
# Per-chain amino-acid sequence, wrapped with '\n' line breaks
# (multiToSingleLine removes those breaks).
aminoAcidsInChains2={'A': 'YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRF\nSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDD\nDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRL\nSQDDIKGIQELYGASPD',
 'B': 'ISYGNDALMP'}
# Residue count per chain, stored as strings; the functions below convert
# them with int() when sizing lists.
aaNumber={'A': '167', 'B': '10'}


def multiToSingleLine(aminoAcidsInChains2, chainsInProtein):
    """Collapse each chain's line-wrapped sequence into one unbroken string.

    Args:
        aminoAcidsInChains2: mapping of chain identifier to a sequence string
            that may contain '\\n' line breaks.
        chainsInProtein: iterable of chain identifiers to convert.

    Returns:
        A new dict mapping each requested chain identifier to its sequence
        with every '\\n' removed. The input mapping is left untouched.
    """
    return {
        chainId: aminoAcidsInChains2.get(chainId).replace('\n', '')
        for chainId in chainsInProtein
    }




def helixStructure(my_pdb_file, aaNumber, chainsInProtein):
    """Build per-chain secondary-structure symbol lists with helices marked.

    Every chain in ``chainsInProtein`` starts as ``int(aaNumber[chain])``
    '-' symbols. The file is rewound and read once; for each HELIX record
    whose initial chain identifier (column 20) is a requested chain, the
    positions from the initial to the terminal residue number (inclusive)
    are set to '/'.

    Residue numbers are used directly as 1-based positions in the chain
    (assumes numbering starts at 1 without gaps -- TODO confirm for the
    files this is used on).

    Args:
        my_pdb_file: open, seekable PDB text file.
        aaNumber: mapping of chain identifier to residue count (as a string
            or anything ``int()`` accepts).
        chainsInProtein: iterable of chain identifiers to report on.

    Returns:
        dict mapping each chain identifier to a list of one-character
        strings ('-' or '/') whose length is always ``int(aaNumber[chain])``.
    """
    # Initialise every chain up front: the file is read in a single pass, so
    # a HELIX record may refer to any requested chain at any point.
    secStrListDict = {
        chain: ['-'] * int(aaNumber[chain]) for chain in chainsInProtein
    }

    # Rewind so the result does not depend on earlier reads of the handle.
    my_pdb_file.seek(0)
    for line in my_pdb_file:
        if not (line.startswith("HELIX") and line[19] in secStrListDict):
            continue
        symbols = secStrListDict[line[19]]
        # Derive the span from the start/end residue numbers rather than the
        # trailing length field, and clamp it to the chain, so the list can
        # never grow or shrink and stays aligned with the sequence.
        first = max(int(line[20:27].strip()) - 1, 0)
        last = min(int(line[32:37].strip()), len(symbols))
        for position in range(first, last):
            symbols[position] = '/'

    return secStrListDict




def sheetStructure(my_pdb_file, aaNumber, chainsInProtein):
    """Return the per-chain secondary-structure string (helices and sheets).

    Starts from the symbol lists produced by ``helixStructure`` and, for each
    SHEET record whose initial chain identifier (column 22) is a requested
    chain, sets the positions from the initial to the terminal residue number
    (inclusive) to '|'. Sheet symbols overwrite helix symbols where the two
    overlap, because sheets are applied second.

    Residue numbers are used directly as 1-based positions in the chain
    (assumes numbering starts at 1 without gaps -- TODO confirm).

    Args:
        my_pdb_file: open, seekable PDB text file.
        aaNumber: mapping of chain identifier to residue count.
        chainsInProtein: iterable of chain identifiers to report on.

    Returns:
        dict mapping each chain identifier to a string made of '-', '/'
        and '|' characters.
    """
    secStrListDictHelices = helixStructure(my_pdb_file, aaNumber, chainsInProtein)

    my_pdb_file.seek(0)
    # Single pass over the file; strings are joined only after every record
    # has been applied, so the lists stay mutable while they are being filled.
    for line in my_pdb_file:
        if not (line.startswith("SHEET") and line[21] in chainsInProtein):
            continue
        symbols = secStrListDictHelices[line[21]]
        first = max(int(line[22:28].strip()) - 1, 0)
        # endSeqNum occupies columns 34-37, i.e. line[33:37]; column 38 is
        # the insertion code and must not be parsed as part of the number.
        last = min(int(line[33:37].strip()), len(symbols))
        # Per-position assignment (clamped to the chain) keeps the row the
        # same length as the sequence.
        for position in range(first, last):
            symbols[position] = '|'

    for chain in chainsInProtein:
        secStrListDictHelices[chain] = ''.join(secStrListDictHelices[chain])
    return secStrListDictHelices




def helixTag(chainsInProtein, my_pdb_file, aaNumber):
    """Place each helix identifier at the position where that helix starts.

    Every chain in ``chainsInProtein`` starts as ``int(aaNumber[chain])``
    single spaces. The file is rewound and read once; for each HELIX record
    whose initial chain identifier (column 20) is a requested chain, the
    helix identifier (``line[11:15]`` stripped) is written one character per
    slot beginning at the initial residue position.

    Args:
        chainsInProtein: iterable of chain identifiers to report on.
        my_pdb_file: open, seekable PDB text file.
        aaNumber: mapping of chain identifier to residue count.

    Returns:
        dict mapping each chain identifier to a list of one-character strings
        whose length is always ``int(aaNumber[chain])``.
    """
    my_pdb_file.seek(0)
    # Initialise every chain before reading: a record for any requested chain
    # can appear anywhere in the single pass over the file.
    tagListDict = {
        chain: [' '] * int(aaNumber[chain]) for chain in chainsInProtein
    }
    for line in my_pdb_file:
        if not (line.startswith("HELIX") and line[19] in tagListDict):
            continue
        tag = line[11:15].strip()
        slots = tagListDict[line[19]]
        start = int(line[20:27].strip()) - 1
        # One character per slot keeps the tag row exactly as wide as the
        # sequence, even for identifiers longer than one character;
        # characters that would fall outside the chain are dropped.
        for offset, character in enumerate(tag):
            position = start + offset
            if 0 <= position < len(slots):
                slots[position] = character
    return tagListDict




def sheetTag(chainsInProtein, my_pdb_file, aaNumber):
    """Return the per-chain tag row holding helix and sheet identifiers.

    Starts from the tag lists produced by ``helixTag`` and, for each SHEET
    record whose initial chain identifier (column 22) is a requested chain,
    writes the strand number followed by the sheet identifier (e.g. '1A')
    one character per slot beginning at the initial residue position. Sheet
    tags overwrite helix tags where they overlap, because they are applied
    second.

    Args:
        chainsInProtein: iterable of chain identifiers to report on.
        my_pdb_file: open, seekable PDB text file.
        aaNumber: mapping of chain identifier to residue count.

    Returns:
        dict mapping each chain identifier to its tag row as a string.
    """
    tagListDict = helixTag(chainsInProtein, my_pdb_file, aaNumber)

    my_pdb_file.seek(0)
    # Single pass over the file; rows are joined only after every record has
    # been applied.
    for line in my_pdb_file:
        if not (line.startswith("SHEET") and line[21] in chainsInProtein):
            continue
        # Stop before column 15: columns 15-16 hold numStrands, whose tens
        # digit would otherwise be appended to the tag.
        tag = ''.join(line[5:14].split())
        slots = tagListDict[line[21]]
        start = int(line[22:28].strip()) - 1
        # Clamp to the chain so the row keeps the same width as the sequence.
        for offset, character in enumerate(tag):
            position = start + offset
            if 0 <= position < len(slots):
                slots[position] = character

    for chain in chainsInProtein:
        tagListDict[chain] = ''.join(tagListDict[chain])
    return tagListDict





def fileName(my_pdb_file):
    """Return the text after column 59 of the first HEADER line, stripped.

    The file is rewound first and then read line by line until a line
    starting with 'HEADER' is found. Returns None when no such line exists.
    """
    my_pdb_file.seek(0)
    headerRecords = (
        record for record in my_pdb_file if record.startswith('HEADER')
    )
    firstHeader = next(headerRecords, None)
    if firstHeader is None:
        return None
    return firstHeader[59:].strip()


        

def countLastPosition(chainsInProtein, aaNumber):
    """Build, per chain, a row with the last residue position right-aligned.

    The label has the form '(N)' where N is ``aaNumber[chain]``. It is
    padded on the left with spaces so that the row is exactly as wide as the
    chain and the label ends under the last residue. A chain shorter than
    its label yields just the label.

    Args:
        chainsInProtein: iterable of chain identifiers.
        aaNumber: mapping of chain identifier to residue count, as a string.

    Returns:
        dict mapping each chain identifier to its label row (a string).
    """
    lastPosDict = {}
    for chain in chainsInProtein:
        lastPos = '(' + aaNumber[chain] + ')'
        # rjust keeps the row width equal to the residue count; the earlier
        # slice assignment ending at -1 left the row one character too long
        # with a stray trailing space after the label.
        lastPosDict[chain] = lastPos.rjust(int(aaNumber[chain]))
    return lastPosDict


def countFirstPosition(chainsInProtein, aaNumber):
    """Build, per chain, a row with the first residue position left-aligned.

    The label is always '(1)'. It is padded on the right with spaces so that
    the row is exactly as wide as the chain. A chain shorter than the label
    (including an empty one) yields just the label.

    Args:
        chainsInProtein: iterable of chain identifiers.
        aaNumber: mapping of chain identifier to residue count.

    Returns:
        dict mapping each chain identifier to its label row (a string).
    """
    firstPos = '(1)'
    firstPosDict = {}
    for chain in chainsInProtein:
        # ljust keeps the row width equal to the residue count; storing the
        # whole label in a single list slot made the row two characters too
        # wide and raised IndexError for a chain with no residues.
        firstPosDict[chain] = firstPos.ljust(int(aaNumber[chain]))
    return firstPosDict




def generateMultiLine(chainsInProtein, singleLine, lineWidth=80):
    """Wrap each chain's row into lines of at most ``lineWidth`` symbols.

    Args:
        chainsInProtein: iterable of chain identifiers to wrap.
        singleLine: mapping of chain identifier to a string (or other
            sequence of strings) holding one unbroken row.
        lineWidth: maximum number of symbols per output line (default 80).

    Returns:
        A new dict with the same keys as ``singleLine``; for each requested
        chain the value is the row broken into lines joined by '\\n'. No
        trailing newline is added when the row length is an exact multiple
        of ``lineWidth``. Entries for chains that were not requested are
        copied unchanged. ``singleLine`` itself is not modified.

    Raises:
        ValueError: if ``lineWidth`` is smaller than 1.
        KeyError: if a requested chain is missing from ``singleLine``.
    """
    if lineWidth < 1:
        raise ValueError("lineWidth must be at least 1")

    # Work on a copy so the caller's single-line data stays usable; wrapping
    # the same dict twice would otherwise count the inserted '\n' as symbols.
    multiLine = dict(singleLine)
    for chain in chainsInProtein:
        symbols = list(singleLine[chain])
        multiLine[chain] = "\n".join(
            "".join(symbols[start:start + lineWidth])
            for start in range(0, len(symbols), lineWidth)
        )
    return multiLine

            
def printSecStr(chainsInProtein):
    """Print the secondary structure of every requested chain.

    Reads the notebook-level globals ``aminoAcidsInChains2``, ``my_pdb_file``
    and ``aaNumber``. For each chain, and for each wrapped block of the
    sequence, five lines are printed in this order: first-position label,
    amino-acid sequence, secondary-structure symbols, substructure tags,
    last-position label.
    """
    # Single-line rows, one dict per kind of row.
    sequenceRows = multiToSingleLine(aminoAcidsInChains2, chainsInProtein)
    structureRows = sheetStructure(my_pdb_file, aaNumber, chainsInProtein)
    tagRows = sheetTag(chainsInProtein, my_pdb_file, aaNumber)
    firstLabelRows = countFirstPosition(chainsInProtein, aaNumber)
    lastLabelRows = countLastPosition(chainsInProtein, aaNumber)

    # Wrap every kind of row the same way so the blocks line up when printed.
    wrappedSequence = generateMultiLine(chainsInProtein, sequenceRows)
    wrappedStructure = generateMultiLine(chainsInProtein, structureRows)
    wrappedTags = generateMultiLine(chainsInProtein, tagRows)
    wrappedFirstLabel = generateMultiLine(chainsInProtein, firstLabelRows)
    wrappedLastLabel = generateMultiLine(chainsInProtein, lastLabelRows)

    print("Secondary structure of the PDB ID: %s" % fileName(my_pdb_file))
    for chain in chainsInProtein:
        print("chain %s:" % chain)
        sequenceBlocks = wrappedSequence[chain].split('\n')
        structureBlocks = wrappedStructure[chain].split('\n')
        tagBlocks = wrappedTags[chain].split('\n')
        firstLabelBlocks = wrappedFirstLabel[chain].split('\n')
        lastLabelBlocks = wrappedLastLabel[chain].split('\n')

        # The sequence decides how many blocks are printed; the other rows
        # are indexed by the same block number.
        for blockIndex, sequenceBlock in enumerate(sequenceBlocks):
            print(firstLabelBlocks[blockIndex])
            print(sequenceBlock)
            print(structureBlocks[blockIndex])
            print(tagBlocks[blockIndex])
            print(lastLabelBlocks[blockIndex])
    

In [4]:
printSecStr (chainsInProtein)

Secondary structure of the PDB ID: 3AYU
chain A:
(1)                                                                             
YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDG
-||----------||||||-------//////////////////----||||-------||||||---------------
 1A          2B           1                     1B         3B                   
                                                                                
                                                                                
LLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQE
--||||---------||||----||------||/////////////-||---------------------//////////
  5B           4B      1C      2C2             2A                     3         
                                                                                
         
LYGASPD
///----
       
  (167) 
chain B:
(1)         
ISYGNDALMP
------||--
      6B  
      (10) 


In [35]:
generateMultiLine (chainsInProtein, aminoAcidsSeqDict)

{'A': 'YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRFSRIHDGEADIMINFGRWEHGDGYPFDGKDG\nLLAHAFAPGTGVGGDSHFDDDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRLSQDDIKGIQE\nLYGASPD',
 'B': 'ISYGNDALMP'}

NameError: name 'aminoAcidsSeqSingleLineDict' is not defined

In [38]:
# Scratch experiment: print each wrapped sequence line followed by a line from
# a second list, to try out the interleaved printing used in printSecStr.
aminoAcidsInChains2={'A': 'YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRF\nSRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDD\nDELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRL\nSQDDIKGIQELYGASPD',
 'B': 'ISYGNDALMP'}
chainsInProtein=['A','B']
# Placeholder strings standing in for the second row; only four entries, so a
# chain with more than four sequence lines would index past the end.
mylist3=['Martha is crazy', "But she's also kinda smart", "So be on the watch", "Okay bye" ]
for chain in chainsInProtein:
    print("chain %s"%chain)
    # split() with no argument splits on whitespace, which includes the '\n'
    # line breaks in the stored sequence.
    aaList=aminoAcidsInChains2[chain].split()
    count=0
    # count is not reset between passes of this for loop, so the while loop
    # prints everything on the first pass and the second pass prints nothing.
    for i in aaList,mylist3:
        while count < len(aaList):
            print (aaList[count])
            print(mylist3[count])
            count+=1
        
    

chain A
YNFFPRKPKWDKNQITYRIIGYTPDLDPETVDDAFARAFQVWSDVTPLRF
Martha is crazy
SRIHDGEADIMINFGRWEHGDGYPFDGKDGLLAHAFAPGTGVGGDSHFDD
But she's also kinda smart
DELWTLGKGVGYSLFLVAAHAFGHAMGLEHSQDPGALMAPIYTYTKNFRL
So be on the watch
SQDDIKGIQELYGASPD
Okay bye
chain B
ISYGNDALMP
Martha is crazy


In [63]:
len(aaList)

4

In [6]:
my_list = [' ', ' ', ' ', ' ', ' ', ' ', ' ']
print(len(my_list))
list3=['a']*3
list4=[2,2,2,2,2]
my_list[5:8]=list3
print(len(my_list))
#my_list.append(("martha")*2)
#my_list[-5:-1]=list4
my_list

7
8


[' ', ' ', ' ', ' ', ' ', 'a', 'a', 'a']

In [26]:
list4[9]=100
list4

IndexError: list assignment index out of range

In [96]:
?list.append

Signature: list.append(self, object, /)
Docstring: Append object to the end of the list.
Type:      method_descriptor


In [1]:
a="gsvdfv\nvdgfgs\nhdgt"

b=a.replace('\n', '')


b

'gsvdfvvdgfgshdgt'

In [31]:
?str.replace

Signature: str.replace(self, old, new, count=-1, /)
Docstring:
Return a copy with all occurrences of substring old replaced by new.

  count
    Maximum number of occurrences to replace.
    -1 (the default value) means replace all occurrences.

If the optional argument count is given, only the first count occurrences are
replaced.
Type:      method_descriptor


In [None]:
printFile (my_pdb_file)

In [None]:
helixSymbol='/'
sheetSymbol='|'
otherSymbol='-'

In [2]:
a="acscscscsaasa"
aSymbol= '\'

cSymbol= '-'

for i in a:
    print (i)

SyntaxError: EOL while scanning string literal (<ipython-input-2-e573f392b23a>, line 2)

In [4]:
aSymbol= ('\')

SyntaxError: EOL while scanning string literal (<ipython-input-4-32dfaf3936be>, line 1)