In [None]:
import os
import pandas as pd
import numpy as np
import freesasa
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Polypeptide import PPBuilder
from Bio.PDB.Polypeptide import *

In [None]:
# Remember the directory the notebook was started from (`cwd`)
cwd = os.getcwd()

# Switch to the project folder so the workbook can be opened by bare filename
os.chdir("C:/Kate/Bioinformatics/Diplom")

# Workbook that holds the protein selection and the amino-acid reference table
file = 'Selection_434.xlsx'
xf = pd.ExcelFile(file)

# Show which sheets the workbook contains
print(xf.sheet_names)

# Parse the two sheets used by the rest of the notebook
dfSelection, dfTableAA = (xf.parse(sheet) for sheet in ('Selection', 'tableAA'))

# Protein identifiers: one entry per structure to process
names = dfSelection["ID"].tolist()

# Residue codes from the reference table ('three' and 'one' columns)
resList = dfTableAA['three'].tolist()
rList = dfTableAA['one'].tolist()

# Lookup from one-letter code to the lowercase three-letter residue name
aaThreeByOne = {
    'A': "ala", 'R': "arg", 'N': "asn", 'D': "asp", 'C': "cys",
    'E': "glu", 'Q': "gln", 'G': "gly", 'H': "his", 'I': "ile",
    'L': "leu", 'K': "lys", 'M': "met", 'F': "phe", 'P': "pro",
    'S': "ser", 'T': "thr", 'W': "trp", 'Y': "tyr", 'V': "val",
}
tableAA = pd.Series(list(aaThreeByOne.values()), index=list(aaThreeByOne.keys()))

In [None]:
# Write the per-structure result workbooks into the Results folder
os.chdir("C:/Kate/Bioinformatics/Diplom/Results")

# Calculate per-residue SASA and relative solvent accessibility (RSA) for every
# structure listed in `names`. For each ID this reads '<path><ID>.pdb' and
# writes '<ID>.xlsx' (sheet 'data') into the current directory with the columns
# Index, Residue, sumSASA, rSASA, maxProtOr, RSA and b0e1.

path = 'C:/Kate/Bioinformatics/Diplom/Dataset200Structures/'
f = '.pdb'
seqLenR = []      # per structure: number of residues read by Biopython
seqLenCalc = []   # per structure: number of per-residue areas returned by freesasa

# RSA strictly above this fraction is coded as exposed (1), otherwise buried (0)
RSA_EXPOSED_THRESHOLD = 0.1

# Loop-invariant objects, built once instead of once per structure
p = PDBParser()
# High-precision Lee & Richards calculation
sasaParams = freesasa.Parameters({'algorithm' : freesasa.LeeRichards,
                                  'n-slices' : 100})
# 'ProtOr' value per one-letter code; the first table row wins when a code is
# duplicated, matching a first-match row lookup
maxSasaByResidue = dfTableAA.drop_duplicates('one').set_index('one')['ProtOr']

for i in names:

    file = path+i+f

    # Get the one-letter sequence from the .pdb file (all models, all chains).
    # NOTE(review): three_to_one raises KeyError for anything that is not a
    # standard amino acid (waters, ligands, modified residues) - this assumes
    # the dataset has been cleaned; confirm.
    seq = [three_to_one(residue.get_resname())
           for residue in p.get_structure(i, file).get_residues()]
    seqLenR.append(len(seq))

    # Calculate SASA
    structure = freesasa.Structure(file)
    result = freesasa.calc(structure, sasaParams)

    # Lists holding the values returned by the selections
    resNum = []      # selection names made of digits (sequence positions)
    resValues = []   # area of the 'resi <position>' selection
    residues = []    # one-letter selection names
    aaValues = []    # area of the 'resn <three-letter>' selection, i.e. the
                     # summed area of all residues of that type in the structure

    # a = A, R, N, D ..
    # res = ala, arg, asn ..
    # num = 0, 1, 2 ..
    # index = 1, 2, 3 ..
    # NOTE(review): 'resi <index>' uses the running position in `seq`, not the
    # residue number stored in the PDB file; this presumes every structure is a
    # single chain numbered 1..N without gaps - verify against the dataset.
    for num, a in enumerate(seq):
        index = str(num+1)
        res = tableAA[a]

        selections = freesasa.selectArea(('{}, resn {}'.format(a, res),
                                          '{}, resi {}'.format(index, index)),
                                         structure, result)
        # The returned dict is keyed by selection name: digits mark the
        # per-position selection, a letter marks the per-residue-type selection
        for key in selections:
            if key.isdigit():
                resNum.append(key)
                resValues.append(selections[key])
            else:
                residues.append(key)
                aaValues.append(selections[key])

    seqLenCalc.append(len(resValues))

    # Assemble the per-residue table (explicit column order)
    dfsasa = pd.DataFrame({'Index': resNum,
                           'Residue': residues,
                           'sumSASA': aaValues,
                           'rSASA': resValues},
                          columns=['Index', 'Residue', 'sumSASA', 'rSASA'])

    # Fail loudly if the reference table lacks a residue that occurs here,
    # rather than silently producing NaN RSA values
    unknown = set(dfsasa['Residue']) - set(maxSasaByResidue.index)
    if unknown:
        raise KeyError("No 'ProtOr' entry in tableAA sheet for residue(s) {} "
                       "(structure {})".format(sorted(unknown), i))

    # RSA = residue SASA / 'ProtOr' reference value for that residue type
    dfsasa['maxProtOr'] = dfsasa['Residue'].map(maxSasaByResidue)
    dfsasa['RSA'] = dfsasa['rSASA'] / dfsasa['maxProtOr']

    # Code Buried(0) vs Exposed(1): a residue is exposed when its RSA exceeds
    # the threshold (the two codes were previously swapped)
    dfsasa['b0e1'] = (dfsasa['RSA'] > RSA_EXPOSED_THRESHOLD).astype(int)

    # Write the table to '<ID>.xlsx'; the context manager saves and closes the
    # workbook even if writing fails part-way
    with pd.ExcelWriter((i+'.xlsx'), engine='xlsxwriter') as writer:
        dfsasa.to_excel(writer, sheet_name='data')
    