In [39]:
import freesasa
import requests
import os

def download_pdb(pdb_id, save_path, timeout=30):
    """Download a PDB entry from RCSB and write it to ``save_path``.

    Args:
        pdb_id: Entry identifier interpolated into the RCSB download URL.
        save_path: Destination path; the response body is written in binary mode.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` argument.

    Returns:
        True when the server answered with status 200 and the file was written,
        False otherwise (the status code is printed in that case).

    Raises:
        requests.RequestException: connection errors and timeouts raised by
            ``requests.get`` are not caught here and propagate to the caller.
    """
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    # requests applies no timeout unless one is given, so a stalled connection
    # would otherwise hang the notebook cell indefinitely.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"file downloaded failed: {response.status_code}")
        return False

    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"PDB file successfully downloaded: {save_path}")
    return True

def check_freesasa_residues(pdb_file):
    """Print the residue keys freesasa reports for each chain, then the total count.

    The structure is loaded and processed with ``freesasa``; for every chain in
    ``residueAreas()`` the list of residue keys is printed, followed by the sum
    of the per-chain list lengths. Any exception raised along the way is caught
    and printed as ``Error: <message>``; nothing is returned.
    """
    banner = "\n\nAmount of Resolved Residues (powered by freesasa):\n        "
    try:
        areas_by_chain = freesasa.calc(freesasa.Structure(pdb_file)).residueAreas()
        print(banner)

        residue_count = 0
        for chain_id in areas_by_chain:
            site_labels = [*areas_by_chain[chain_id].keys()]
            residue_count += len(site_labels)
            print(f"① Detailed Sites in Chain {chain_id}     : {site_labels}")
        print(f"\n② Total Amount of Resolved Residues : {residue_count}")
    except Exception as err:
        print(f"Error: {err}")

# Download the entry into the working directory, list the residues freesasa
# sees in it, then delete the scratch copy again.
pdb_id = "1ymg"
pdb_file = f"{pdb_id}.pdb"
try:
    download_pdb(pdb_id, pdb_file)
    check_freesasa_residues(pdb_file)
finally:
    # Clean up even if a step above raises or the cell is interrupted, so a
    # stale download is never left behind for a later run to pick up.
    if os.path.exists(pdb_file):
        os.remove(pdb_file)


PDB file successfully downloaded: 1ymg.pdb


Amount of Resolved Residues (powered by freesasa):
        
① Detailed Sites in Chain A     : ['6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143'

In [40]:
from Bio import PDB

def extract_single_chain(input_pdb, output_pdb, chain_id="A"):
    """Write only the chain ``chain_id`` of ``input_pdb`` to ``output_pdb``.

    Args:
        input_pdb: Path of the PDB file to read.
        output_pdb: Path the filtered structure is saved to.
        chain_id: Identifier of the chain to keep (compared with ``chain.get_id()``).

    Returns:
        None. A message is printed on success. When ``chain_id`` is not present
        in the structure an error is printed and no file is written, instead of
        saving an output without any chain and reporting success.
    """
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("protein", input_pdb)

    # Validate the request first: filtering on a chain that does not exist
    # would otherwise produce an output with no chains at all.
    available = sorted({chain.get_id() for model in structure for chain in model})
    if chain_id not in available:
        print(f"error: chain '{chain_id}' not found in '{input_pdb}' (available chains: {available})")
        return

    class ChainSelect(PDB.Select):
        """Selection filter that keeps only chains whose id equals ``chain_id``."""

        def accept_chain(self, chain):
            return chain.get_id() == chain_id

    io = PDB.PDBIO()
    io.set_structure(structure)
    io.save(output_pdb, ChainSelect())
    print(f"Chain {chain_id} has successfully been extracted to:   {output_pdb}")

# Source structure and destination for the chain-A-only copy.
# NOTE(review): both are absolute paths on one specific machine; adjust before
# running elsewhere.
input_pdb, output_pdb = (
    "C:\\Users\\11960\\Documents\\zju\\1ymg.pdb",
    "C:\\Users\\11960\\Documents\\zju\\1ymg_chainA.pdb",
)
extract_single_chain(input_pdb=input_pdb, output_pdb=output_pdb, chain_id="A")

Chain A has successfully been extracted to:   C:\Users\11960\Documents\zju\1ymg_chainA.pdb


In [41]:
import nglview as nv
import freesasa
import os
import pandas as pd
from Bio import PDB
from IPython.display import display

def get_residue_names(pdb_file):
    """Build a lookup from ``(chain id, residue number)`` to residue name.

    Only residues accepted by ``PDB.is_aa(residue, standard=True)`` are
    included. The chain id in the key is upper-cased; the residue number is the
    string form of element 1 of the residue id, with element 2 appended when it
    is not a single space. When several models contain the same key, the value
    from the last model visited wins.

    Returns:
        dict mapping ``(chain_id, residue_number)`` string pairs to residue names.
    """
    structure = PDB.PDBParser(QUIET=True).get_structure("protein", pdb_file)
    names_by_site = {}

    # Flatten model -> chain lazily; visiting order matches a nested loop.
    all_chains = (chain for model in structure for chain in model)
    for chain in all_chains:
        for residue in chain:
            if not PDB.is_aa(residue, standard=True):
                continue
            full_id = residue.get_id()
            label = str(full_id[1])
            if full_id[2] != " ":
                label = label + full_id[2]
            names_by_site[(chain.id.upper(), label)] = residue.get_resname()

    return names_by_site

def visualize_pdb(pdb_file):
    """Display ``pdb_file`` with nglview and return the resulting view.

    Returns:
        The object returned by ``nv.show_file`` once it has been displayed, or
        None when the file does not exist or loading/displaying raised (a
        message is printed in both failure cases).
    """
    view = None
    if os.path.exists(pdb_file):
        try:
            widget = nv.show_file(pdb_file)
            display(widget)
            # Only hand the widget back once it was displayed without error.
            view = widget
        except Exception as e:
            print(f"An error occurred while visualizing the PDB: {e}")
    else:
        print(f"error: PDB file '{pdb_file}' does not exist，check file path！")
    return view

def calculate_sasa(pdb_file, output_xlsx="sasa_results.xlsx"):
    """Compute per-residue total SASA with freesasa, save it to Excel and display it.

    Each row holds chain, residue number, residue name (looked up through
    ``get_residue_names``; ``"UNK"`` when the lookup has no entry) and the
    residue's ``total`` area. Rows are sorted by area in ascending order.

    Args:
        pdb_file: Path of the structure to analyse; a message is printed and
            nothing else happens when it does not exist.
        output_xlsx: Path handed to ``DataFrame.to_excel``.

    Returns:
        None. Any exception raised during the calculation or export is caught
        and printed.
    """
    columns = ["Chain", "R.No.", "Amino Acid", "SASA (Å²)"]

    if os.path.exists(pdb_file):
        try:
            areas_by_chain = freesasa.calc(freesasa.Structure(pdb_file)).residueAreas()
            names = get_residue_names(pdb_file)

            def _row(chain_id, number, area):
                # Read the area first, then resolve the name (upper-cased chain key).
                exposed = area.total
                label = names.get((chain_id.upper(), str(number)), "UNK")
                return [chain_id, number, label, exposed]

            records = [
                _row(chain_id, number, area)
                for chain_id in areas_by_chain
                for number, area in areas_by_chain[chain_id].items()
            ]

            table = (
                pd.DataFrame(records, columns=columns)
                .sort_values(by=columns[-1], ascending=True)
                .reset_index(drop=True)
            )

            table.to_excel(output_xlsx, index=False)
            print(f"\nSASA calculation completed, see result blank in: {output_xlsx}")

            print("\nSASA calculation result example（Sorted by SASA from smallest to largest）:")
            display(table)

        except Exception as e:
            print(f"an unexcepted error occurred: {e}")
    else:
        print(f"erroe: PDB file '{pdb_file}' does not exist，check file path！")

# Ask for the input structure and the Excel destination; an empty answer
# (just Enter) falls back to the hard-coded default path.
# NOTE(review): the defaults are absolute paths on one specific machine.
pdb_file = (
    input("input PDB input-file path (or Enter with using '1ymg_chainA.pdb')：").strip()
    or "C:\\Users\\11960\\Documents\\zju\\1ymg_chainA.pdb"
)
output_xlsx = (
    input("input Excel output-file path (or Enter with using 'sasa_results.xlsx')：").strip()
    or r"C:\Users\11960\Documents\zju\sasa_results.xlsx"
)

# Show the structure first, then compute and export the per-residue SASA table.
view = visualize_pdb(pdb_file)
calculate_sasa(pdb_file, output_xlsx)

input PDB input-file path (or Enter with using '1ymg_chainA.pdb')： 
input Excel output-file path (or Enter with using 'sasa_results.xlsx')： 


NGLWidget()


SASA calculation completed, see result blank in: C:\Users\11960\Documents\zju\sasa_results.xlsx

SASA calculation result example（Sorted by SASA from smallest to largest）:


Unnamed: 0,Chain,R.No.,Amino Acid,SASA (Å²)
0,A,16,GLU,0.000000
1,A,28,LEU,0.000000
2,A,45,ALA,0.000000
3,A,96,GLY,0.000000
4,A,93,GLN,0.000000
...,...,...,...,...
228,A,10,TRP,165.289426
229,A,202,TRP,173.168755
230,A,196,ARG,194.568173
231,A,156,ARG,218.395482
