## Compute change in solvent accessible surface area (SASA) per residue

We want to create a DataFrame `sasa` that contains changes in SASA per residue for all the antibodies in our cleaned summary file.

The desired result is a DataFrame with columns

- `pdb_id`, e.g. '9ds1'
- `ab_chain`, e.g. 'H' 
- `ab_chaintype`, 'heavy' or 'light'
- `ab_resnum`
- `ab_icode`
- `ab_resname`
- `dSASA`
- `dSASA_rel`
- `dSASA_polar`
- `dSASA_polar_rel`
- `dSASA_apolar`
- `dSASA_apolar_rel` 

This can be achieved using the function `compute_dSASA(pdb_id, light_chain, heavy_chain)` from notebook 10.
Copy the function into a code cell below.

In [1]:
# compute_dSASA (copied from notebook 10)

import os
import pandas as pd
import freesasa
from Bio.PDB import PDBParser, PDBIO, Select

# Directory from which compute_dSASA reads the complex structures
# (files named "<pdb_id>_chothia.pdb").
PDB_DIR = "../data/pdbs"
# Directory in which compute_dSASA writes (and afterwards removes) the
# antibody-only PDB files it needs for the unbound SASA calculation.
PDB_AB_DIR = "../data/pdbs_ab"

class ChainSelect(Select):
    """Bio.PDB selection filter that keeps only chains with the given IDs.

    An instance is passed as ``select`` to ``PDBIO.save`` so that only the
    listed chains are written to the output file.
    """

    def __init__(self, chain_ids):
        # Container of the chain identifiers to keep, e.g. ['L', 'H'].
        self.chain_ids = chain_ids

    def accept_chain(self, chain):
        """Return True if the ID of *chain* is among the requested chain IDs."""
        wanted = self.chain_ids
        return chain.get_id() in wanted

def convert_chothia_resnum(residue_number):
    """Split a residue label into its numeric part and insertion code.

    A trailing alphabetic character is treated as the insertion code,
    e.g. '100A' -> (100, 'A'); a label without one yields an empty
    insertion code, e.g. '52' -> (52, '').

    Returns a tuple ``(resnum, icode)`` with ``resnum`` as int and
    ``icode`` as str.
    """
    last_char = residue_number[-1]

    # No trailing letter: the whole label is the residue number.
    if not last_char.isalpha():
        return int(residue_number), ''

    return int(residue_number[:-1]), last_char



def compute_dSASA(pdb_id, light_chain, heavy_chain):
    """Compute the per-residue change in SASA of an antibody upon binding.

    The light and heavy chain are extracted from the complex structure
    ``<PDB_DIR>/<pdb_id>_chothia.pdb`` into a temporary antibody-only PDB
    file in ``PDB_AB_DIR``. FreeSASA is then run on the complex and on the
    antibody-only file, and for every antibody residue the difference
    (unbound antibody - complex) of the residue areas is reported.

    Parameters
    ----------
    pdb_id : str
        PDB identifier, used to locate the complex file and stored in the
        ``pdb_id`` column.
    light_chain : str
        Chain ID of the antibody light chain.
    heavy_chain : str
        Chain ID of the antibody heavy chain.

    Returns
    -------
    pandas.DataFrame
        One row per antibody residue with the columns ``pdb_id``,
        ``ab_chaintype`` ('light' or 'heavy'), ``ab_chain``, ``ab_res``
        (residue label as reported by FreeSASA), ``ab_resnum``,
        ``ab_icode``, ``ab_resname``, ``dSASA``, ``dSASA_rel``,
        ``dSASA_polar``, ``dSASA_polar_rel``, ``dSASA_apolar`` and
        ``dSASA_apolar_rel``. Light-chain rows come first.

    Notes
    -----
    If ``light_chain`` and ``heavy_chain`` are the same ID, the residues of
    that chain are reported twice, once per chain type.
    """
    pdb_path = os.path.join(PDB_DIR, f"{pdb_id}_chothia.pdb")
    pdb_ab_path = os.path.join(
        PDB_AB_DIR, f"{pdb_id}_{light_chain}_{heavy_chain}_chothia.pdb"
    )

    # The scratch directory may not exist on a fresh checkout.
    os.makedirs(PDB_AB_DIR, exist_ok=True)

    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdb_id, pdb_path)

    io = PDBIO()
    io.set_structure(structure)

    try:
        # create PDB file for unbound antibody
        io.save(pdb_ab_path, select=ChainSelect([light_chain, heavy_chain]))

        # compute SASA for complex and unbound antibody
        complex_areas = freesasa.calc(freesasa.Structure(pdb_path)).residueAreas()
        antibody_areas = freesasa.calc(freesasa.Structure(pdb_ab_path)).residueAreas()
    finally:
        # Always clean up the temporary file, also when parsing or the SASA
        # calculation fails; it may not exist if saving itself failed.
        if os.path.exists(pdb_ab_path):
            os.remove(pdb_ab_path)

    # report dSASA per residue for light and heavy chain
    data = []
    for chaintype, chain in (('light', light_chain), ('heavy', heavy_chain)):
        for residue_number, ab_area in antibody_areas[chain].items():
            # SASA of the same chain/residue within the complex
            cplx_area = complex_areas[chain][residue_number]
            resnum, icode = convert_chothia_resnum(residue_number)

            data.append(dict(
                pdb_id=pdb_id,
                ab_chaintype=chaintype,
                ab_chain=chain,
                ab_res=residue_number,
                ab_resnum=resnum,
                ab_icode=icode,
                ab_resname=ab_area.residueType,
                dSASA=ab_area.total - cplx_area.total,
                dSASA_rel=ab_area.relativeTotal - cplx_area.relativeTotal,
                dSASA_polar=ab_area.polar - cplx_area.polar,
                dSASA_polar_rel=ab_area.relativePolar - cplx_area.relativePolar,
                dSASA_apolar=ab_area.apolar - cplx_area.apolar,
                dSASA_apolar_rel=ab_area.relativeApolar - cplx_area.relativeApolar,
            ))

    return pd.DataFrame(data)



In [2]:
# compute_dSASA("6o25", "F", "E")

Now we need to iterate over the rows of the cleaned summary file, append results to the `sasa` data frame, and save the file under `../generated/sasa/sasa.tsv` (create the directory if necessary).

As in notebook 8, we wrap the processing of each PDB file in a `try ... except` block, so that an error in a single file does not abort the whole run. 

In [5]:
import warnings
import sys
from Bio.PDB.PDBExceptions import PDBConstructionWarning
# Turn Biopython's PDB construction warnings into exceptions so that they are
# caught by the per-entry try/except below.
warnings.simplefilter("error", PDBConstructionWarning)


SUMMARY_FILE = "../generated/preprocess/summary_pdb_clusters_deduplicated.tsv"
SASA_FILE = "../generated/sasa/sasa.tsv"

# PDB entries that are excluded from the run.
SKIP_PDB_IDS = {'7mtb', '6o25', '6o2a'}

# read the summary file
summary = pd.read_csv(SUMMARY_FILE, sep="\t")

# Per-entry results are collected and concatenated once at the end instead of
# growing a DataFrame inside the loop.
frames = []
# (pdb, Lchain, Hchain) of every entry whose processing raised an error.
failed = []

for _, row in summary.iterrows():
    if row['pdb'] in SKIP_PDB_IDS:
        continue

    print(f"{row['pdb']} {row['Lchain']} {row['Hchain']}")
    try:
        # compute SASA for pdb_id, light_chain, heavy_chain in row
        df = compute_dSASA(row["pdb"], row["Lchain"], row["Hchain"])
    except Exception as e:
        # Report the problem and carry on, so that a single bad file does not
        # abort the whole run and discard the results computed so far.
        print(e)
        print(row)
        failed.append((row["pdb"], row["Lchain"], row["Hchain"]))
        continue

    frames.append(df)

# declare a DataFrame to store dSASA
sasa = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

if failed:
    print(f"{len(failed)} entries failed: {failed}")

# save the sasa DataFrame to a tab-delimited file (create the directory if necessary)
os.makedirs(os.path.dirname(SASA_FILE), exist_ok=True)
sasa.to_csv(SASA_FILE, sep="\t", index=False)




3hi6 L H
2ny3 C D
3bdy L H
3mac L H
2wuc L H
1hez C D
3q1s L H
3t2n L H
2ny7 L H
2fjg L H
3nh7 O K
2dd8 L H
1bvk A B
3hi1 L H
2qr0 Q R
3ngb C B
4gxu N M
2uzi L H
3lh2 O K
2h9g L H
2xtj B D
2fjh A B
3b2v L H
2xra L H
3p11 L H
3eob L H
2vxs N J
2r56 M I
3r1g L H
3skj L H
3n85 L H
3mxw L H
2r0k L H
3lev L H
3nfp L H
3l5x L H
2vxq L H
1jps L H
2xqb L H
2cmr L H
3kr3 L H
1iqd A B
9fgr E E
8sxj D C
8dw9 D C
7ucf E D
7sk6 E F
6p3s D C
7eh5 L H
3whe P O
6ur5 B A
7akd L H
5h37 L K
7m6i N M
7wee L H
7m6h G E
7q9j F E
7kzx C D
7zr9 G F
7sfv N M
7wlc L H
7s6l L H
7n62 L H
8qks B C
6pdx N M
8vus F E
8gsb K H
7v6n D E
5w1k K L
8ix3 L H
7ydi L H
8sj6 D C
8blq C E
2fec L I
6uye J G
8vvh L H
8zc4 G H
7r7n L H
7mnl L H
5um8 E D
8yz5 K G
8jam L H
7wvf C C
7ly9 C B
5w1m C D
9aru L H
8vww C B
8i3s L H
7won L H
7m8l Q N
6udk N M
5njd M N
7a5s L H
5n0a L H
7po5 L H
7m6f D C
7l2f L H
7d4g G N
7cyh D E
7ps7 B A
7kfb L I
7e3k G F
5t3x L H
7d0b L H
7cwo L H
7oh0 B C
7k8x G E
7cah D E
7lok J K
8k19 B A
8swh L H
8



7ui1 H H
7z4t L H
8erq L H
7t4r Q P
7f3q L H
6e1k C D
7l06 L H
7m6e C D
6oe4 F E
5ggr L H
7czy K H
7q9g L H
3p30 L H
2qad C D
6vja L H
7xic L K
7ezv N M
6ukj L H
7k4n D C
7jr7 F E
6b0e A B
6ii4 L H
7tlz A B
7e23 C B
7m42 C D
8sm1 L H
7uvq B C
7s07 L H
4r2g M N
8ykg D D
6okp R Q
8yro L H
8dwx L H
6q23 L H
7oly L H
6x58 D C
4jdt L H
8thz L H
8f6x D D
7xmz L H
6d0u I H
7lqv L H
6ss6 M I
3tyg L H
6okn B A
6bkd L H
8tqi D C
7k90 N M
7rfc B A
7n5h L H
7rai M I
8qkr C B
5umi B A
4oqt L H
8ejj L H
5d1q A B
4k8r C D
6nn3 L H
4s1r L H
8u1s K J
7luc G F
5d1x A B
5t33 L H
5grj L H
8ivw C B
5w6d L H
8vpf N M
8si1 L H
8xsl L H
8pwx A A
8q5d C B
8tqk E D
8iow L H
8tma L H
8dz4 L H
5xj3 B A
8dok D C
8c8t D C
8cxg L H
7xsb L H
8dwa L H
7mxl K J
5cus N J
4ut6 M I
4hg4 O N
7sc1 Q P
7si2 F G
4rfo L H
6aru B C
7mem L H
6iec K J
7t3d Q O
7tat E D
6ywd B A
3idy C B
7prz L H
7k8m B A
7ey0 B A
6uig C B
7d03 L H
3g6d L H
7kra J I




7f9w C B
4od2 A B
7dk0 B A
7wch E D
5xf1 L H
5czv L H
7f7h D C
8vee L H
8dwy L H
8f0h D B
7l5b L H
7t72 L H
6bp2 L H
6pzy L I
5fec L H
5d1z C D
8tvj L H
4zs6 D C
9azv L H
8cbe L H
8f2j B A
8eqf L H
8pe9 L H
9e6k L H
4jan B A
6wn1 F E
6a4k M I
4y5x H G
7ch4 L H
7zf3 B A
6x4t F D
6wqo C B
1adq L H
8pwu B B
5tpn L H
8t6m A B
7ps5 L H
7e88 E D
6fy1 L H
8dww L H
6r0x D C
7cyv H H
6pe9 L H
8w0x L H
8dnn C B
7rr0 C B
6xc2 Y X
7tty L H
7ahv B A
8vqf A B
4hfu L H
5esv L H
3wd5 L H
9dq5 L H
8ume L H
8wyj R Q
8vif L H
8r9w L H
8oz3 B A
8tp6 L H
8tp9 F D
8e6k L H
8g30 F E
8g3o F E
7ra8 D C
7sn3 G F
6n8d B D
6phf B A
7t4s K J
4odx B H
7uaq L H
6mek E F
1i9r M K
6uta B A
6phd L H
7pry D C
6wvz L H
5x8l L G
6i04 L H
4ybl C B
6w16 L H
7c2l N J
5dhy L H
7mzn L H
7bpk L H
4kvn L H
4v1d E D
7n0a A B
2qql L H
9bif L H
8v5l L H
8vvo G A
7ukl Y X
3wlw D C
8sis L H
7xdl D E
8tvd L H
8h7z I F
8gpt E D
6ymq A A
8znz E F
8i5i L H
6txz N J
5dup L H
8s6z B A
8vk1 E F
7ley E D
6t3j D C
6u02 L H
6wtv I H
6xy2 L H
7