In [1]:
import pandas as pd
from biopandas.pdb import PandasPdb
import numpy as np
import matplotlib.pyplot as plt

from funcs import read_pdb_mmcif, preprocess, f2_cutoff, exposure, create_3_vectors, create_vectors, average_score, features, yes_no, score_v_localres, reciprocal_ticks, standard_residues, visualize

## Example process below:
For a soluble protein, or for a protein where contributions from adducts/detergent are not calculated.

In [None]:
# Input structure to analyse.
initial_pdb_path = 'pdbs/in/1u7g.pdb'

# Preprocessing keeps only C, N, O, S, and Se atoms, removes any low occupancy
# atoms, and removes segment_id and element_symbol (which often cause errors
# in ChimeraX).
preprocessed_pdb_path = preprocess(initial_pdb_path, 'pdbs/preprocessed', yes_no)

# By default, exposure calculates with d^-2 and a cutoff of 5 nm.
calculated_pdb_paths = exposure(preprocessed_pdb_path, 'pdbs/out')

# Forwarded to average_score as its second argument.
average_by_backbone = False

# Average every file generated by exposure. You can instead call average_score
# explicitly on any or all of the files you want.
# By default, average_score assigns each atom in a residue the average score of
# the atoms in that residue, and separately assigns each atom in a residue the
# average score of the backbone atoms in that residue.
for exposure_result in calculated_pdb_paths:
    scored_pdb_path = exposure_result[0]  # first element is the path of the saved file
    average_score(scored_pdb_path, average_by_backbone)

## Visualising in ChimeraX:
using standard coloring:
```
color byattribute bfactor #1 palette 20,#000000:15,#000088:10,#ff0000:5,#ffff00:0,#ffffff
```
(assuming model #1)

## Example process below:
For a membrane protein, calculating the contribution to the score separately from normal residues, from nonstandard residues, and in total (standard).

If your model contains detergent molecules or other adducts, this workflow can be used to calculate separate scores for the protein, for the detergent/adduct, and for the total exposure.

In [None]:
# Input structure to analyse.
initial_pdb_path = 'pdbs/in/1u7g.pdb'

# Preprocessing keeps only C, N, O, S, and Se atoms, removes any low occupancy
# atoms, and removes segment_id and element_symbol (which often cause errors
# in ChimeraX). It will also ask if you want to include nonstandard residues,
# such as HSD (protonated histidine) and BDD (in this case, detergent).
preprocessed_pdb_path = preprocess(initial_pdb_path, 'pdbs/preprocessed', yes_no)

# Print what features reports for 'residue_name'. Any feature can be selected;
# another useful option can be 'chain_id' if chains are assigned correctly.
# This step is unnecessary if you already know what you want to do, or if you
# only want to calculate total exposure.
residue_name_features = features(preprocessed_pdb_path, 'residue_name')
print(residue_name_features)

In [None]:
# Build the assignment from the 'residue_name' feature, selecting 'MSE'.
residue_assignment = create_3_vectors(preprocessed_pdb_path, 'MSE', 'residue_name')

# By default, exposure calculates with d^-2 and gives score and inverse score
# (better for ChimeraX visualisation).
calculated_pdb_paths = exposure(
    preprocessed_pdb_path,
    'pdbs/out',
    assignment=residue_assignment,
)

# Switches for the optional averaging pass below.
average = True
average_by_backbone = False

if average:
    for exposure_result in calculated_pdb_paths:
        # The first element of each result is the path of the saved file.
        average_score(exposure_result[0], average_by_backbone)

#### Getting local resolution by atom in ChimeraX
Ensure the map and model are correctly aligned!

```measure mapvalues #3 atoms #1 attribute locres```

```save 'XXXX\pdbs\out\XXXX.defattr' attrName locres models #1```

(assuming the model is #1, the map is #2, and the local resolution map is #3)

In [None]:
# Model file and the .defattr file holding the per-atom `locres` attribute
# saved from ChimeraX.
model_pdb_path = 'pdbs/out/3jcz_2c50_26p5.pdb'
locres_defattr_path = 'pdbs/out/defattrs/gdh_J123.defattr'

# interactive is switched off explicitly; the result is kept in `dis`.
dis = score_v_localres(model_pdb_path, locres_defattr_path, interactive=False)


#### Advanced examples

In [None]:
# Preprocess the input first; the calls below read 'pdbs/preprocessed/1u7g.pdb'
# (presumably the file this step writes — verify against preprocess).
preprocess('pdbs/in/1u7g.pdb', 'pdbs/preprocessed', yn=yes_no)

# Print the vectors for a single value ('MSE' by residue name) and then for a
# list of values ('A' and 'B' by chain id).
for selection, feature in (('MSE', 'residue_name'), (['A', 'B'], 'chain_id')):
    print(create_vectors('pdbs/preprocessed/1u7g.pdb', selection, feature))

In [None]:
# Example of a non-standard scoring function. When you use one, you should
# find and use a reasonable value for max_scores.
def score(x):
    """Return ``x`` raised to the power -3."""
    return pow(x, -3)

In [None]:
# Run exposure with the custom scoring function registered under the key '3',
# with the max_scores entry under the same key.
calculated_pdb_paths = exposure(
    'pdbs/preprocessed/1u7g.pdb',
    'pdbs/out',
    funcs={'3': score},
    max_scores={'3': 10},
)

# Report the first result: saved path, then minimum and maximum score.
first_result = calculated_pdb_paths[0]
print(f'File saved to {first_result[0]}. \n Min score: {first_result[1]} \n Max score: {first_result[2]}')

In [3]:
# File handed to visualize; the name suggests an exposure output for 1u7g —
# TODO confirm it exists in pdbs/out before running this cell.
exposure_pdb_path = 'pdbs/out/1u7g_2c50.pdb'
visualize(exposure_pdb_path)