<a href="https://colab.research.google.com/github/rae-gh/colab-analyses/blob/main/Geometry_Intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1>Protein Geometry Correlations</h1>
(c) Rachel Alcraft 2024

There are 5 cells to run; you only need to edit the second (CELL 2) to change the PDB structures and atoms of interest:
<hr/>
<br/> 1. RUN: Import the required libraries
<br/> 2. EDIT+RUN: Enter structure and atoms
<br/> 3. RUN: Calculate the pdb objects
<br/> 4. RUN: Generate a basic 2d correlation
<br/> 5. RUN: Generate a correlation with additional geometric hue
<hr/>

In [4]:
# CELL 1
# Need to import libraries
try:#runtime gets refreshed so reinstall of non standard libraries may be necessary
  import google.colab
  !pip install matplotlib
  !pip install numpy
  !pip install pandas
  !pip install leuci_xyz
  !pip install leuci_pol
  !pip install biopython==1.81 #later versions break the pdb loader
  !pip install prometry
except:
  pass

In [18]:
# CELL 2 - specify the inputs
# PDB codes of the structures to load and analyse.
pdb_codes = [
    "1crn", "1ejg", "3u7t", "2fd7", "1cbn",
    "1cnr", "3nir", "1ab1", "2fd9", "1jxy",
    "1jxu", "1jxx", "1jxw", "1jxt", "4rek",
]
# Geometric measures to calculate, one dataframe column per entry.
# Each entry is a colon-separated list of atom names; the "+1"/"-1" suffixes
# presumably select an atom in the next/previous residue - confirm in the prometry docs.
geos = [
    "N:CA:C:N+1",
    "N:CA:C",
    "N:O",
    "CA:C:N+1",
    "C-1:N:CA:C",
]
# Directory handed to the pdb loader in CELL 3.
DATADIR = "content/"

In [19]:
# CELL 3 - Calculate the dataframe of geometric correlations
from prometry import pdbloader as pl
from prometry import pdbgeometry as pg

# Build one loaded pdb object per requested code (PDB format, not CIF),
# using DATADIR as the loader's data directory.
pobjs = [
    pl.PdbLoader(code, DATADIR, cif=False).load_pdb()
    for code in pdb_codes
]

# Calculate every requested measure across all loaded structures
# and print the resulting dataframe.
gm = pg.GeometryMaker(pobjs)
df_geos = gm.calculateGeometry(geos)
print(df_geos)

     N:CA:C:N+1   N:CA:C       N:O  CA:C:N+1  C-1:N:CA:C pdb_code  resolution  \
0       144.346  108.649  2.761307   114.975    -107.833     1crn        1.50   
1       133.291  106.634  2.742142   119.442    -131.170     1crn        1.50   
2       151.203  109.941  2.766070   116.923    -118.897     1crn        1.50   
3       -18.979  116.739  3.623741   117.547     -76.182     1crn        1.50   
4       166.027  108.065  2.621433   116.324    -157.924     1crn        1.50   
..          ...      ...       ...       ...         ...      ...         ...   
905     -47.438  109.919  3.494409   116.431     -60.893     4rek        0.74   
906     -47.770  110.725  3.498333   115.607     -59.001     4rek        0.74   
907     -49.292  112.234  3.473657   117.160     -67.314     4rek        0.74   
908     -33.562  109.447  3.518229   118.058     -68.258     4rek        0.74   
909     -45.074  109.960  3.493018   116.847     -60.987     4rek        0.74   

      aa chain  rid  ... ri

In [22]:
# CELL 4
# Correlation 1: scatter of two geometric measures, coloured by structure resolution.
import plotly.express as px
import plotly.io as pio
x_ax = "N:CA:C:N+1"
y_ax = "N:O"
hue = "resolution"
# Sort into a NEW frame instead of using inplace=True: df_geos is shared with
# the other cells, and mutating it here would make their results depend on
# whether (and how often) this cell has been run.
# Ascending order puts the rows with the largest resolution values last;
# presumably that controls which points are drawn on top - confirm if it matters.
df_by_resolution = df_geos.sort_values(by=["resolution"])
fig = px.scatter(
    df_by_resolution,
    x=x_ax,
    y=y_ax,
    color=hue,
    title="",
    width=500,
    height=500,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.Viridis,
)
fig.show()


In [25]:
# CELL 5
# Correlation 2: the same pair of axes as correlation 1, but coloured by a
# third geometric measure instead of the resolution.
import plotly.express as px
import plotly.io as pio

x_ax, y_ax = "N:CA:C:N+1", "N:O"
hue = "N:CA:C"

# 500x500 semi-transparent scatter using the Inferno colour scale.
fig = px.scatter(
    df_geos,
    x=x_ax,
    y=y_ax,
    color=hue,
    title="",
    width=500,
    height=500,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.Inferno,
)
fig.show()
