# Melodia: A Python Library for Protein Structure and Dynamics Analysis

## Examples

In [None]:
import os 
import dill

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from Bio.PDB import PDBParser, PDBIO

from sklearn.preprocessing import StandardScaler

In [None]:
# Import melodia library
import melodia as mel

## Creating a pandas DataFrame from a structure file

#### Melodia allows us to import a PDB file directly into a pandas DataFrame.

In [None]:
# Example with multiple chains.
# The bare file name is resolved relative to the notebook's working directory.
file_name = '2k5x.pdb'

In [None]:
# Example with multiple models
#file_name = '2lj5.pdb'

In [None]:
# Read the structure file named by `file_name` and keep the resulting
# table of geometric descriptors in `dfi` for the cells below.
dfi = mel.geometry_from_structure_file(file_name)

In [None]:
# Display the full table (last expression of the cell is rendered by Jupyter).
dfi

#### Select a model 

In [None]:
# Boolean mask for the rows whose 'model' column equals 0.
model = dfi['model'].eq(0)
# Take an independent copy so later modifications never touch `dfi`.
dfo = dfi.loc[model].copy()

In [None]:
# Preview the first rows of the selected model.
dfo.head()

#### A Ramachandran-like plot

In [None]:
# Joint KDE plot of curvature against torsion for the selected model,
# drawn with a continuous "Blues" colormap.
cmap = sns.color_palette("Blues", as_cmap=True)
sns.jointplot(
    data=dfo,
    x='curvature',
    y='torsion',
    kind='kde',
    fill=True,
    cmap=cmap,
    height=10,
)
plt.show()

#### Apply scikit-learn's StandardScaler to the geometric attributes

In [None]:
# Columns holding the geometric descriptors to be standardised.
features = ['curvature', 'torsion', 'arc_length', 'writhing']

# Scale a copy so that `dfo` keeps the raw values.
# To restrict the analysis to one residue type, start from a filtered frame:
#df = dfo[(dfo['name'] == 'GLY')].copy()
df = dfo.copy()

# Fit the scaler on the selected columns, then overwrite them in place
# with their standardised values.
autoscaler = StandardScaler().fit(df[features])
df[features] = autoscaler.transform(df[features])

In [None]:
# Hexbin joint plot of the scaled curvature and torsion columns.
cmap = sns.color_palette("Blues", as_cmap=True)
# KDE alternative:
#sns.jointplot(x='curvature', y='torsion', data=df, kind='kde', cmap=cmap, height=10, fill=True)
sns.jointplot(
    data=df,
    x='curvature',
    y='torsion',
    kind='hex',
    cmap=cmap,
    height=10,
)
plt.show()

## Creating geometric data from a BioPython parsed structure

In [None]:
# The file name without its extension serves as the structure identifier.
name, ext = os.path.splitext(file_name)

# Parse the same file with Biopython.
parser = PDBParser()
structure = parser.get_structure(name, file_name)

### Create a pandas DataFrame from the parsed structure

In [None]:
# Build the geometry table from the already-parsed Biopython structure.
# NOTE: this rebinds `df`, replacing the scaled frame created earlier.
df = mel.geometry_from_structure(structure)
# Display the table.
df

### Create a dictionary of attributes from the parsed structure

In [None]:
# Dictionary form of the geometry for the parsed structure
# (indexed below with 'model:chain' keys).
geo = mel.geometry_dict_from_structure(structure)

In [None]:
# Keys use the format 'model:chain'; here: model 0, chain A.
# Show entry 0 of that chain's `residues` collection.
geo['0:A'].residues[0]

### The geometric data can be stored in a file (pickled) using the dill library.

In [None]:
# Serialise the whole geometry dictionary to 'geo.dill' (binary mode).
with open('geo.dill', 'wb') as fh:
    dill.dump(geo, fh)

In [None]:
# Read the geometry dictionary back from 'geo.dill'.
# NOTE: dill, like pickle, can execute arbitrary code while loading;
# only load files that come from a trusted source.
with open('geo.dill', 'rb') as fh:
    geo_loaded = dill.load(fh)

In [None]:
# Same lookup as on `geo`, now on the reloaded copy, to check the round trip.
geo_loaded['0:A'].residues[0]

### Store a geometric attribute in the b-factor field

In [None]:
# Use the 'curvature' attribute from `geo` as the b-factor source for
# `structure`.
mel.bfactor_from_geo(
    structure=structure,
    geo=geo,
    attribute='curvature',
)

#### View structure as a putty model

In [None]:
# Putty rendering of model 0 with an explicit radius scale and canvas size.
# Alternative call with no extra arguments:
#mel.view_putty(structure)
mel.view_putty(
    structure[0],
    radius_scale=1.4,
    width=800,
    height=600,
)

In [None]:
# Cartoon rendering of model 0 on an 800x600 canvas.
mel.view_cartoon(structure[0], width=800, height=600)

In [None]:
# Tube rendering of model 0 on an 800x600 canvas.
mel.view_tube(structure[0], width=800, height=600)

### Save the structure to a PDB file with the new bfactors

In [None]:
# Write the structure, including its current b-factors, to 'out.pdb'.
pdb_writer = PDBIO()
pdb_writer.set_structure(structure)
pdb_writer.save('out.pdb')

## Access the Propensity Table for a target residue

In [None]:
# Instantiate the propensity table queried in the next cell.
ptable = mel.PropensityTable()

In [None]:
# Backbone dihedral angles used for the lookup.
phi, psi = -82.0, 55.0

# Score for target 'F' given residue 'A' at the (phi, psi) pair above.
ptable.get_score(
    target='F',
    residue='A',
    phi=phi,
    psi=psi,
)