# BasicAssessMetrics

In [4]:
cd opt/BasicAssessMetrics

/Users/magnus/work-src/rna-pdb-tools/opt/BasicAssessMetrics


In [12]:
import sys
import os

import pdb_utils
import utils
import extract

from operator import attrgetter

# Input files handed to pdb_utils.PDBNormalizer in normalize_structure().
# Both paths are relative, so they are resolved against the current working directory.
RESIDUES_LIST = "data/residues.list"
ATOMS_LIST = "data/atoms.list"

def CleanFormat(f):
    """
    Convert the line endings of file ``f`` to Unix format, in place.

    Runs the external ``mac2unix`` and then ``dos2unix`` command on the file,
    both in quiet mode (``-q``). Users need to install dos2unix.

    The file name is passed to each tool as one argument instead of being
    pasted into a shell command line, so names containing spaces or shell
    metacharacters are handled correctly. Failures are not fatal: the exit
    status of the tools is ignored, and a tool that cannot be started only
    produces a warning on stderr.
    """
    import subprocess  # local import keeps this block self-contained

    for tool in ("mac2unix", "dos2unix"):
        try:
            subprocess.call([tool, "-q", f])
        except OSError as err:
            # Raised when the executable is missing or cannot be run.
            sys.stderr.write("WARNING: could not run %s: %s\n" % (tool, err))

In [13]:
def normalize_structure(struct, out_file = None, index_file=None, extract_file = None):
    """
    Normalize a PDB structure and optionally extract part of it.

    Parameters:
        struct       -- input structure passed to PDBNormalizer.parse
                        (a PDB file path in the example usage).
        out_file     -- output target passed to PDBNormalizer.parse.
        index_file   -- path of a text file whose whole content is passed to
                        extract.extract_PDB as the selection; it must be
                        given whenever extract_file is given.
        extract_file -- when not None, output target passed to
                        extract.extract_PDB.

    Progress and errors are reported on stderr; nothing is returned.
    If normalization fails, no extraction is attempted.
    """
    pdb_normalizer = pdb_utils.PDBNormalizer( RESIDUES_LIST, ATOMS_LIST )
    ok = pdb_normalizer.parse( struct, out_file )
    if not ok:
        sys.stderr.write("ERROR: structure not normalized!\n")
        # Without a successful normalization there is nothing reliable to extract.
        return
    sys.stderr.write("INFO: Normalization succeded!\n")

    if extract_file is None:
        return

    # Read the selection and close the handle right away.
    with open(index_file) as index_handle:
        coords = index_handle.read()

    # The original code referenced SOLUTION_NORMAL, which is not defined in
    # this notebook. The normalized output is used as the extraction source,
    # falling back to the input when no out_file was requested.
    # NOTE(review): confirm that the normalized file is the intended source.
    source = out_file if out_file is not None else struct
    extract.extract_PDB(source, coords, extract_file)
    sys.stderr.write("INFO:\tstructure extracted\n")

In [14]:
# PVALUE is set according to Hajdin et al., RNA 16(7), 2010; it must be either "+" or "-".
def calc_RMSD(native_file, native_index, prediction_file, prediction_index, PVALUE = "-"):
    """
    Compute the RMSD between a predicted and a native structure, plus its p-value.

    Each structure is loaded into a pdb_utils.PDBStruct from a file and its
    index. The raw sequences of the two structures must be equal.

    PVALUE is forwarded unchanged to PDBComparer.pvalue.

    Returns:
        (rmsd, pvalue) when the sequences match.
        -1 (a bare int, not a tuple) when they differ; the mismatch is
        reported on stderr. Callers that unpack the result must check for
        this value first.
    """
    log = sys.stderr.write

    native = pdb_utils.PDBStruct()
    native.load( native_file, native_index )
    native_seq = native.raw_sequence()

    predicted = pdb_utils.PDBStruct()
    predicted.load( prediction_file, prediction_index )
    predicted_seq = predicted.raw_sequence()

    # Guard: the comparison only makes sense for identical sequences.
    if predicted_seq != native_seq:
        log("ERROR Result sequence != Solution sequence!\n")
        log("DATA Solution sequence --> '%s'\n" % predicted_seq)
        log("DATA Result sequence   --> '%s'\n" % native_seq)
        return -1

    # RMSD first, then the p-value derived from it and the sequence length.
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( predicted, native )
    log("INFO Partial RMSD --> %f\n" % rmsd)
    pvalue = comparer.pvalue( rmsd, len(predicted_seq), PVALUE )
    log("INFO Partial P-Value --> %e\n" % pvalue)
    return (rmsd, pvalue)

In [10]:
def InteractionNetworkFidelity(native_file, native_index, prediction_file, prediction_index):
    """
    Compute RMSD, Deformation Index and INF scores for a prediction.

    Each structure is loaded into a pdb_utils.PDBStruct from a file and its
    index. The raw sequences of the two structures must be equal.

    Returns:
        (rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC, INF_STACK) when the
        sequences match, where
            INF_ALL   = comparer.INF(..., type="ALL")
            INF_WC    = comparer.INF(..., type="PAIR_2D")
            INF_NWC   = comparer.INF(..., type="PAIR_3D")
            INF_STACK = comparer.INF(..., type="STACK")
            DI_ALL    = rmsd / INF_ALL, or float("inf") when INF_ALL is 0.
        -1 (a bare int, not a tuple) when the sequences differ; the
        mismatch is reported on stderr. Callers that unpack the result must
        check for this value first.
    """
    res_struct = pdb_utils.PDBStruct()
    res_struct.load( native_file, native_index )
    res_raw_seq = res_struct.raw_sequence()

    sol_struct = pdb_utils.PDBStruct()
    sol_struct.load( prediction_file, prediction_index )
    sol_raw_seq = sol_struct.raw_sequence()

    if( sol_raw_seq != res_raw_seq ):
        sys.stderr.write("ERROR Result sequence != Solution sequence!\n")
        sys.stderr.write("DATA Solution sequence --> '%s'\n" %sol_raw_seq )
        sys.stderr.write("DATA Result sequence\t --> '%s'\n" %res_raw_seq )
        return(-1)

    # computes the RMSD and the interaction network fidelity scores
    comparer = pdb_utils.PDBComparer()
    rmsd = comparer.rmsd( sol_struct, res_struct )
    INF_ALL = comparer.INF( sol_struct, res_struct, type="ALL" )

    # Deformation Index = RMSD / INF. An INF of 0 would raise
    # ZeroDivisionError, so report an infinite index in that case.
    if INF_ALL == 0:
        DI_ALL = float("inf")
    else:
        DI_ALL = rmsd / INF_ALL

    INF_WC = comparer.INF( sol_struct, res_struct, type="PAIR_2D" )
    INF_NWC = comparer.INF( sol_struct, res_struct, type="PAIR_3D" )
    INF_STACK = comparer.INF( sol_struct, res_struct, type="STACK" )
    return (rmsd,DI_ALL, INF_ALL, INF_WC, INF_NWC,INF_STACK)

In [41]:
# Normalize PDB format, correct residue names and atom names.
normalize_structure('example/14_solution_0.pdb','example/14_solution_normalized.pdb')

# Inputs shared by both metric calls below.
native_pdb = "example/14_solution_0.pdb"
native_index = "example/14_solution_0.index"
model_pdb = "example/14_ChenPostExp_2.pdb"
model_index = "example/14_ChenPostExp_2.index"

# calculate RMSD for RNA structures
# requires biopython
rmsd_result = calc_RMSD(native_pdb, native_index, model_pdb, model_index)

# calc_RMSD returns -1 instead of a tuple when the sequences differ (it has
# already reported the mismatch on stderr), so check before unpacking.
if rmsd_result != -1:
    rmsd, pvalue = rmsd_result
    print('14_ChenPostExp_2')
    print('  RMSD: %s' % rmsd)
    print('  pvalue: %s' % pvalue)

# calculate InteractionNetworkFidelity and Deformation Index for RNA structures
# need to have MC-Annotate in the directory or set in mcannotate.py
inf_result = InteractionNetworkFidelity(native_pdb, native_index, model_pdb, model_index)

# Same -1 convention as calc_RMSD.
if inf_result != -1:
    rmsd, DI_ALL, INF_ALL, INF_WC, INF_NWC, INF_STACK = inf_result
    print('14_ChenPostExp_2, rmsd %s' % rmsd)
    print("  DI_ALL: %s" % DI_ALL)
    print("  INF_ALL: %s" % INF_ALL)

INFO: Normalization succeded!
INFO Partial RMSD --> 7.751173
INFO Partial P-Value --> 7.327472e-15


14_ChenPostExp_2
  RMSD: 7.751173243045827
  pvalue: 7.327471962526033e-15
14_ChenPostExp_2, rmsd 7.751173243045827
  DI_ALL: 10.643784178530254
  INF_ALL: 0.72823472489
