/
mainpage.h
1362 lines (1103 loc) · 37.3 KB
/
mainpage.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
\mainpage ViennaRNA Package core - RNAlib
\n
\htmlonly <center> \endhtmlonly
<h2>A Library for folding and comparing RNA secondary structures</h2>
\htmlonly </center> \endhtmlonly
\n
\date 1994-2010
\authors Ivo Hofacker, Peter Stadler, Ronny Lorenz and many more
<h3>Table of Contents</h3>
<hr>
\li \ref mp_intro
\li \ref mp_fold
\li \ref mp_parse
\li \ref mp_utils
\li \ref mp_example
\li \ref mp_ref
<hr>
\section mp_intro Introduction
The core of the Vienna RNA Package is formed by a collection of routines
for the prediction and comparison of RNA secondary structures. These
routines can be accessed through stand-alone programs, such as RNAfold,
RNAdistance etc., which should be sufficient for most users. For those who
wish to develop their own programs we provide a library which can be
linked to your own code.
This document describes the library and will be primarily useful to
programmers. However, it also contains details about the implementation
that may be of interest to advanced users. The stand-alone programs are
described in separate man pages. The latest version of the package
including source code and html versions of the documentation can be found
at
\n\n
http://www.tbi.univie.ac.at/~ivo/RNA/
\page mp_fold Folding Routines - Functions for Folding RNA Secondary Structures
\anchor toc
<h3>Table of Contents</h3>
<hr>
\li \ref mp_mfe_Fold
\li \ref mp_PF_Fold
\li \ref mp_Inverse_Fold
\li \ref mp_Suboptimal_folding
\li \ref mp_Cofolding
\li \ref mp_Local_Fold
\li \ref mp_Alignment_Fold
\li \ref mp_Fold_Vars
\li \ref mp_Param_Files
<hr>
\section mp_mfe_Fold Calculating Minimum Free Energy Structures
The library provides a fast dynamic programming minimum free energy
folding algorithm as described by \ref zuker_81 "Zuker & Stiegler (1981)".
Associated functions are:
\verbatim
float fold (char* sequence, char* structure);
\endverbatim
\copybrief fold()
\verbatim
float circfold (char* sequence, char* structure);
\endverbatim
\copybrief circfold()
\verbatim
float energy_of_structure(const char *string,
const char *structure,
int verbosity_level);
\endverbatim
\copybrief energy_of_structure()
\verbatim
float energy_of_circ_structure( const char *string,
const char *structure,
int verbosity_level);
\endverbatim
\copybrief energy_of_circ_structure()
\verbatim
void update_fold_params(void);
\endverbatim
\copybrief update_fold_params()
\verbatim
void free_arrays(void);
\endverbatim
\copybrief free_arrays()
\see fold.h, cofold.h, 2Dfold.h, Lfold.h, alifold.h and subopt.h for a complete list of available functions.
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_PF_Fold Calculating Partition Functions and Pair Probabilities
Instead of the minimum free energy structure the partition function of
all possible structures and from that the pairing probability for every
possible pair can be calculated, using a dynamic programming algorithm
as described by \ref mccaskill_90 "McCaskill (1990)". The following
functions are provided:
\verbatim
float pf_fold ( char* sequence,
char* structure)
\endverbatim
\copybrief pf_fold()
\verbatim
void free_pf_arrays (void)
\endverbatim
\copybrief free_pf_arrays()
\verbatim
void update_pf_params (int length)
\endverbatim
\copybrief update_pf_params()
\verbatim
char *get_centroid_struct_pl( int length,
double *dist,
plist *pl);
\endverbatim
\copybrief get_centroid_struct_pl()
\verbatim
char *get_centroid_struct_pr( int length,
double *dist,
double *pr);
\endverbatim
\copybrief get_centroid_struct_pr()
\verbatim
double mean_bp_distance_pr( int length,
double *pr);
\endverbatim
\copybrief mean_bp_distance_pr
\see part_func.h, part_func_co.h, part_func_up.h, 2Dpfold.h, LPfold.h, alifold.h and MEA.h for a complete list of available functions.
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Inverse_Fold Searching for Predefined Structures
We provide two functions that search for sequences with a given
structure, thereby inverting the folding routines.
\verbatim
float inverse_fold (char *start,
char *target)
\endverbatim
\copybrief inverse_fold()
\verbatim
float inverse_pf_fold ( char *start,
char *target)
\endverbatim
\copybrief inverse_pf_fold()
The following global variables define the behavior or show the results of the
inverse folding routines:
\verbatim
char *symbolset
\endverbatim
\copybrief symbolset
\see inverse.h for more details and a complete list of available functions.
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Suboptimal_folding Enumerating Suboptimal Structures
\verbatim
SOLUTION *subopt (char *sequence,
char *constraint,
int *delta,
FILE *fp)
\endverbatim
\copybrief subopt()
\verbatim
SOLUTION *subopt_circ ( char *sequence,
char *constraint,
int *delta,
FILE *fp)
\endverbatim
\copybrief subopt_circ()
\verbatim
SOLUTION *zukersubopt(const char *string);
\endverbatim
\copybrief zukersubopt()
\verbatim
char *TwoDpfold_pbacktrack ( TwoDpfold_vars *vars,
unsigned int d1,
unsigned int d2)
\endverbatim
\copybrief TwoDpfold_pbacktrack()
\verbatim
char *alipbacktrack (double *prob)
\endverbatim
\copybrief alipbacktrack()
\verbatim
char *pbacktrack(char *sequence);
\endverbatim
\copybrief pbacktrack()
\verbatim
char *pbacktrack_circ(char *sequence);
\endverbatim
\copybrief pbacktrack_circ()
\see subopt.h, part_func.h, alifold.h and 2Dpfold.h for more detailed descriptions
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Cofolding Predicting hybridization structures of two molecules
The function of an RNA molecule often depends on its interaction with
other RNAs. The following routines therefore allow to predict structures
formed by two RNA molecules upon hybridization.\n
One approach to co-folding two RNAs consists of concatenating the two
sequences and keeping track of the concatenation point in all energy
evaluations. Correspondingly, many of the cofold() and
co_pf_fold() routines below take one sequence string as argument
and use the the global variable #cut_point to mark the concatenation
point. Note that while the <i>RNAcofold</i> program uses the '&' character
to mark the chain break in its input, you should not use an '&' when using
the library routines (set #cut_point instead).\n
In a second approach to co-folding two RNAs, cofolding is seen as a
stepwise process. In the first step the probability of an unpaired region
is calculated and in a second step this probability of an unpaired region
is multiplied with the probability of an interaction between the two RNAs.
This approach is implemented for the interaction between a long
target sequence and a short ligand RNA. Function pf_unstru() calculates
the partition function over all unpaired regions in the input
sequence. Function pf_interact(), which calculates the
partition function over all possible interactions between two
sequences, needs both sequence as separate strings as input.
\verbatim
int cut_point
\endverbatim
\copybrief cut_point
\verbatim
float cofold (char *sequence,
char *structure)
\endverbatim
\copybrief cofold()
\verbatim
void free_co_arrays (void)
\endverbatim
\copybrief free_co_arrays()
<b>Partition Function Cofolding</b>
To simplify the implementation the partition function computation is done
internally in a null model that does not include the duplex initiation
energy, i.e. the entropic penalty for producing a dimer from two
monomers). The resulting free energies and pair probabilities are initially
relative to that null model. In a second step the free energies can be
corrected to include the dimerization penalty, and the pair probabilities
can be divided into the conditional pair probabilities given that a re
dimer is formed or not formed.
\verbatim
cofoldF co_pf_fold( char *sequence,
char *structure);
\endverbatim
\copybrief co_pf_fold()
\verbatim
void free_co_pf_arrays(void);
\endverbatim
\copybrief free_co_pf_arrays()
<b>Cofolding all Dimeres, Concentrations</b>
After computing the partition functions of all possible dimeres one
can compute the probabilities of base pairs, the concentrations out of
start concentrations and sofar and soaway.
\verbatim
void compute_probabilities(
double FAB,
double FEA,
double FEB,
struct plist *prAB,
struct plist *prA,
struct plist *prB,
int Alength)
\endverbatim
\copybrief compute_probabilities()
\verbatim
ConcEnt *get_concentrations(double FEAB,
double FEAA,
double FEBB,
double FEA,
double FEB,
double * startconc)
\endverbatim
\copybrief get_concentrations()
<b>Partition Function Cofolding as a stepwise process</b>
In this approach to cofolding the interaction between two RNA molecules is
seen as a stepwise process. In a first step, the target molecule has to
adopt a structure in which a binding site is accessible. In a second step,
the ligand molecule will hybridize with a region accessible to an
interaction. Consequently the algorithm is designed as a two step process:
The first step is the calculation of the probability
that a region within the target is unpaired, or equivalently, the
calculation of the free energy needed to expose a region. In the second step
we compute the free energy of an interaction for every possible binding
site.
Associated functions are:
\verbatim
pu_contrib *pf_unstru ( char *sequence,
int max_w)
\endverbatim
\copybrief pf_unstru()
\verbatim
void free_pu_contrib_struct (pu_contrib *pu)
\endverbatim
\copybrief free_pu_contrib_struct()
\verbatim
interact *pf_interact(
const char *s1,
const char *s2,
pu_contrib *p_c,
pu_contrib *p_c2,
int max_w,
char *cstruc,
int incr3,
int incr5)
\endverbatim
\copybrief pf_interact()
\verbatim
void free_interact (interact *pin)
\endverbatim
\copybrief free_interact()
\see cofold.h, part_func_co.h and part_func_up.h for more details
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Local_Fold Predicting local structures of large sequences
Local structures can be predicted by a modified version of the
fold() algorithm that restricts the span of all base pairs.
\verbatim
float Lfold ( const char *string,
char *structure,
int maxdist)
\endverbatim
\copybrief Lfold()
\verbatim
float aliLfold( const char **strings,
char *structure,
int maxdist)
\endverbatim
\copybrief aliLfold()
\verbatim
float Lfoldz (const char *string,
char *structure,
int maxdist,
int zsc,
double min_z)
\endverbatim
\copybrief Lfoldz()
\verbatim
plist *pfl_fold (
char *sequence,
int winSize,
int pairSize,
float cutoffb,
double **pU,
struct plist **dpp2,
FILE *pUfp,
FILE *spup)
\endverbatim
\copybrief pfl_fold()
\see Lfold.h and LPfold.h for more details
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Alignment_Fold Predicting Consensus Structures from Alignment
Consensus structures can be predicted by a modified version of the
fold() algorithm that takes a set of aligned sequences instead
of a single sequence. The energy function consists of the mean energy
averaged over the sequences, plus a covariance term that favors pairs
with consistent and compensatory mutations and penalizes pairs that
cannot be formed by all structures. For details see \ref hofacker_02 "Hofacker (2002)".
\verbatim
float alifold (const char **strings,
char *structure)
\endverbatim
\copybrief alifold()
\verbatim
float circalifold (const char **strings,
char *structure)
\endverbatim
\copybrief circalifold()
\verbatim
void free_alifold_arrays (void)
\endverbatim
\copybrief free_alifold_arrays()
\verbatim
float energy_of_alistruct (
const char **sequences,
const char *structure,
int n_seq,
float *energy)
\endverbatim
\copybrief energy_of_alistruct()
\verbatim
struct pair_info
\endverbatim
\copybrief pair_info
\verbatim
double cv_fact
\endverbatim
\copybrief cv_fact
\verbatim
double nc_fact
\endverbatim
\copybrief nc_fact
\see alifold.h for more details
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Fold_Vars Global Variables for the Folding Routines
The following global variables change the behavior the folding
algorithms or contain additional information after folding.
\verbatim
int noGU
\endverbatim
\copybrief noGU
\verbatim
int no_closingGU
\endverbatim
\copybrief no_closingGU
\verbatim
int noLonelyPairs
\endverbatim
\copybrief noLonelyPairs
\verbatim
int tetra_loop
\endverbatim
\copybrief tetra_loop
\verbatim
int energy_set
\endverbatim
\copybrief energy_set
\verbatim
float temperature
\endverbatim
\copybrief temperature
\verbatim
int dangles
\endverbatim
\copybrief dangles
\verbatim
char *nonstandards
\endverbatim
\copybrief nonstandards
\verbatim
int cut_point
\endverbatim
\copybrief cut_point
\verbatim
float pf_scale
\endverbatim
\copybrief pf_scale
\verbatim
int fold_constrained
\endverbatim
\copybrief fold_constrained
\verbatim
int do_backtrack
\endverbatim
\copybrief do_backtrack
\verbatim
char backtrack_type
\endverbatim
\copybrief backtrack_type
include fold_vars.h if you want to change any of these variables
from their defaults.
\see fold_vars.h for a more complete and detailed description of all global variables and how to use them
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section mp_Param_Files Reading Energy Parameters from File
A default set of parameters, identical to the one described in \ref mathews_04 "Mathews et.al. (2004)", is
compiled into the library.\n
Alternately, parameters can be read from and written to a file.
\verbatim
void read_parameter_file (const char fname[])
\endverbatim
\copybrief read_parameter_file()
\verbatim
void write_parameter_file (const char fname[])
\endverbatim
\copybrief write_parameter_file
To preserve some backward compatibility the RNAlib also provides functions to convert energy parameter
files from the format used in version 1.4-1.8 into the new format used since version 2.0
\verbatim
void convert_parameter_file (
const char *iname,
const char *oname,
unsigned int options)
\endverbatim
\copybrief convert_parameter_file()
\see read_epars.h and convert_epars.h for detailed description of the available functions
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\ref mp_parse "Next Page: Parsing and Comparing"
\page mp_parse Parsing and Comparing - Functions to Manipulate Structures
<h2>Representations of Secondary Structures</h2>
The standard representation of a secondary structure is the <i>bracket
notation</i>, where matching brackets symbolize base pairs and unpaired
bases are shown as dots. Alternatively, one may use two types of node
labels, 'P' for paired and 'U' for unpaired; a dot is then replaced by
'(U)', and each closed bracket is assigned an additional identifier 'P'.
We call this the expanded notation. In \ref fontana_93b "Fontana et al. (1993)" a
condensed
representation of the secondary structure is proposed, the so-called
homeomorphically irreducible tree (HIT) representation. Here a stack is
represented as a single pair of matching brackets labeled 'P' and
weighted by the number of base pairs. Correspondingly, a contiguous
strain of unpaired bases is shown as one pair of matching brackets
labeled 'U' and weighted by its length. Generally any string consisting
of matching brackets and identifiers is equivalent to a plane tree with
as many different types of nodes as there are identifiers.
\ref shapiro_88 "Bruce Shapiro (1988)" proposed a coarse grained representation, which,
does not retain the full information of the secondary structure. He
represents the different structure elements by single matching brackets
and labels them as 'H' (hairpin loop), 'I' (interior loop), 'B'
(bulge), 'M' (multi-loop), and 'S' (stack). We extend his alphabet by an
extra letter for external elements 'E'. Again these identifiers may be
followed by a weight corresponding to the number of unpaired bases or
base pairs in the structure element. All tree representations (except
for the dot-bracket form) can be encapsulated into a virtual root
(labeled 'R'), see the example below.
The following example illustrates the different linear tree representations
used by the package. All lines show the same secondary structure.
\verbatim
a) .((((..(((...)))..((..)))).)).
(U)(((((U)(U)((((U)(U)(U)P)P)P)(U)(U)(((U)(U)P)P)P)P)(U)P)P)(U)
b) (U)(((U2)((U3)P3)(U2)((U2)P2)P2)(U)P2)(U)
c) (((H)(H)M)B)
((((((H)S)((H)S)M)S)B)S)
(((((((H)S)((H)S)M)S)B)S)E)
d) ((((((((H3)S3)((H2)S2)M4)S2)B1)S2)E2)R)
\endverbatim
Above: Tree representations of secondary structures. a) Full structure:
the first line shows the more convenient condensed notation which is
used by our programs; the second line shows the rather clumsy expanded
notation for completeness, b) HIT structure, c) different versions of
coarse grained structures: the second line is exactly Shapiro's
representation, the first line is obtained by neglecting the stems.
Since each loop is closed by a unique stem, these two lines are
equivalent. The third line is an extension taking into account also the
external digits. d) weighted coarse structure, this time including the
virtual root.
For the output of aligned structures from string editing, different
representations are needed, where we put the label on both sides.
The above examples for tree representations would then look like:
\verbatim
a) (UU)(P(P(P(P(UU)(UU)(P(P(P(UU)(UU)(UU)P)P)P)(UU)(UU)(P(P(UU)(U...
b) (UU)(P2(P2(U2U2)(P2(U3U3)P3)(U2U2)(P2(U2U2)P2)P2)(UU)P2)(UU)
c) (B(M(HH)(HH)M)B)
(S(B(S(M(S(HH)S)(S(HH)S)M)S)B)S)
(E(S(B(S(M(S(HH)S)(S(HH)S)M)S)B)S)E)
d) (R(E2(S2(B1(S2(M4(S3(H3)S3)((H2)S2)M4)S2)B1)S2)E2)R)
\endverbatim
Aligned structures additionally contain the gap character '_'.
<h2>Parsing and Coarse Graining of Structures</h2>
Several functions are provided for parsing structures and converting to
different representations.
\verbatim
char *expand_Full(const char *structure)
\endverbatim
\copybrief expand_Full()
\verbatim
char *b2HIT (const char *structure)
\endverbatim
\copybrief b2HIT()
\verbatim
char *b2C (const char *structure)
\endverbatim
\copybrief b2C()
\verbatim
char *b2Shapiro (const char *structure)
\endverbatim
\copybrief b2Shapiro()
\verbatim
char *expand_Shapiro (const char *coarse);
\endverbatim
\copybrief expand_Shapiro()
\verbatim
char *add_root (const char *structure)
\endverbatim
\copybrief add_root()
\verbatim
char *unexpand_Full (const char *ffull)
\endverbatim
\copybrief unexpand_Full()
\verbatim
char *unweight (const char *wcoarse)
\endverbatim
\copybrief unweight()
\verbatim
void unexpand_aligned_F (char *align[2])
\endverbatim
\copybrief unexpand_aligned_F()
\verbatim
void parse_structure (const char *structure)
\endverbatim
\copybrief parse_structure()
\see RNAstruct.h for prototypes and more detailed description
<h2>Distance Measures</h2>
A simple measure of dissimilarity between secondary structures of equal
length is the base pair distance, given by the number of pairs present in
only one of the two structures being compared. I.e. the number of base
pairs that have to be opened or closed to transform one structure into the
other. It is therefore particularly useful for comparing structures on the
same sequence. It is implemented by
\verbatim
int bp_distance(const char *str1,
const char *str2)
\endverbatim
\copybrief bp_distance()
For other cases a distance measure that allows for gaps is preferable.
We can define distances between structures as edit distances between
trees or their string representations. In the case of string distances
this is the same as "sequence alignment". Given a set of edit operations
and edit costs, the edit distance is given by the minimum sum of the
costs along an edit path converting one object into the other. Edit
distances like these always define a metric. The edit operations used by us
are insertion, deletion and replacement of nodes.
String editing does not pay attention to the matching of brackets, while
in tree editing matching brackets represent a single node of the tree.
Tree editing is therefore usually preferable, although somewhat
slower. String edit distances are always smaller or equal to tree edit
distances.
The different level of detail in the structure representations defined
above naturally leads to different measures of distance. For full
structures we use a cost of 1 for deletion or insertion of an unpaired
base and 2 for a base pair. Replacing an unpaired base for a pair incurs
a cost of 1.
Two cost matrices are provided for coarse grained structures:
\verbatim
/* Null, H, B, I, M, S, E */
{ 0, 2, 2, 2, 2, 1, 1}, /* Null replaced */
{ 2, 0, 2, 2, 2, INF, INF}, /* H replaced */
{ 2, 2, 0, 1, 2, INF, INF}, /* B replaced */
{ 2, 2, 1, 0, 2, INF, INF}, /* I replaced */
{ 2, 2, 2, 2, 0, INF, INF}, /* M replaced */
{ 1, INF, INF, INF, INF, 0, INF}, /* S replaced */
{ 1, INF, INF, INF, INF, INF, 0}, /* E replaced */
/* Null, H, B, I, M, S, E */
{ 0, 100, 5, 5, 75, 5, 5}, /* Null replaced */
{ 100, 0, 8, 8, 8, INF, INF}, /* H replaced */
{ 5, 8, 0, 3, 8, INF, INF}, /* B replaced */
{ 5, 8, 3, 0, 8, INF, INF}, /* I replaced */
{ 75, 8, 8, 8, 0, INF, INF}, /* M replaced */
{ 5, INF, INF, INF, INF, 0, INF}, /* S replaced */
{ 5, INF, INF, INF, INF, INF, 0}, /* E replaced */
\endverbatim
The lower matrix uses the costs given in \ref shapiro_90 "Shapiro (1990)".
All distance functions use the following global variables:
\verbatim
int cost_matrix;
\endverbatim
\copybrief cost_matrix
\verbatim
int edit_backtrack;
\endverbatim
\copybrief edit_backtrack
\verbatim
char *aligned_line[4];
\endverbatim
\copybrief aligned_line
\see utils.h, dist_vars.h and stringdist.h for more details
<h3>Functions for Tree Edit Distances</h3>
\verbatim
Tree *make_tree (char *struc)
\endverbatim
\copybrief make_tree()
\verbatim
float tree_edit_distance (Tree *T1,
Tree *T2)
\endverbatim
\copybrief tree_edit_distance()
\verbatim
void free_tree(Tree *t)
\endverbatim
\copybrief free_tree()
\see dist_vars.h and treedist.h for prototypes and more detailed descriptions
<h3>Functions for String Alignment</h3>
\verbatim
swString *Make_swString (char *string)
\endverbatim
\copybrief Make_swString()
\verbatim
float string_edit_distance (swString *T1,
swString *T2)
\endverbatim
\copybrief string_edit_distance()
\see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
<h3>Functions for Comparison of Base Pair Probabilities</h3>
For comparison of base pair probability matrices, the matrices are first
condensed into probability profiles which are the compared by alignment.
\verbatim
float *Make_bp_profile_bppm ( double *bppm,
int length)
\endverbatim
\copybrief Make_bp_profile_bppm()
\verbatim
float profile_edit_distance ( const float *T1,
const float *T2)
\endverbatim
\copybrief profile_edit_distance()
\see ProfileDist.h for prototypes and more details of the above functions
\ref mp_utils "Next Page: Utilities"
\page mp_utils Utilities - Odds and Ends
\anchor toc
<h3>Table of Contents</h3>
<hr>
\li \ref utils_ss
\li \ref utils_dot
\li \ref utils_aln
\li \ref utils_seq
\li \ref utils_struc
\li \ref utils_misc
<hr>
\section utils_ss Producing secondary structure graphs
\verbatim
int PS_rna_plot ( char *string,
char *structure,
char *file)
\endverbatim
\copybrief PS_rna_plot()
\verbatim
int PS_rna_plot_a (
char *string,
char *structure,
char *file,
char *pre,
char *post)
\endverbatim
\copybrief PS_rna_plot_a()
\verbatim
int gmlRNA (char *string,
char *structure,
char *ssfile,
char option)
\endverbatim
\copybrief gmlRNA()
\verbatim
int ssv_rna_plot (char *string,
char *structure,
char *ssfile)
\endverbatim
\copybrief ssv_rna_plot()
\verbatim
int svg_rna_plot (char *string,
char *structure,
char *ssfile)
\endverbatim
\copybrief svg_rna_plot()
\verbatim
int xrna_plot ( char *string,
char *structure,
char *ssfile)
\endverbatim
\copybrief xrna_plot()
\verbatim
int rna_plot_type
\endverbatim
\copybrief rna_plot_type
Two low-level functions provide direct access to the graph lauyouting
algorithms:
\verbatim
int simple_xy_coordinates ( short *pair_table,
float *X,
float *Y)
\endverbatim
\copybrief simple_xy_coordinates()
\verbatim
int naview_xy_coordinates ( short *pair_table,
float *X,
float *Y)
\endverbatim
\copybrief naview_xy_coordinates()
\see PS_dot.h and naview.h for more detailed descriptions.
\htmlonly
<hr>
<a href="#toc">Table of Contents</a>
<hr>
\endhtmlonly
\section utils_dot Producing (colored) dot plots for base pair probabilities
\verbatim
int PS_color_dot_plot ( char *string,
cpair *pi,
char *filename)
\endverbatim
\copybrief PS_color_dot_plot()
\verbatim
int PS_color_dot_plot_turn (char *seq,
cpair *pi,
char *filename,
int winSize)
\endverbatim
\copybrief PS_color_dot_plot_turn()
\verbatim
int PS_dot_plot_list (char *seq,
char *filename,
plist *pl,
plist *mf,
char *comment)
\endverbatim
\copybrief PS_dot_plot_list()
\verbatim