Permalink
Browse files

Changed the search tree for contig offsets to store uint32_t keys instead of int

Hard-coded the branching factor b=17 (GEN_ST_BASE) in the search functions
  • Loading branch information...
mateidavid committed Nov 3, 2011
1 parent d5c2185 commit 9a687bb044f3c1514d8dcfc46f1a9d01249972ff
Showing with 36 additions and 23 deletions.
  1. +17 −7 common/gen-st.c
  2. +18 −15 common/gen-st.h
  3. +1 −1 gmapper/gmapper.c
View
@@ -1,5 +1,7 @@
#include <stdlib.h>
#include <math.h>
+#include <limits.h>
+#include <string.h>
#include <assert.h>
#include "gen-st.h"
@@ -10,10 +12,10 @@
* with the keys from the given a. The subtree will have the given h.
*/
static void
-gen_st_fill(gen_st * t, int d, int lev_idx, int h, int * a, int n)
+gen_st_fill(gen_st * t, int d, int lev_idx, int h, uint32_t * a, int n)
{
int abs_idx, delta, k, i, j, prev_j, left_child_lev_idx;
- int * node;
+ uint32_t * node;
if (n == 0) return;
@@ -74,16 +76,22 @@ gen_st_fill(gen_st * t, int d, int lev_idx, int h, int * a, int n)
void
-gen_st_init(gen_st * t, int b, int * a, int n)
+gen_st_init(gen_st * t, int b, uint32_t * a, int n)
{
int tmp;
+ uint32_t * a_aux;
assert(t != NULL);
assert(b >= 2);
t->b = b;
- t->n_keys = n;
- t->n_nodes = (n == 0? 0 : (n - 1) / (b - 1) + 1);
+ t->b = GEN_ST_BASE; // hard-coded to be equal to 17 during searching
+ t->n_keys = (n == 0? 0 : ((n - 1) / (t->b - 1) + 1) * (t->b - 1));
+ a_aux = (uint32_t *)malloc(t->n_keys * sizeof(uint32_t));
+ memcpy(a_aux, a, n * sizeof(uint32_t));
+ for (int i = n; i < t->n_keys; i++)
+ a_aux[i] = UINT_MAX;
+ t->n_nodes = t->n_keys / (t->b - 1);
for (t->h = 0, tmp = 1; n > tmp - 1; t->h++, tmp *= t->b);
//t->h = (int)ceil(log(t->n_keys + 1) / log(t->b));
@@ -94,8 +102,10 @@ gen_st_init(gen_st * t, int b, int * a, int n)
t->pow[i] = t->pow[i - 1] * t->b;
// finally, set up a
- t->a = (int *)malloc(t->n_keys * sizeof(int));
- gen_st_fill(t, 0, 0, t->h, a, n);
+ t->a = (uint32_t *)malloc(t->n_keys * sizeof(uint32_t));
+ gen_st_fill(t, 0, 0, t->h, a_aux, t->n_keys);
+
+ free(a_aux);
}
View
@@ -1,11 +1,14 @@
#ifndef __GEN_ST_H
#define __GEN_ST_H
+#include <stdint.h>
#include <assert.h>
+//#define GEN_ST_BASE t->b
+#define GEN_ST_BASE 17
typedef struct {
- int * a;
+ uint32_t * a;
int * pow;
int b;
int h;
@@ -14,12 +17,12 @@ typedef struct {
} gen_st;
-void gen_st_init(gen_st *, int, int *, int);
+void gen_st_init(gen_st *, int, uint32_t *, int);
void gen_st_delete(gen_st *);
static inline int
-gen_st_search_node(int * node, int load, int val)
+gen_st_search_node(uint32_t * node, int load, uint32_t val)
{
assert(node != NULL);
@@ -49,10 +52,10 @@ gen_st_search_node(int * node, int load, int val)
static inline int
-gen_st_search(gen_st * t, int val)
+gen_st_search(gen_st * t, uint32_t val)
{
int node_depth, node_lev_idx, node_abs_idx, nodes_above;
- int * node;
+ uint32_t * node;
int range_start, range_end;
int k, h, idx, delta;
@@ -68,20 +71,20 @@ gen_st_search(gen_st * t, int val)
h = t->h;
while (h > 1) {
- assert(range_end - range_start > t->b - 1);
+ assert(range_end - range_start > GEN_ST_BASE - 1);
assert(t->pow[h - 1] - 1 < range_end - range_start && range_end - range_start <= t->pow[h] - 1);
- idx = gen_st_search_node(node, t->b - 1, val);
+ idx = gen_st_search_node(node, GEN_ST_BASE - 1, val);
idx--;
- k = ((range_end - range_start) - (t->pow[h - 1] - 1)) / ((t->b - 1) * t->pow[h - 2]);
- assert(0 <= k && k <= t->b);
+ k = ((range_end - range_start) - (t->pow[h - 1] - 1)) / ((GEN_ST_BASE - 1) * t->pow[h - 2]);
+ assert(0 <= k && k <= GEN_ST_BASE);
- if (k == t->b) {
+ if (k == GEN_ST_BASE) {
range_start += (idx + 1) * t->pow[h - 1];
range_end = range_start + t->pow[h - 1] - 1;
} else {
- delta = (range_end - range_start) - (t->b - 1) - k * (t->pow[h - 1] - 1) - (t->b - k - 1) * (t->pow[h - 2] - 1);
+ delta = (range_end - range_start) - (GEN_ST_BASE - 1) - k * (t->pow[h - 1] - 1) - (GEN_ST_BASE - k - 1) * (t->pow[h - 2] - 1);
if (idx < k) {
range_start += (idx + 1) * t->pow[h - 1];
if (idx + 1 < k) {
@@ -95,11 +98,11 @@ gen_st_search(gen_st * t, int val)
range_end = range_start + t->pow[h - 2] - 1;
}
}
- node_lev_idx = node_lev_idx * t->b + idx + 1;
+ node_lev_idx = node_lev_idx * GEN_ST_BASE + idx + 1;
nodes_above += t->pow[node_depth];
node_depth++;
node_abs_idx = nodes_above + node_lev_idx;
- node = &t->a[node_abs_idx * (t->b - 1)];
+ node = &t->a[node_abs_idx * (GEN_ST_BASE - 1)];
assert(0 <= node_lev_idx && node_lev_idx < t->pow[node_depth]);
assert(node_depth < t->h);
@@ -112,10 +115,10 @@ gen_st_search(gen_st * t, int val)
assert(h == 0 || node_abs_idx < t->n_nodes);
}
- assert(range_end - range_start <= t->b - 1);
+ assert(range_end - range_start <= GEN_ST_BASE - 1);
if (h == 1) {
- idx = gen_st_search_node(node, range_end - range_start, val);
+ idx = gen_st_search_node(node, GEN_ST_BASE - 1, val);
range_start += idx;
}
View
@@ -2788,7 +2788,7 @@ int main(int argc, char **argv){
load_genome_usecs += (gettimeinusecs() - before);
// initialize general search tree for contig offsets
- gen_st_init(&contig_offsets_gen_st, 17, (int *)contig_offsets, num_contigs);
+ gen_st_init(&contig_offsets_gen_st, 17, contig_offsets, num_contigs);
//
// Automatic genome index trimming

0 comments on commit 9a687bb

Please sign in to comment.