Skip to content

Commit

Permalink
add gene_t
Browse files Browse the repository at this point in the history
  • Loading branch information
r3fang committed Aug 14, 2015
1 parent 12944d7 commit 2b7ba35
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 83 deletions.
Binary file added A431-1-ABGHI_S1_L001_R1_001.fastq.gz
Binary file not shown.
Binary file added A431-1-ABGHI_S1_L001_R2_001.fastq.gz
Binary file not shown.
128 changes: 46 additions & 82 deletions src/predict.c
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,7 @@ static junction_t *junction_score(solution_pair_t *sol, junction_t *junc, double
return junc_res;
}


static int pred_usage(opt_t *opt){
fprintf(stderr, "\n");
fprintf(stderr, "Usage: tfc predict [options] <exon.fa> <R1.fq> <R2.fq>\n\n");
Expand All @@ -848,45 +849,13 @@ static int pred_usage(opt_t *opt){
fprintf(stderr, " R2.fq the other end of pair-end sequencing reads\n");
return 1;
}
static int prob_sort(solution_pair_t *a, solution_pair_t *b){
/* compare a to b (cast a and b appropriately)
* return (int) -1 if (a < b)
* return (int) 0 if (a == b)
* return (int) 1 if (a > b)
*/
return (a->prob - b->prob);
}

static solution_pair_t *solu_uniq(solution_pair_t *sol){
if(sol==NULL) return NULL;
HASH_SORT(sol, prob_sort);
solution_pair_t *s, *t, *cur, *res = NULL;;
int i, j;
bool flag=false;
i=0;
for(s=sol; s!=NULL; s=s->hh.next){
j=0; flag=false;
for(t=sol; t!=NULL; t=t->hh.next){
if(strcmp(s->junc_name, t->junc_name)==0 && strcmp(s->fuse_name, t->fuse_name)==0 && (s->r1->pos==t->r1->pos) && (s->r2->pos==t->r2->pos)){
if(j>i) flag=true;
}
j++;
}
if((cur=find_solution_pair(res, s->idx))==NULL && flag==false){
cur = mycalloc(1, solution_pair_t);
memcpy(cur, s, sizeof(solution_pair_t));
HASH_ADD_STR(res, idx, cur);
}
i++;
}
return res;
}
/*--------------------------------------------------------------------*/
/* main function. */
int predict(int argc, char *argv[]) {
opt_t *opt = opt_init(); // initlize options with default settings
int c, i;
srand48(11);
//srand48(11);
junction_t *junc_ht;
while ((c = getopt(argc, argv, "m:w:k:n:u:o:e:g:s:h:l:x:a:")) >= 0) {
switch (c) {
Expand Down Expand Up @@ -918,64 +887,59 @@ int predict(int argc, char *argv[]) {
if(opt->min_hits < MIN_HITS) die("[%s] -h must be within [%d, +INF)", __func__, MIN_HITS);
if(opt->min_align_score < MIN_ALIGN_SCORE || opt->min_align_score > MAX_ALIGN_SCORE) die("[%s] -a must be within [%d, %d]", __func__, MIN_ALIGN_SCORE, MAX_ALIGN_SCORE);

fprintf(stderr, "================================= TFC =================================\n");
fprintf(stderr, " \n");
fprintf(stderr, " Targeted Fusion Caller \n");
fprintf(stderr, " Version: 08.15.r01 \n");
fprintf(stderr, " Rongxin Fang (r3fang@ucsd.edu) \n");
fprintf(stderr, " \n");
fprintf(stderr, "=======================================================================\n");


fprintf(stderr, "[%s] loading sequences of targeted genes ... \n",__func__);
if((EXON_HT = fasta_read(opt->fa)) == NULL) die("[%s] fail to read %s", __func__, opt->fa);

fprintf(stderr, "[%s] getting genes infomration ... \n",__func__);
if((GENE_HT = fasta_get_info(EXON_HT)) == NULL) die("[%s] fail to gene genes' information", __func__);

fprintf(stderr, "[%s] indexing sequneces by kmer hash table ... \n",__func__);
if((KMER_HT = kmer_index(EXON_HT, opt->k))==NULL) die("[%s] can't index exon sequences", __func__);

fprintf(stderr, "[%s] constructing breakend associated graph ... \n", __func__);
if((BAGR_HT = bag_construct(KMER_HT, EXON_HT, opt->fq1, opt->fq2, opt->min_kmer_match, opt->min_edge_weight, opt->k)) == NULL) return 0;

fprintf(stderr, "[%s] triming graph by removing edges of weight smaller than %d... \n", __func__, opt->min_edge_weight);
if(bag_trim(&BAGR_HT, opt->min_edge_weight)!=0){
fprintf(stderr, "[%s] fail to trim graph \n", __func__);
return -1;
}
if(BAGR_HT == NULL) return 0;

fprintf(stderr, "[%s] identifying junctions for every fusion candiates... \n", __func__);
if(bag_junction_gen(&BAGR_HT, EXON_HT, KMER_HT, opt)!=0){
fprintf(stderr, "[%s] fail to identify junctions\n", __func__);
return -1;
}
if(BAGR_HT == NULL) return 0;

fprintf(stderr, "[%s] constructing transcript for identified junctions ... \n", __func__);
if((bag_transcript_gen(&BAGR_HT, EXON_HT, opt))!=0){
fprintf(stderr, "[%s] fail to construct transcript\n", __func__);
return -1;
}

fprintf(stderr, "[%s] testing junctions ... \n", __func__);
if((test_junction(&SOLU_HT, &BAGR_HT, opt))!=0){
fprintf(stderr, "[%s] fail to rescan reads\n", __func__);
return -1;
}

fprintf(stderr, "[%s] testing fusion ... \n", __func__);
if((test_fusion(&SOLU_HT, &BAGR_HT, opt))!=0){
fprintf(stderr, "[%s] fail to align supportive reads to transcript\n", __func__);
return -1;
}

solution_pair_t *s; for(s=SOLU_HT; s!=NULL; s=s->hh.next){printf("%s\t%s\t%s\t%f\t%f\t%d\t%f\t%d\n", s->idx, s->junc_name, s->fuse_name, s->prob, s->r1->pos, s->r1->prob, s->r2->prob, s->r2->pos);}

//fprintf(stderr, "[%s] indexing sequneces by kmer hash table ... \n",__func__);
//if((KMER_HT = kmer_index(EXON_HT, opt->k))==NULL) die("[%s] can't index exon sequences", __func__);
//
//fprintf(stderr, "[%s] constructing breakend associated graph ... \n", __func__);
//if((BAGR_HT = bag_construct(KMER_HT, EXON_HT, opt->fq1, opt->fq2, opt->min_kmer_match, opt->min_edge_weight, opt->k)) == NULL) return 0;
//
//fprintf(stderr, "[%s] triming graph by removing edges of weight smaller than %d... \n", __func__, opt->min_edge_weight);
//if(bag_trim(&BAGR_HT, opt->min_edge_weight)!=0){
// fprintf(stderr, "[%s] fail to trim graph \n", __func__);
// return -1;
//}
//if(BAGR_HT == NULL) return 0;
//
//fprintf(stderr, "[%s] identifying junctions for every fusion candiates... \n", __func__);
//if(bag_junction_gen(&BAGR_HT, EXON_HT, KMER_HT, opt)!=0){
// fprintf(stderr, "[%s] fail to identify junctions\n", __func__);
// return -1;
//}
//if(BAGR_HT == NULL) return 0;
//
//fprintf(stderr, "[%s] constructing transcript for identified junctions ... \n", __func__);
//if((bag_transcript_gen(&BAGR_HT, EXON_HT, opt))!=0){
// fprintf(stderr, "[%s] fail to construct transcript\n", __func__);
// return -1;
//}
//
//fprintf(stderr, "[%s] testing junctions ... \n", __func__);
//if((test_junction(&SOLU_HT, &BAGR_HT, opt))!=0){
// fprintf(stderr, "[%s] fail to rescan reads\n", __func__);
// return -1;
//}
//
//fprintf(stderr, "[%s] testing fusion ... \n", __func__);
//if((test_fusion(&SOLU_HT, &BAGR_HT, opt))!=0){
// fprintf(stderr, "[%s] fail to align supportive reads to transcript\n", __func__);
// return -1;
//}
//
//solution_pair_t *s; for(s=SOLU_HT; s!=NULL; s=s->hh.next){printf("%s\t%s\t%s\t%f\t%f\n", s->idx, s->junc_name, s->fuse_name, s->r1->prob, s->r2->prob);}
//
fprintf(stderr, "[%s] cleaning up ... \n", __func__);
if(EXON_HT) fasta_destroy(&EXON_HT);
if(KMER_HT) kmer_destroy(&KMER_HT);
if(BAGR_HT) bag_destory(&BAGR_HT);
if(SOLU_HT) solution_pair_destory(&SOLU_HT);
fprintf(stderr, "[%s] congradulations! it succeededs! \n", __func__);
fprintf(stderr, "[%s] congradualtions! it succeeded! \n", __func__);
return 0;
}

5 changes: 4 additions & 1 deletion src/predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ static fasta_t *EXON_HT = NULL; // stores sequences in in.fa
static kmer_t *KMER_HT = NULL; // kmer hash table by indexing in.fa
static bag_t *BAGR_HT = NULL; // Breakend Associated Graph (BAG)
static solution_pair_t *SOLU_HT = NULL; // alignment solition of reads against JUN0_HT
static gene_t *GENE_HT = NULL; // alignment solition of reads against JUN0_HT

//opt
typedef struct {
Expand Down Expand Up @@ -102,7 +103,9 @@ static inline gene_t *gene_init(){
instance->transcript = NULL;
return instance;
}

/*
* destory the gene_t
*/
static inline int gene_destory(gene_t **instance){
if(*instance==NULL) return -1;
gene_t *gene_cur, *gene_tmp;
Expand Down

0 comments on commit 2b7ba35

Please sign in to comment.