Skip to content

Commit

Permalink
bug fixed in sum_all_alignments function
Browse files Browse the repository at this point in the history
  • Loading branch information
vibansal committed Dec 6, 2018
1 parent fff1f21 commit bf1ac43
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 38 deletions.
20 changes: 10 additions & 10 deletions hairs-src/estimate_hmm_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ int BTI[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

typedef struct // probabilities are in logspace
typedef struct // probabilities are in log10space
{
int states; // match, insertion,deletion
double** TRS; // transition probabilities between states
Expand All @@ -31,11 +31,11 @@ Align_Params* init_params()
(*AP).TRS = calloc((*AP).states,sizeof(double*)); // match =0, ins =1, del = 2
for (i=0;i<AP->states;i++) AP->TRS[i] = calloc(sizeof(double),AP->states);
// initialize alignment parameters data structure
AP->match = log(0.979); AP->mismatch = log(0.007);
AP->deletion = log(1); AP->insertion =log(1);
AP->TRS[0][0] = log(0.879); AP->TRS[0][1] = log(0.076); AP->TRS[0][2] = log(0.045);
AP->TRS[1][0] = log(0.865); AP->TRS[1][1] = log(0.135);
AP->TRS[2][0] = log(0.730); AP->TRS[2][2] = log(0.27);
AP->match = log10(0.97); AP->mismatch = log10(0.01);
AP->deletion = log10(1); AP->insertion =log10(1);
AP->TRS[0][0] = log10(0.892); AP->TRS[0][1] = log10(0.071); AP->TRS[0][2] = log10(0.037); // match to match, ins, del
AP->TRS[1][0] = log10(0.740); AP->TRS[1][1] = log10(0.26); // insertion
AP->TRS[2][0] = log10(0.88); AP->TRS[2][2] = log10(0.12); // deletion
AP->MEM = calloc(4,sizeof(double*)); for (i=0;i<4;i++) AP->MEM[i] = calloc(4,sizeof(double));
for (i=0;i<4;i++)
{
Expand Down Expand Up @@ -128,8 +128,8 @@ void print_error_params(int* emission_counts,int* trans_counts,int* indel_length
for (b2=0;b2<4;b2++) total += emission_counts[b1*4+b2];
for (b2=0;b2<4;b2++)
{
fprintf(stderr,"%c -> %c %0.4f | ",ITB[b1],ITB[b2],(float)emission_counts[b1*4+b2]/total);
AP->MEM[b1][b2] = log((float)emission_counts[b1*4+b2]/total);
AP->MEM[b1][b2] = log10((float)(emission_counts[b1*4+b2]+1)/total);
fprintf(stderr,"%c -> %c %0.4f %f | ",ITB[b1],ITB[b2],(float)emission_counts[b1*4+b2]/total,AP->MEM[b1][b2]);
}
fprintf(stderr,"\n");
}
Expand All @@ -139,8 +139,8 @@ void print_error_params(int* emission_counts,int* trans_counts,int* indel_length
for (b2=0;b2<3;b2++) total += trans_counts[b1*3+b2];
for (b2=0;b2<3;b2++)
{
fprintf(stderr,"%c -> %c %0.4f | ",state[b1],state[b2],(float)trans_counts[b1*3+b2]/total);
AP->TRS[b1][b2] = log((float)trans_counts[b1*3+b2]/total);
AP->TRS[b1][b2] = log10((float)(trans_counts[b1*3+b2]+1)/total);
fprintf(stderr,"%c -> %c %0.4f %f | ",state[b1],state[b2],(float)trans_counts[b1*3+b2]/total,AP->TRS[b1][b2]);
}
fprintf(stderr,"\n");
}
Expand Down
54 changes: 32 additions & 22 deletions hairs-src/realign_pairHMM.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
int ALLOW_END_GAPS = 0; // for anchored alignment...
int BAND_WIDTH = 20; // default

#define logsum(a, b) (((a) > (b)) ? ((a) + log(1.0 + exp(b-a))) : ((b) + log(1.0 + exp( (a) - (b)))))
//#define logsum(a, b) (((a) > (b)) ? ((a) + log(1.0 + exp(b-a))) : ((b) + log(1.0 + exp( (a) - (b)))))
#define logsum(a, b) (((a) > (b)) ? ((a) + log10(1.0 + pow(10.0, (b) - (a)))) : ((b) + log10(1.0 + pow(10.0, (a) - (b)))))

double logsum1(double a, double b)
{
if (a > b) return a + log(1 + exp(b-a));
else return b + log(1+exp(a-b));
if (a > b) return a + log10(1.0 + pow(10,b-a));
else return b + log(1.0+pow(10,a-b));
}

// do we need anchors, just require the read-seq to be aligned end-2-end while allowing end-gaps for reference haplotype
Expand Down Expand Up @@ -74,17 +75,18 @@ double sum_all_alignments(char* v, char* w,Align_Params* params, int min_band_wi

for (j=band_start;j<band_end+1;j++)
{
int lower_continue = lower_prev[j] + params->TRS[1][1]; // insertion to insertion
int lower_from_middle = middle_prev[j] + params->TRS[0][1]; // match to insertion
double lower_continue = lower_prev[j] + params->TRS[1][1]; // insertion to insertion
double lower_from_middle = middle_prev[j] + params->TRS[0][1]; // match to insertion
lower_curr[j] = params->insertion + logsum(lower_continue,lower_from_middle);

int upper_continue = upper_curr[j-1] + params->TRS[2][2];
int upper_from_middle = middle_curr[j-1] + params->TRS[0][2];
double upper_continue = upper_curr[j-1] + params->TRS[2][2];
double upper_from_middle = middle_curr[j-1] + params->TRS[0][2];
upper_curr[j] = params->deletion + logsum(upper_continue,upper_from_middle);

int middle_from_lower = lower_prev[j-1] + params->TRS[1][0];
int middle_continue = middle_prev[j-1] + params->TRS[0][0];
int middle_from_upper = upper_prev[j-1] + params->TRS[2][0];
double middle_from_lower = lower_prev[j-1] + params->TRS[1][0];
double middle_continue = middle_prev[j-1] + params->TRS[0][0];
double middle_from_upper = upper_prev[j-1] + params->TRS[2][0];

double s = logsum(middle_from_lower,middle_continue);
double match_emission = params->match;
if (v[i-1] != w[j-1]) match_emission = params->mismatch;
Expand All @@ -106,7 +108,8 @@ double sum_all_alignments(char* v, char* w,Align_Params* params, int min_band_wi
double ll = middle_prev[lw];
free(lower_prev); free(middle_prev); free(upper_prev);
free(lower_curr); free(middle_curr); free(upper_curr);
return ll/log(10);
//fprintf(stderr,"v %s w %s %f \n",v,w,ll/log(10));
return ll;
}


Expand All @@ -115,12 +118,21 @@ void test_realignment()
Align_Params AP;
AP.states = 3;
AP.TRS = calloc(sizeof(double*),AP.states); // match =0, ins =1, del = 2
int i=0;
int i=0,j=0;
for (i=0;i<AP.states;i++) AP.TRS[i] = calloc(sizeof(double),AP.states);
AP.match = log(0.979); AP.mismatch = log(0.007); AP.deletion = log(1); AP.insertion =log(1);
AP.TRS[0][0] = log(0.879); AP.TRS[0][1] = log(0.076); AP.TRS[0][2] = log(0.045);
AP.TRS[1][0] = log(0.865); AP.TRS[1][1] = log(0.135);
AP.TRS[2][0] = log(0.730); AP.TRS[2][2] = log(0.27);
AP.match = log10(0.979); AP.mismatch = log10(0.007); AP.deletion = log10(1); AP.insertion =log10(1);
AP.TRS[0][0] = log10(0.879); AP.TRS[0][1] = log10(0.076); AP.TRS[0][2] = log10(0.045);
AP.TRS[1][0] = log10(0.865); AP.TRS[1][1] = log10(0.135);
AP.TRS[2][0] = log10(0.730); AP.TRS[2][2] = log10(0.27);
AP.MEM = calloc(4,sizeof(double*)); for (i=0;i<4;i++) AP.MEM[i] = calloc(4,sizeof(double));
for (i=0;i<4;i++)
{
for (j=0;j<4;j++)
{
if (i==j) AP.MEM[i][j] = AP.match; else AP.MEM[i][j] = AP.mismatch;
}
}


char* ref = malloc(1024); char* alt = malloc(1024); char* read = malloc(1024);
strcpy(ref,"GCTGGTGTAATGCAATG"); strcpy(alt,"GCTGGTGCAATGCAATG"); strcpy(read,"GCTGGTTAATGCAATG");
Expand All @@ -132,20 +144,18 @@ void test_realignment()
double score1 = sum_all_alignments(alt,read,&AP,BAND_WIDTH);
double scoreS = logsum(score0,score1);
double phred;
if (score0 > score1) phred = -10.0*(score1-scoreS)/log(10);
else phred = -10.0*(score0-scoreS)/log(10);
if (score0 > score1) phred = -10.0*(score1-scoreS);
else phred = -10.0*(score0-scoreS);
fprintf(stdout,"match %f %f %f \n",AP.TRS[0][0],AP.TRS[0][1],AP.TRS[0][2]);
fprintf(stdout,"ins %f %f del %f %f\n",AP.TRS[1][0],AP.TRS[1][1],AP.TRS[2][0],AP.TRS[2][2]);
fprintf(stdout,"code for pair HMM realignment of two sequences %f %f %f\n",log10(exp(score0)),log10(exp(score1)),phred);
fprintf(stdout,"code for pair HMM realignment of two sequences %f %f %f\n",score0,score1,phred);
}



/*
int main(int argc, char** argv)
{
test_realignment();
return 0;
}
*/

*/
8 changes: 2 additions & 6 deletions hairs-src/realignbamread.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ int MAX_SNPs_SHORT_HAP = 10; // max number of SNVs in a short haplotype
// given a=log10(x) and b=log10(y), returns log10(x-y)
#define subtractlogs(a, b) (((a) > (b)) ? ((a) + log10(1.0 - pow(10, (b) - (a)))) : ((b) + log10(1.0 - pow(10.0, (a) - (b)))))

// credit to Jeff Mercado at stack overflow for the original version of this function found here: https://stackoverflow.com/questions/5370753/using-stdlibs-qsort-to-sort-an-array-of-strings
int compare_strings(const void* a, const void* b)
{
const char *ia = *(const char **)a; //const char *ia = (const char *)a;
Expand All @@ -29,7 +28,6 @@ int compare_strings(const void* a, const void* b)
return strcmp(ia, ib);
}

// full credit for this function goes to theJPster at stackoverflow
// https://stackoverflow.com/questions/2509679/how-to-generate-a-random-number-from-within-a-range
unsigned int rand_interval(unsigned int min, unsigned int max){
int r;
Expand Down Expand Up @@ -217,7 +215,7 @@ int realign_HAPs(struct alignedread* read, REFLIST* reflist, int positions[], VA
if (SUM_ALL_ALIGN ==0) altscore = nw(althap,subread,0);
else altscore = sum_all_alignments(althap,subread,AP,20);

if (VERBOSE) fprintf(stdout,"score: %f\n",altscore);
//if (VERBOSE==0) fprintf(stdout,"h %d score: %f \n%s\n%s\n",h,altscore,althap,subread);

// for an index s in the short haplotype,
// maintain the log sum of scores that have a variant at s
Expand Down Expand Up @@ -246,8 +244,7 @@ int realign_HAPs(struct alignedread* read, REFLIST* reflist, int positions[], VA
n_max_haps++;
}

// add the reference score and alternate score to the total
//total_score = addlogs(total_score, refscore);
// add the alternate score to the total
total_score = addlogs(total_score, altscore);

//free(althap);
Expand All @@ -269,7 +266,6 @@ int realign_HAPs(struct alignedread* read, REFLIST* reflist, int positions[], VA
if (varlist[ss].type != 0 && !PARSEINDELS){
continue;
}

fragment->alist[fragment->variants].varid = ss;

if (max_hap & (int)(pow(2,s))){
Expand Down

0 comments on commit bf1ac43

Please sign in to comment.