-
Notifications
You must be signed in to change notification settings - Fork 0
/
Align.php
137 lines (133 loc) · 3.75 KB
/
Align.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
<?
class Align extends SeqIO{
//var $seqio; # I recently changed the name for this var w/out testing...
//var $type;
//var $numSequences;
function __construct($alignment){
parent::__construct($alignment);
return;
$alignmentClass=get_class($alignment);
if($alignmentClass=='SeqIO'){
$this->seqio=$alignment;
$this->type='fasta';
$this->numSequences=$alignment->numSequences;
}
elseif(is_string($alignment)){
$alignio=new AlignIO($alignment);
$align=$alignio->nextAlignment();
}
else{
die("Error: invalid argument given for class Align.");
}
}
/**
* trims away sequence information where there are gaps
* in every seq but one
* UNTESTED
*/
function getTrimmed(){
$gap='-';
$seq=$this->seq();
$numBases=strlen($seq[0]->sequence());
$numSequences=count($seq);
$numGapsForTrimming=5; # if this many gaps are found, then trim
for($i=0;$i<$numBases;$i++){
$numGaps=0;
$shouldBeTrimmed=false;
$numTrimmableBases=0; # if this number gets up to $numGapsForTrimming, then that number of bases should be trimmed
for($j=0;$j<$numSequences;$j++){
$base=$seq[$j]->substr($i,1);
if($base==$gap) $numGaps++;
}
if($numGaps>=$numSequences-1){
$shouldBeTrimmed=true;
}
if($shouldBeTrimmed==true){
for($j=0;$j<$numSequences;$j++){
$seq[$j]->removeBase($i);
}
$i--;
$numBases--;
}
}
return $seq;
}
/**
* finds indels and mismatches at loci where all other
* sequences are different than the 0th sequence
*/
function findIndels($backboneId){
$gap='-';
$i=0;
$backboneIndex=-1;
while(false!==($seq=$this->seqio->nextSeq())){
$sequence[$i]=$seq->sequence().$gap;
$seqid[$i]=$seq->id();
if($seqid[$i]==$backboneId) $backboneIndex=$i;
$i++;
}
#exit;
if($backboneIndex<0){
die("Error: backbone sequence not found for $backboneId\n");
}
# start checking the alignment for indels
$numBases=max(array_map('strlen',$sequence)); #max strlen for each seq
$numSequences=$this->numSequences;
$indel=array();
for($pos=0;$pos<$numBases;$pos++){
$backboneBase=$sequence[$backboneIndex]{$pos};
$del=$ins=$mismatch=0;
for($j=0;$j<$numSequences;$j++){
if($j==$backboneIndex) continue;
$seqId=$seqid[$i];
$thisBase=$sequence[$j]{$pos};
if($backboneBase!=$gap){
# insertion count: backbone is a base, others are gaps
if($thisBase==$gap){
$ins++;
}
# mismatch count: all others are different bases than backbone
elseif($backboneBase!=$thisBase){
$mismatch++;
}
}
elseif($backboneBase==$gap){
# deletion count: backbone is gap, others aren't
if($sequence[$j]{$pos}!=$gap){
$del++;
}
}
}
$feature="";
if($del==$numSequences-1){
$feature='deletion';
}
if($ins>$numSequences-2){
$feature='insertion';
}
if($mismatch==$numSequences-1){
$feature='mismatch';
}
$lastFeature=$currentFeature;
$currentFeature=$feature;
# combine ranges
# if the last and current features are the same, extend the end range
if($currentFeature==$lastFeature){
$endRange=$pos;
}
# if they are different, record the start/end and reset the start/end
elseif($lastFeature!=""){
#$endRange++;
$thisRange="$startRange-$endRange";
if($startRange==$endRange) $thisRange=$startRange;
$range[]="$lastFeature: $thisRange";
$startRange=$pos;
}
else{
$startRange=$endRange=$pos;
}
}
return $range;
}
}
?>