Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: julosaure/pyLatticeAligner
base: b3092ed728
...
head fork: julosaure/pyLatticeAligner
compare: bdd4ceb4a1
  • 4 commits
  • 4 files changed
  • 0 commit comments
  • 1 contributor
Showing with 21 additions and 63 deletions.
  1. +2 −3 batcher.py
  2. +5 −10 editDistance.py
  3. +0 −8 lalign.py
  4. +14 −42 multiAligner.py
View
5 batcher.py
@@ -8,8 +8,7 @@
def main(directory, refFile, opts=None):
begin = datetime.datetime.now()
- print "End after "+ str(datetime.datetime.now()-begin)
-
+
outFile = os.path.join(directory,refFile[:-4] + ".aligned")
out = open(outFile, "w")
@@ -50,6 +49,6 @@ def main(directory, refFile, opts=None):
parser.add_argument("-opts", nargs=argparse.REMAINDER, action="store", help="Options to pass to the multialigner")
args = parser.parse_args()
- print args
+ #print args
main(args.dir[0], args.refName[0], args.opts)
View
15 editDistance.py
@@ -4,7 +4,8 @@
from alignment import *
class SimpleEditDistance:
-
+ """ Standard Edit Distance.
+ """
def ins(self):
return 1
@@ -26,7 +27,9 @@ def match(self, item1, item2):
return score
class PosEditDistance:
-
+ """ POS enhanced edit distance: aligns preferentially tokens
+ that share a same POS.
+ """
def ins(self):
return 2
@@ -106,12 +109,4 @@ def match(self, item1, item2):
score = scoreTagMatch
break
- #elif tok1.tag == tok2.tag:
- #assert tok1.tag == tok2.tag, tok1.pp()+ " " +tok2.pp()
- # the POS match but not the tokens
- # score = 1
- #else:
- # substitution
- # score = 2
-
return score
View
8 lalign.py
@@ -40,13 +40,9 @@ def __str__(self):
class LNumSentence(list):
def __init__(self, lSentence_):
lSentence = copy.copy(lSentence_)
- #lPos = [Pos(i) for i in xrange(len(lSentence))]
- #l = zip(lPos, lSentence)
- #self.extend(l)
i = 0
for sent in lSentence:
p = AbsSent([AbsPos(i), sent])
- #print p
self.append(p)
i += 1
@@ -60,10 +56,6 @@ def popBySentenceNum__(self, num_):
break
pos += 1
-#class LAlign(list):
-# def __setitem__(self, pos, item):
-# assert isinstance(item, Alignment) or isinstance(item, PosSent), str(type(item))
-# self[pos] = item
def getSentOrAlignAtPos(self, pos):
item = self[pos]
View
56 multiAligner.py
@@ -23,7 +23,7 @@ def align(self):
lAlign = copy.copy(lNumSentence)
while len(lAlign) > 1:
- distMat = self.computeDistanceMatrix2(lAlign)
+ distMat = self.computeDistanceMatrix(lAlign)
print distMat
i1, i2 = self.pickItemsToAlign(distMat, lAlign)
a2 = lAlign.pop(i2)
@@ -38,6 +38,8 @@ def align(self):
return align, alignstr
def pickItemsToAlign(self, distMat, lAlign):
+ """ Finds a pair of Sentence and/or Alignments whose distance is minimal.
+ """
minVal = 999
minI = 0
minJ = 0
@@ -49,7 +51,9 @@ def pickItemsToAlign(self, distMat, lAlign):
minJ = j
return minI, minJ
- def computeDistanceMatrix2(self, lAlign):
+ def computeDistanceMatrix(self, lAlign):
+ """ Computes the distance matrix between all Sentence and Alignments.
+ """
nbSentence = len(lAlign)
distMat = numpy.zeros((nbSentence, nbSentence), int)
@@ -63,35 +67,9 @@ def computeDistanceMatrix2(self, lAlign):
return distMat
- def pickSentencePair(self, distMat, sentencesToAlign):
- """ Pick the sentence pair with minimal edit distance in distMat.
- """
- minVal = 999
- minI = 0
- minJ = 0
- for i in xrange(len(sentencesToAlign)):
- for j in xrange(i+1, len(sentencesToAlign)):
- if distMat[i,j] < minVal:
- minVal = distMat[i,j]
- minI = i
- minJ = j
- return minI, minJ
-
- def pickMinSentence(self, distMat, sentencesToAlign, lAlignedSentences):
- """ Pick the sentence with minimal edit distance to previously aligned sentences.
- """
- minVal = 999
- minJ = 0
- for i in lAlignedSentences:
- for j in xrange(len(sentencesToAlign)):
- if j in lAlignedSentences:
- continue
- if distMat[i,j] < minVal:
- minVal = distMat[i,j]
- minJ = j
- return minJ
-
def alignItems(self, a1, a2, sentencesToAlign):
+ """ Aligns 2 items, either Sentence or Alignment.
+ """
if isinstance(a1, tuple) and isinstance(a2, tuple):
print a1
print a2
@@ -109,6 +87,8 @@ def alignItems(self, a1, a2, sentencesToAlign):
return align
def alignAlignments(self, a1, a2):
+ """Aligns 2 Alignment.
+ """
finalCell = self.computeEditDistance(a1, a2)
#print editMat
@@ -164,6 +144,8 @@ def alignAlignments(self, a1, a2):
def alignSentenceVsAlignment(self, a1, a2):
+ """Aligns an Alignment and a Sentence.
+ """
n2, s2 = a2 #align.lSentence[n2]
#print s2
finalCell = self.computeEditDistance(a1, s2)
@@ -213,6 +195,8 @@ def alignSentenceVsAlignment(self, a1, a2):
def alignSentencePair(self, a1, a2, sentencesToAlign):
+ """ Aligns 2 Sentence.
+ """
n1, s1 = a1 #sentencesToAlign[n1]
n2, s2 = a2 #sentencesToAlign[n2]
finalCell = self.computeEditDistance(s1, s2)
@@ -246,18 +230,6 @@ def alignSentencePair(self, a1, a2, sentencesToAlign):
align.alignedSentences.extend([n1, n2])
return align
- def computeDistanceMatrix(self, sentenceToAlign):
- """ Compute the matrix of edit distance between all pairs of items (Sentence or Alignment) of sentencesToAlign.
- """
- nbSentence = len(sentenceToAlign)
- distMat = numpy.zeros((nbSentence, nbSentence), int)
-
- for i in xrange(len(sentenceToAlign)):
- for j in xrange(i+1, len(sentenceToAlign)):
- editMat, finalCell = self.computeEditDistance(sentenceToAlign[i], sentenceToAlign[j])
- distMat[i,j] = finalCell.val
- return distMat
-
@memo
def computeEditDistance(self, s1, s2):
""" Compute the edit distance between two items, either Sentences or Alignments.

No commit comments for this range

Something went wrong with that request. Please try again.