Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

We’re showing branches in this repository, but you can also compare across forks.

base fork: julosaure/pyLatticeAligner
base: b3092ed728
...
head fork: julosaure/pyLatticeAligner
compare: bdd4ceb4a1
  • 4 commits
  • 4 files changed
  • 0 commit comments
  • 1 contributor
Showing with 21 additions and 63 deletions.
  1. +2 −3 batcher.py
  2. +5 −10 editDistance.py
  3. +0 −8 lalign.py
  4. +14 −42 multiAligner.py
5 batcher.py
View
@@ -8,8 +8,7 @@
def main(directory, refFile, opts=None):
begin = datetime.datetime.now()
- print "End after "+ str(datetime.datetime.now()-begin)
-
+
outFile = os.path.join(directory,refFile[:-4] + ".aligned")
out = open(outFile, "w")
@@ -50,6 +49,6 @@ def main(directory, refFile, opts=None):
parser.add_argument("-opts", nargs=argparse.REMAINDER, action="store", help="Options to pass to the multialigner")
args = parser.parse_args()
- print args
+ #print args
main(args.dir[0], args.refName[0], args.opts)
15 editDistance.py
View
@@ -4,7 +4,8 @@
from alignment import *
class SimpleEditDistance:
-
+ """ Standard Edit Distance.
+ """
def ins(self):
return 1
@@ -26,7 +27,9 @@ def match(self, item1, item2):
return score
class PosEditDistance:
-
+ """ POS enhanced edit distance: aligns preferentially tokens
+ that share the same POS.
+ """
def ins(self):
return 2
@@ -106,12 +109,4 @@ def match(self, item1, item2):
score = scoreTagMatch
break
- #elif tok1.tag == tok2.tag:
- #assert tok1.tag == tok2.tag, tok1.pp()+ " " +tok2.pp()
- # the POS match but not the tokens
- # score = 1
- #else:
- # substitution
- # score = 2
-
return score
8 lalign.py
View
@@ -40,13 +40,9 @@ def __str__(self):
class LNumSentence(list):
def __init__(self, lSentence_):
lSentence = copy.copy(lSentence_)
- #lPos = [Pos(i) for i in xrange(len(lSentence))]
- #l = zip(lPos, lSentence)
- #self.extend(l)
i = 0
for sent in lSentence:
p = AbsSent([AbsPos(i), sent])
- #print p
self.append(p)
i += 1
@@ -60,10 +56,6 @@ def popBySentenceNum__(self, num_):
break
pos += 1
-#class LAlign(list):
-# def __setitem__(self, pos, item):
-# assert isinstance(item, Alignment) or isinstance(item, PosSent), str(type(item))
-# self[pos] = item
def getSentOrAlignAtPos(self, pos):
item = self[pos]
56 multiAligner.py
View
@@ -23,7 +23,7 @@ def align(self):
lAlign = copy.copy(lNumSentence)
while len(lAlign) > 1:
- distMat = self.computeDistanceMatrix2(lAlign)
+ distMat = self.computeDistanceMatrix(lAlign)
print distMat
i1, i2 = self.pickItemsToAlign(distMat, lAlign)
a2 = lAlign.pop(i2)
@@ -38,6 +38,8 @@ def align(self):
return align, alignstr
def pickItemsToAlign(self, distMat, lAlign):
+ """ Finds a pair of Sentence and/or Alignments whose distance is minimal.
+ """
minVal = 999
minI = 0
minJ = 0
@@ -49,7 +51,9 @@ def pickItemsToAlign(self, distMat, lAlign):
minJ = j
return minI, minJ
- def computeDistanceMatrix2(self, lAlign):
+ def computeDistanceMatrix(self, lAlign):
+ """ Computes the distance matrix between all Sentences and Alignments.
+ """
nbSentence = len(lAlign)
distMat = numpy.zeros((nbSentence, nbSentence), int)
@@ -63,35 +67,9 @@ def computeDistanceMatrix2(self, lAlign):
return distMat
- def pickSentencePair(self, distMat, sentencesToAlign):
- """ Pick the sentence pair with minimal edit distance in distMat.
- """
- minVal = 999
- minI = 0
- minJ = 0
- for i in xrange(len(sentencesToAlign)):
- for j in xrange(i+1, len(sentencesToAlign)):
- if distMat[i,j] < minVal:
- minVal = distMat[i,j]
- minI = i
- minJ = j
- return minI, minJ
-
- def pickMinSentence(self, distMat, sentencesToAlign, lAlignedSentences):
- """ Pick the sentence with minimal edit distance to previously aligned sentences.
- """
- minVal = 999
- minJ = 0
- for i in lAlignedSentences:
- for j in xrange(len(sentencesToAlign)):
- if j in lAlignedSentences:
- continue
- if distMat[i,j] < minVal:
- minVal = distMat[i,j]
- minJ = j
- return minJ
-
def alignItems(self, a1, a2, sentencesToAlign):
+ """ Aligns 2 items, either Sentence or Alignment.
+ """
if isinstance(a1, tuple) and isinstance(a2, tuple):
print a1
print a2
@@ -109,6 +87,8 @@ def alignItems(self, a1, a2, sentencesToAlign):
return align
def alignAlignments(self, a1, a2):
+ """Aligns 2 Alignment.
+ """
finalCell = self.computeEditDistance(a1, a2)
#print editMat
@@ -164,6 +144,8 @@ def alignAlignments(self, a1, a2):
def alignSentenceVsAlignment(self, a1, a2):
+ """Aligns an Alignment and a Sentence.
+ """
n2, s2 = a2 #align.lSentence[n2]
#print s2
finalCell = self.computeEditDistance(a1, s2)
@@ -213,6 +195,8 @@ def alignSentenceVsAlignment(self, a1, a2):
def alignSentencePair(self, a1, a2, sentencesToAlign):
+ """ Aligns 2 Sentence.
+ """
n1, s1 = a1 #sentencesToAlign[n1]
n2, s2 = a2 #sentencesToAlign[n2]
finalCell = self.computeEditDistance(s1, s2)
@@ -246,18 +230,6 @@ def alignSentencePair(self, a1, a2, sentencesToAlign):
align.alignedSentences.extend([n1, n2])
return align
- def computeDistanceMatrix(self, sentenceToAlign):
- """ Compute the matrix of edit distance between all pairs of items (Sentence or Alignment) of sentencesToAlign.
- """
- nbSentence = len(sentenceToAlign)
- distMat = numpy.zeros((nbSentence, nbSentence), int)
-
- for i in xrange(len(sentenceToAlign)):
- for j in xrange(i+1, len(sentenceToAlign)):
- editMat, finalCell = self.computeEditDistance(sentenceToAlign[i], sentenceToAlign[j])
- distMat[i,j] = finalCell.val
- return distMat
-
@memo
def computeEditDistance(self, s1, s2):
""" Compute the edit distance between two items, either Sentences or Alignments.

No commit comments for this range

Something went wrong with that request. Please try again.