Permalink
Browse files

check for null fields in both TermVector and TermVectorCentroid classes to avoid NPEs
  • Loading branch information...
thedatachef committed Apr 27, 2011
1 parent e3fdfb3 commit 15cb007f7213afe8d1b9a004f722006378a2d0d1
Showing with 13 additions and 9 deletions.
  1. +5 −3 src/main/java/varaha/text/TermVector.java
  2. +8 −6 src/main/java/varaha/text/TermVectorCentroid.java
@@ -69,13 +69,15 @@ public Double cosineSimilarity(TermVector other) throws ExecException {
@param other: Another term vector
@return the dot product
- */
+ */
public Double dotProduct(TermVector other) throws ExecException {
Double result = 0.0;
for (Tuple x_i : this) {
for (Tuple y_i : other) {
- if (x_i.get(0).toString().equals(y_i.get(0).toString())) {
- result += (Double)x_i.get(1)*(Double)y_i.get(1);
+ if ( !(x_i.isNull(0) || x_i.isNull(1) || y_i.isNull(0) || y_i.isNull(1)) ) {
+ if (x_i.get(0).toString().equals(y_i.get(0).toString())) {
+ result += (Double)x_i.get(1)*(Double)y_i.get(1);
+ }
}
}
}
@@ -65,12 +65,14 @@ public DataBag exec(Tuple input) throws IOException {
for (Tuple t : bagOfVectors) {
DataBag v = (DataBag)t.get(0);
for (Tuple v_i : v) {
- String term = v_i.get(0).toString();
- Object currentValue = termSums.get(term);
- if (currentValue == null) {
- termSums.put(term, v_i.get(1));
- } else {
- termSums.put(term, (Double)v_i.get(1) + (Double)currentValue);
+ if (!(v_i.isNull(0) || v_i.isNull(1))) {
+ String term = v_i.get(0).toString();
+ Object currentValue = termSums.get(term);
+ if (currentValue == null) {
+ termSums.put(term, v_i.get(1));
+ } else {
+ termSums.put(term, (Double)v_i.get(1) + (Double)currentValue);
+ }
}
}
}

0 comments on commit 15cb007

Please sign in to comment.