Merge branch 'master' of github.com:ririw/lawyered

ririw · Oct 5, 2012 · 599a7be · 599a7be
2 parents 34393a7 + cf66e8f
commit 599a7be
Show file tree

Hide file tree

Showing 3 changed files with 152 additions and 2 deletions.
diff --git a/Tentative.txt b/Tentative.txt
@@ -0,0 +1,10 @@
+maybe
+seem
+seems
+perhaps
+suggest
+suggests
+tends
+tend
+appears
+appear
diff --git a/TestWordQuantities.py b/TestWordQuantities.py
@@ -2,6 +2,86 @@
 import WordTypeQuantities
 from Data import Data
 
+class TestModifierCheck(unittest.TestCase):
+    """Tests for WordTypeQuantities.ModifierCheck."""
+
+    def test_empty(self):
+        """An empty string builds to 0 for both values of Data's second argument."""
+        v = WordTypeQuantities.ModifierCheck()
+        for flag in (True, False):
+            # assertEqual (unlike a bare assert) survives `python -O` and
+            # reports both values when the check fails.
+            self.assertEqual(v.build(Data("", flag)), 0)
+
+    def test_words(self):
+        """Each sample sentence builds to its expected modifier count."""
+        v = WordTypeQuantities.ModifierCheck()
+        cases = {"""I was nervous because I had never been in a strip club before, it was like entering a building where people danced on poles for money; it was exactly that type of place.""": 4,
+                 """Perhaps there are aliens on Mars, we will only truly know when we ask.""": 4,
+                 """Things that seem bad always maybe appear to be worse""": 3}
+        for (words, count) in cases.items():
+            # The expected count must not depend on Data's second argument.
+            for flag in (True, False):
+                self.assertEqual(v.build(Data(words, flag)), count)
+
+
+class TestWordQuantity(unittest.TestCase):
+    """Tests for WordTypeQuantities.WordQuantity."""
+
+    def test_empty(self):
+        """An empty string builds to 0 for both values of Data's second argument."""
+        v = WordTypeQuantities.WordQuantity()
+        for flag in (True, False):
+            # assertEqual (unlike a bare assert) survives `python -O` and
+            # reports both values when the check fails.
+            self.assertEqual(v.build(Data("", flag)), 0)
+
+    def test_words(self):
+        """Each sample sentence builds to its expected word count."""
+        v = WordTypeQuantities.WordQuantity()
+        cases = {"""I was nervous because I had never been in a strip club before, it was like entering a building where people danced on poles for money; it was exactly that type of place.""": 33,
+                 """Perhaps there are aliens on Mars, we will only truly know when we ask.""": 14,
+                 """Things that seem bad always maybe appear to be worse""": 10}
+        for (words, count) in cases.items():
+            # The expected count must not depend on Data's second argument.
+            for flag in (True, False):
+                self.assertEqual(v.build(Data(words, flag)), count)
+
+
+class TestTentativeCheck(unittest.TestCase):
+    """Tests for WordTypeQuantities.TentativeCheck."""
+
+    def test_empty(self):
+        """An empty string builds to 0 for both values of Data's second argument."""
+        v = WordTypeQuantities.TentativeCheck()
+        for flag in (True, False):
+            # assertEqual (unlike a bare assert) survives `python -O` and
+            # reports both values when the check fails.
+            self.assertEqual(v.build(Data("", flag)), 0)
+
+    def test_with_words(self):
+        """Each sample sentence builds to its expected tentative-word count."""
+        v = WordTypeQuantities.TentativeCheck()
+        cases = {"""I was nervous because I had never been in  a strip club before, it was like entering a building where people danced on poles for money; it was exactly that type of place.""": 0,
+                 """Perhaps there are aliens on Mars, we will only truly know when we ask.""": 1,
+                 """Things that seem bad always maybe appear to be worse""": 3}
+        for (words, count) in cases.items():
+            # The expected count must not depend on Data's second argument.
+            for flag in (True, False):
+                self.assertEqual(v.build(Data(words, flag)), count)
+
+
+class TestCausationQuantity(unittest.TestCase):
+    """Tests for WordTypeQuantities.CausationCheck."""
+
+    def test_empty(self):
+        """An empty string builds to 0 for both values of Data's second argument."""
+        v = WordTypeQuantities.CausationCheck()
+        for flag in (True, False):
+            # assertEqual (unlike a bare assert) survives `python -O` and
+            # reports both values when the check fails.
+            self.assertEqual(v.build(Data("", flag)), 0)
+
+    def test_causation(self):
+        """Each sample sentence builds to its expected causation-word count."""
+        v = WordTypeQuantities.CausationCheck()
+        cases = {"""I am not sorry because   I was merely repeating.""": 1,
+                 """There are three effects in effects in effects puppies. """: 3,
+                 """The effect of because me turtle.""": 2}
+        for (words, count) in cases.items():
+            # The expected count must not depend on Data's second argument.
+            for flag in (True, False):
+                self.assertEqual(v.build(Data(words, flag)), count)
+
+
 class TestVerbQuantity(unittest.TestCase):
    def test_empty(self):
       v = WordTypeQuantities.VerbQuantity()

diff --git a/WordTypeQuantities.py b/WordTypeQuantities.py
@@ -1,6 +1,7 @@
 from Feature import ContinuousFeature
 from Data import Data
 import nltk
+import string
 
 class SummingScentenceTag(ContinuousFeature):
    def tag(self, data):
@@ -15,16 +16,75 @@ def totalBy(self, itemToCountFn, data):
       tagged = self.tag(data.string)
       for w in tagged:
          if len(w) == 2:
-            stepSum += itemToCountFn(w[1])
+            stepSum += itemToCountFn(w)
       return stepSum
    def build(self, data):
       assert(isinstance(data, Data))
       return self.totalBy(self.sumFn, data)
 
 class VerbQuantity(SummingScentenceTag):
    def sumFn(self, v):
-      if "VB" in v:
+      if "VB" in v[1]:
          return 1
       else:
          return 0
 
+class ModifierCheck(SummingScentenceTag):
+    """Counts tagged tokens whose tag belongs to a fixed set of modifier tags."""
+
+    def sumFn(self, v):
+        """Return 1 when the tag at v[1] is one of the listed tags, else 0.
+
+        v is indexed as v[1] for the tag; presumably a (word, tag) pair
+        produced by the base class tagger -- confirm against tag().
+        """
+        modifier_tags = {"JJ", "JJR", "JJS", "JJT", "NR", "RB",
+                         "RBR", "RBT", "RN", "RP", "WRB"}
+        return 1 if v[1] in modifier_tags else 0
+
+class WordQuantity(SummingScentenceTag):
+    """Counts tokens that are not made up purely of punctuation."""
+
+    def sumFn(self, v):
+        """Return 0 for an empty or punctuation-only token at v[0], else 1.
+
+        v is indexed as v[0] for the token text; presumably a (word, tag)
+        pair produced by the base class tagger -- confirm against tag().
+        """
+        # Check character by character: `token in string.punctuation` is a
+        # substring test, so multi-character punctuation tokens such as
+        # "..." or "--" would otherwise be counted as words.
+        for ch in v[0]:
+            if ch not in string.punctuation:
+                return 1
+        return 0
+
+class CausationCheck(SummingScentenceTag):
+    """Counts tokens whose lower-cased text is a line of Causation.txt."""
+
+    def sumFn(self, v):
+        """Return 1 if v[0], lower-cased, is a line of Causation.txt, else 0.
+
+        The path is relative, so the file is resolved against the current
+        working directory. Raises IOError/OSError if it cannot be opened.
+        """
+        # sumFn runs once per tagged token, so the handle must be closed
+        # each time; the context manager guarantees that.
+        # NOTE(review): the word list is re-read on every call; consider
+        # loading it once if this shows up in profiles.
+        with open('Causation.txt') as f:
+            causationWords = f.read().split('\n')
+        if v[0].lower() in causationWords:
+            return 1
+        return 0
+
+class TentativeCheck(SummingScentenceTag):
+    """Counts tokens whose lower-cased text is a line of Tentative.txt."""
+
+    def sumFn(self, v):
+        """Return 1 if v[0], lower-cased, is a line of Tentative.txt, else 0.
+
+        The path is relative, so the file is resolved against the current
+        working directory. Raises IOError/OSError if it cannot be opened.
+        """
+        # sumFn runs once per tagged token, so the handle must be closed
+        # each time; the context manager guarantees that.
+        # NOTE(review): the word list is re-read on every call; consider
+        # loading it once if this shows up in profiles.
+        with open('Tentative.txt') as f:
+            tentativeWords = f.read().split('\n')
+        if v[0].lower() in tentativeWords:
+            return 1
+        return 0
+
+class firstPersonSingularPronoun(SummingScentenceTag):
+    """Counts tokens whose lower-cased text is a line of 1stPersonSingularPronouns.txt."""
+
+    def sumFn(self, v):
+        """Return 1 if v[0], lower-cased, is a line of the word file, else 0.
+
+        The path is relative, so the file is resolved against the current
+        working directory. Raises IOError/OSError if it cannot be opened.
+        """
+        # sumFn runs once per tagged token, so the handle must be closed
+        # each time; the context manager guarantees that.
+        # NOTE(review): the word list is re-read on every call; consider
+        # loading it once if this shows up in profiles.
+        with open('1stPersonSingularPronouns.txt') as f:
+            singularPronoun = f.read().split('\n')
+        if v[0].lower() in singularPronoun:
+            return 1
+        return 0
+
+class firstPersonPluralPronoun(SummingScentenceTag):
+    """Counts tokens whose lower-cased text is a line of 1stPersonPluralPronouns.txt."""
+
+    def sumFn(self, v):
+        """Return 1 if v[0], lower-cased, is a line of the word file, else 0.
+
+        The path is relative, so the file is resolved against the current
+        working directory. Raises IOError/OSError if it cannot be opened.
+        """
+        # sumFn runs once per tagged token, so the handle must be closed
+        # each time; the context manager guarantees that.
+        # NOTE(review): the word list is re-read on every call; consider
+        # loading it once if this shows up in profiles.
+        with open('1stPersonPluralPronouns.txt') as f:
+            pluralPronoun = f.read().split('\n')
+        if v[0].lower() in pluralPronoun:
+            return 1
+        return 0
+
+class thirdPersonPronoun(SummingScentenceTag):
+    """Counts tokens whose lower-cased text is a line of 3rdPersonPronouns.txt."""
+
+    def sumFn(self, v):
+        """Return 1 if v[0], lower-cased, is a line of the word file, else 0.
+
+        The path is relative, so the file is resolved against the current
+        working directory. Raises IOError/OSError if it cannot be opened.
+        """
+        # sumFn runs once per tagged token, so the handle must be closed
+        # each time; the context manager guarantees that.
+        # NOTE(review): the word list is re-read on every call; consider
+        # loading it once if this shows up in profiles.
+        with open('3rdPersonPronouns.txt') as f:
+            thirdPronoun = f.read().split('\n')
+        if v[0].lower() in thirdPronoun:
+            return 1
+        return 0