
Commit

Various updates and bugfixes.
ninjanerdbgm committed Aug 21, 2016
1 parent 2e566a5 commit 8286819
Showing 12 changed files with 830 additions and 336 deletions.
88 changes: 46 additions & 42 deletions chatterbotapi.py
@@ -1,7 +1,19 @@
import re
import sys
import hashlib
import urllib
import urllib2
from cookielib import CookieJar

if sys.version_info >= (3, 0):
    from urllib.request import build_opener, HTTPCookieProcessor, urlopen
    from urllib.parse import urlencode
    import http.cookiejar as cookielib

else:
    from urllib import urlencode, urlopen
    from urllib2 import build_opener, HTTPCookieProcessor
    import cookielib

from collections import OrderedDict

import uuid
import xml.dom.minidom

@@ -37,7 +49,7 @@ class ChatterBotFactory:

    def create(self, type, arg = None):
        if type == ChatterBotType.CLEVERBOT:
            return _Cleverbot('http://www.cleverbot.com', 'http://www.cleverbot.com/webservicemin', 35)
            return _Cleverbot('http://www.cleverbot.com', 'http://www.cleverbot.com/webservicemin?uc=165', 35)
        elif type == ChatterBotType.JABBERWACKY:
            return _Cleverbot('http://jabberwacky.com', 'http://jabberwacky.com/webservicemin', 29)
        elif type == ChatterBotType.PANDORABOTS:
@@ -83,35 +95,27 @@ class _CleverbotSession(ChatterBotSession):

    def __init__(self, bot):
        self.bot = bot
        self.vars = {}
        self.vars['start'] = 'y'
        self.vars['icognoid'] = 'wsf'
        self.vars['fno'] = '0'
        self.vars['sub'] = 'Say'
        self.vars = OrderedDict()
        #self.vars['start'] = 'y'
        self.vars['stimulus'] = ''
        self.vars['islearning'] = '1'
        self.vars['cleanslate'] = 'false'
        self.cookieJar = CookieJar()
        try:
            self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookieJar))
            self.opener.open(self.bot.baseUrl)
        except:
            print "Error with cleverbot."
            pass
        self.vars['icognoid'] = 'wsf'
        #self.vars['fno'] = '0'
        #self.vars['sub'] = 'Say'
        #self.vars['cleanslate'] = 'false'
        self.cookieJar = cookielib.CookieJar()
        self.opener = build_opener(HTTPCookieProcessor(self.cookieJar))
        self.opener.open(self.bot.baseUrl)

    def think_thought(self, thought):
        self.vars['stimulus'] = thought.text
        data = urllib.urlencode(self.vars)
        data = urlencode(self.vars)
        data_to_digest = data[9:self.bot.endIndex]
        data_digest = hashlib.md5(data_to_digest).hexdigest()
        data_digest = hashlib.md5(data_to_digest.encode('utf-8')).hexdigest()
        data = data + '&icognocheck=' + data_digest
        #-- bgm's cookie mod
        #yummy = CookieJar()
        #cookieSesh = urllib2.build_opener(urllib2.HTTPCookieProcessor(yummy))
        #cookieSesh.open('http://www.cleverbot.com')
        #- end mod
        url_response = self.opener.open(self.bot.serviceUrl, data)
        response = url_response.read()
        response_values = response.split('\r')
        url_response = self.opener.open(self.bot.serviceUrl, data.encode('utf-8'))
        response = str(url_response.read())
        response_values = re.split(r'\\r|\r', response)
        #self.vars['??'] = _utils_string_at_index(response_values, 0)
        self.vars['sessionid'] = _utils_string_at_index(response_values, 1)
        self.vars['logurl'] = _utils_string_at_index(response_values, 2)
@@ -124,20 +128,20 @@ def think_thought(self, thought):
        self.vars['vText2'] = _utils_string_at_index(response_values, 9)
        self.vars['prevref'] = _utils_string_at_index(response_values, 10)
        #self.vars['??'] = _utils_string_at_index(response_values, 11)
        self.vars['emotionalhistory'] = _utils_string_at_index(response_values, 12)
        self.vars['ttsLocMP3'] = _utils_string_at_index(response_values, 13)
        self.vars['ttsLocTXT'] = _utils_string_at_index(response_values, 14)
        self.vars['ttsLocTXT3'] = _utils_string_at_index(response_values, 15)
        self.vars['ttsText'] = _utils_string_at_index(response_values, 16)
        self.vars['lineRef'] = _utils_string_at_index(response_values, 17)
        self.vars['lineURL'] = _utils_string_at_index(response_values, 18)
        self.vars['linePOST'] = _utils_string_at_index(response_values, 19)
        self.vars['lineChoices'] = _utils_string_at_index(response_values, 20)
        self.vars['lineChoicesAbbrev'] = _utils_string_at_index(response_values, 21)
        self.vars['typingData'] = _utils_string_at_index(response_values, 22)
        self.vars['divert'] = _utils_string_at_index(response_values, 23)
        # self.vars['emotionalhistory'] = _utils_string_at_index(response_values, 12)
        # self.vars['ttsLocMP3'] = _utils_string_at_index(response_values, 13)
        # self.vars['ttsLocTXT'] = _utils_string_at_index(response_values, 14)
        # self.vars['ttsLocTXT3'] = _utils_string_at_index(response_values, 15)
        # self.vars['ttsText'] = _utils_string_at_index(response_values, 16)
        # self.vars['lineRef'] = _utils_string_at_index(response_values, 17)
        # self.vars['lineURL'] = _utils_string_at_index(response_values, 18)
        # self.vars['linePOST'] = _utils_string_at_index(response_values, 19)
        # self.vars['lineChoices'] = _utils_string_at_index(response_values, 20)
        # self.vars['lineChoicesAbbrev'] = _utils_string_at_index(response_values, 21)
        # self.vars['typingData'] = _utils_string_at_index(response_values, 22)
        # self.vars['divert'] = _utils_string_at_index(response_values, 23)
        response_thought = ChatterBotThought()
        response_thought.text = _utils_string_at_index(response_values, 16)
        response_thought.text = _utils_string_at_index(response_values, 0)
        return response_thought

#################################################
Expand All @@ -161,8 +165,8 @@ def __init__(self, bot):

    def think_thought(self, thought):
        self.vars['input'] = thought.text
        data = urllib.urlencode(self.vars)
        url_response = urllib2.urlopen('http://www.pandorabots.com/pandora/talk-xml', data)
        data = urlencode(self.vars)
        url_response = urlopen('http://www.pandorabots.com/pandora/talk-xml', data)
        response = url_response.read()
        response_dom = xml.dom.minidom.parseString(response)
        response_thought = ChatterBotThought()
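
For orientation, a minimal sketch of how the Cleverbot path above might be exercised after this change. ChatterBotFactory, ChatterBotType, ChatterBotThought, and think_thought all appear in the diff; the create_session() call and the import line are assumptions based on the surrounding API and file name, not shown in this commit.

from chatterbotapi import ChatterBotFactory, ChatterBotType, ChatterBotThought

factory = ChatterBotFactory()
cleverbot = factory.create(ChatterBotType.CLEVERBOT)
session = cleverbot.create_session()    # assumed constructor for _CleverbotSession; not shown in this diff

thought = ChatterBotThought()
thought.text = 'hello'
reply = session.think_thought(thought)  # urlencodes the ordered vars, appends the icognocheck md5, POSTs to webservicemin
print(reply.text)                       # the reply text is response_values[0] after this commit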
84 changes: 84 additions & 0 deletions googlebot.py
@@ -0,0 +1,84 @@
#!/usr/bin/env python

import google
import urllib2
import random
from operator import itemgetter
from goose import Goose

def retFirstResult(search):
    result = {}
    res = google.search(search,stop=0)
    for i in res:
        o = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        try:
            r = o.open(i)
        except:
            return "~*403"
        html = r.read()
        g = Goose()
        a = g.extract(raw_html=html)
        result["title"] = a.title
        result["url"] = i
        result["blob"] = getBlob(search,a.cleaned_text)
        return result

def retRandomResult(search):
    result = {}
    res = google.search(search)
    for c,i in enumerate(res):
        if c > 15: break
        if c % 6 == random.randint(0,5):
            g = Goose()
            a = g.extract(raw_html=google.get_page(i))
            result["resNum"] = c
            result["title"] = a.title
            result["url"] = i
            result["blob"] = getBlob(search,a.cleaned_text)
        else:
            continue
    if len(result) == 0:
        for i in res:
            g = Goose()
            a = g.extract(raw_html=google.get_page(i))
            result["title"] = a.title
            result["url"] = i
            result["blob"] = getBlob(search,a.cleaned_text)
            return result
    return result

def retResults(search):
    result = {}
    res = google.search(search,stop=2)
    for i,v in enumerate(res):
        if len(result) > 2: break
        result[i] = {}
        o = urllib2.build_opener(urllib2.HTTPCookieProcessor())
        try:
            r = o.open(v)
        except:
            continue
        html = r.read()
        g = Goose()
        a = g.extract(raw_html=html)
        result[i]["title"] = a.title
        result[i]["url"] = v
    return result


def getBlob(search, text):
    matches = {}
    text = text.replace("\n"," ")
    text = text.split(" ")
    try:
        while text.index("") >= 0:
            text.remove(text[text.index("")])
    except ValueError:
        for i,v in enumerate(search.split()):
            matches[i] = [a for a, b in enumerate(text) if (str(b).strip().lower() == str(v).strip().lower() or (a+1 < len(text) and "".join(itemgetter(a,a+1)(text)).strip().lower() == str(v).strip().lower()))]
    blob = ""
    for words in matches:
        for index in matches[words]:
            blob += "..." + " ".join(text[index-4:index+15]) + "... | "
    blob = blob[:len(blob) - 3]
    return blob
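
For context, a brief sketch of how these helpers might be invoked, assuming the google and goose packages imported above are available; the query string is only an example.

# Hypothetical query; retFirstResult returns the string "~*403" if the first hit cannot be fetched.
hit = retFirstResult("cleverbot api python")
if hit and hit != "~*403":
    print(hit["title"])   # page title extracted by Goose
    print(hit["url"])     # the matching search-result URL
    print(hit["blob"])    # "... | "-separated snippets that getBlob builds around each query-term match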
