Skip to content

Commit

Permalink
Merge pull request #22 from Merola/pynik-metacritic-rewrite
Browse files Browse the repository at this point in the history
Rewrite/repair Metacritic plugin, using the Metacritic autosearch API
  • Loading branch information
serpis committed Oct 1, 2015
2 parents 0db8034 + 17a4c00 commit 5f8633f
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 166 deletions.
268 changes: 138 additions & 130 deletions plugins/httpget.py
Original file line number Diff line number Diff line change
@@ -1,142 +1,150 @@
import sys
import re
import urllib
import urllib2
from socket import *

# NOTE(review): the old module-level write() helper and the old
# http_get_request class were superseded by the underscore-prefixed
# versions below (_write / _http_get_request); the stray leftover copies
# that used to sit here were dead code and have been dropped.

# User-Agent header value sent with every outgoing request.
USER_AGENT = 'Pynik/0.1'
def read_url(url, http_headers=None, http_post_data=None):
    """Fetch *url* and return a result dict, or None on failure.

    Plain http:// URLs with no extra headers or POST data go through the
    legacy raw-socket reader; all other http(s) requests go through
    urllib2. Non-http(s) protocols and malformed URLs return None.
    """
    # Bug fix: use None instead of a shared mutable default dict.
    if http_headers is None:
        http_headers = {}

    m = re.match(r"^(.{3,5}):\/\/([^\/]*)(:?\d*)(\/.*?)?$", url)
    if m:
        protocol, address, port, file = m.group(1, 2, 3, 4)

        if protocol == 'http' and not http_headers and http_post_data is None:
            return _legacy_http_read(url, protocol, address, port, file)
        elif protocol in ['http', 'https']:
            return _normal_http_read(url, http_headers, http_post_data)
        else:
            print("Only http(s) is supported at this moment.")
            return None
    else:
        print("NOT AN URL: %s" % url)
        return None

def _write(s, text):
    """Send *text* on socket *s*, followed by a CRLF line terminator."""
    s.send(text)
    s.send("\r\n")
class _http_get_request:
    """Minimal HTTP GET request builder for the legacy raw-socket path."""

    def __init__(self, file):
        # Path component of the URL (e.g. '/index.html').
        self.file = file
        # (name, value) header tuples, sent in insertion order.
        self.headers = []

    def add_header(self, name, value):
        """Queue a header to be sent with the request."""
        self.headers.append((name, value))

    def send(self, s):
        """Write the complete HTTP/1.0 GET request to socket *s*."""
        _write(s, "GET %s HTTP/1.0" % self.file)
        _write(s, "\r\n".join(map(lambda x: "%s: %s" % x, self.headers)))
        _write(s, "")
        _write(s, "")

def _read_line(s):
    """Read one line from socket *s*, one byte at a time.

    Blocks until a newline byte arrives; the trailing newline and an
    optional preceding carriage return are stripped from the result.
    """
    line = ""

    while True:
        line += s.recv(1)

        if line and line[-1:] == "\n":
            line = line[0:-1]
            if len(line) and line[-1] == "\r":
                line = line[0:-1]

            return line

def _read_http_headers(s):
    """Parse the status line and headers of an HTTP response from *s*.

    Returns (protocol, status_code_as_int, reason_phrase, header_dict).
    Header names are stored without their trailing colon.
    """
    status_match = re.match("^(.+?) (.+?) (.+)$", _read_line(s))
    protocol, status_code, reason = status_match.groups()
    header_map = {}

    line = _read_line(s)
    while line:
        header_match = re.match("^(.+?) (.+)$", line)
        if header_match:
            name, value = header_match.groups()
            # name still carries the ':' separator; strip it.
            header_map[name[0:-1]] = value
        line = _read_line(s)

    return (protocol, int(status_code), reason, header_map)

def _read_http_data(s, length):
    """Receive up to *length* bytes of response body from socket *s*.

    A falsy *length* means "read until the peer closes the connection".
    """
    chunks = []
    received = 0

    while not length or received < length:
        request_size = min(length - received, 1024) if length else 1024
        chunk = s.recv(request_size)
        if not chunk:
            # Connection closed by the peer.
            break
        chunks.append(chunk)
        received += len(chunk)

    return ''.join(chunks)

def _legacy_http_read(url, protocol, address, port, file):
    """Fetch an http:// URL over a raw socket (legacy code path).

    Sends a plain HTTP/1.0 GET, follows 301/302 redirects (with simple
    self-redirect loop detection), and returns {"url": ..., "data": ...}
    on a 200 response, or None on failure. Bodies are capped at 1 MB.
    """
    if not port:
        port = 80
    else:
        # Bug fix: the URL regex captures the port with its leading ':'
        # and as a string; normalize so socket.connect() gets an int.
        port = int(str(port).lstrip(':'))
    if not file:
        file = '/'

    request = _http_get_request(file)
    request.add_header("User-Agent", USER_AGENT)
    request.add_header("Accept", "*/*")
    request.add_header("Host", address)

    s = socket(AF_INET, SOCK_STREAM)

    s.connect((address, port))
    request.send(s)

    protocol, response_num, response_string, headers = _read_http_headers(s)

    if response_num == 301 or response_num == 302:
        s.close()

        # Simple loop detection: refuse a redirect pointing right back
        # at the URL we just requested.
        # NOTE(review): assumes a Location header always accompanies a
        # 301/302 -- a missing one raises KeyError. Confirm acceptable.
        if url == headers['Location']:
            print("Redirect loop discovered at: %s" % headers['Location'])
            return None
        else:
            print("Site moved to: %s" % headers['Location'])
            return read_url(headers['Location'])
    elif response_num == 200:
        length = 1024 * 1024  # max one megabyte
        if "Content-Length" in headers:
            length = min(length, int(headers["Content-Length"]))

        data = _read_http_data(s, length)

        s.close()

        return { "url": url, "data": data }
    else:
        # Bug fix: the socket used to be leaked on unhandled codes.
        s.close()
        print("Got unhandled response code: %s" % response_num)
        return None

def _normal_http_read(url, http_headers, http_post_data):
    """Fetch *url* via urllib2 and return {'url', 'data', 'info'}.

    *http_post_data*, when not None, is a mapping that gets form-encoded
    and POSTed; otherwise a plain GET is performed. Returns None on
    network errors (IOError). The body is capped at one megabyte.
    """
    encoded_post = None
    if http_post_data is not None:
        encoded_post = urllib.urlencode(http_post_data)

    request = urllib2.Request(url, headers=http_headers, data=encoded_post)
    request.add_header('User-Agent', USER_AGENT)

    try:
        response = urllib2.urlopen(request)
    except IOError:
        return None

    result = {"url": response.geturl(),
              "data": response.read(1024 * 1024),
              "info": response.info()}

    response.close()
    return result
92 changes: 60 additions & 32 deletions plugins/metacritic.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,76 @@
# -*- coding: utf-8 -*-

### teetow stole googlefight lol

import re
import json
import utility
from commands import Command

class Metacritic(Command):
    """IRC command plugin that searches Metacritic via its autosearch API."""

    USAGE = u"Usage: .metacritic <title>"

    URL_BASE = 'http://www.metacritic.com'
    URL_API = URL_BASE + '/autosearch'
    URL_MANUAL_SEARCH = URL_BASE + '/search/all/%s/results'

    def __init__(self):
        pass

    def trig_metacritic(self, bot, source, target, trigger, argument):
        """Command used to search the review aggregation site Metacritic.com"""
        return self._run_command(argument.strip()).encode('utf-8')

    def _run_command(self, search_term):
        """Run a search and return a unicode reply line for IRC."""
        if not search_term:
            return self.USAGE

        raw_result = self._get_raw_result(search_term)
        if not raw_result:
            return u"Could not retrieve data from Metacritic :("

        # The autosearch endpoint answers with JSON; matches live under
        # the 'autoComplete' key.
        decoded_result = json.loads(raw_result['data'])
        items = decoded_result['autoComplete']

        if len(items) == 0:
            return u"No item found. Manual search: " + self._manual_search_url(search_term)
        else:
            return self._formatted_search_result(items[0], search_term)

    def _get_raw_result(self, search_term):
        """POST the search term to the autosearch API; None on failure."""
        # Mimic the site's own AJAX search request -- presumably required
        # by the API to answer at all (TODO confirm).
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Referer': self.URL_BASE}
        post_data = {'search_term': search_term}

        return utility.read_url(self.URL_API, headers, post_data)

    def _manual_search_url(self, search_term):
        """Return the URL of Metacritic's regular search results page."""
        return self.URL_MANUAL_SEARCH % utility.escape(search_term)

    def _formatted_search_result(self, item, search_term):
        """Format the best-matching item plus a link to all results."""
        template = u"{name} ({formatted_info}), {formatted_score}, {item_url}" + \
                   u" | All results: {manual_search_url}"

        data = {'name': item['name'],
                'formatted_info': self._formatted_item_info(item),
                'formatted_score': self._formatted_item_score(item),
                'item_url': self.URL_BASE + item['url'],
                'manual_search_url': self._manual_search_url(search_term)}

        return template.format(**data)

    def _formatted_item_info(self, item):
        """Describe an item's type and, when present, its date."""
        result = u"%s" % item['refType']

        if item['itemDate']:
            result += u", %s" % str(item['itemDate'])

        return result

    def _formatted_item_score(self, item):
        """Render the metascore, e.g. '87/100 (generally favorable)'."""
        if item['metaScore']:
            result = u"%s/100" % str(item['metaScore'])
        else:
            result = u"No score"

        if item['scoreWord']:
            result += u" (%s)" % item['scoreWord']

        return result
Loading

0 comments on commit 5f8633f

Please sign in to comment.