Skip to content

Commit

Permalink
Merge pull request #22 from Merola/pynik-metacritic-rewrite
Browse files Browse the repository at this point in the history
Rewrite/repair Metacritic plugin, using the Metacritic autosearch API
  • Loading branch information
serpis committed Oct 1, 2015
2 parents 0db8034 + 17a4c00 commit 5f8633f
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 166 deletions.
268 changes: 138 additions & 130 deletions plugins/httpget.py
Original file line number Diff line number Diff line change
@@ -1,142 +1,150 @@
import sys
import re
import urllib
import urllib2
from socket import *

# NOTE(review): the old module-level write() helper and the old
# http_get_request class were superseded by the underscore-prefixed
# versions below (_write / _http_get_request); the stray leftover copies
# that used to sit here were dead code and have been dropped.

# User-Agent header value sent with every outgoing request.
USER_AGENT = 'Pynik/0.1'
def read_url(url, http_headers=None, http_post_data=None):
    """Fetch *url* and return a result dict, or None on failure.

    Plain http:// URLs with no extra headers or POST data go through the
    legacy raw-socket reader; all other http(s) requests go through
    urllib2. Non-http(s) protocols and malformed URLs return None.
    """
    # Bug fix: use None instead of a shared mutable default dict.
    if http_headers is None:
        http_headers = {}

    m = re.match(r"^(.{3,5}):\/\/([^\/]*)(:?\d*)(\/.*?)?$", url)
    if m:
        protocol, address, port, file = m.group(1, 2, 3, 4)

        if protocol == 'http' and not http_headers and http_post_data is None:
            return _legacy_http_read(url, protocol, address, port, file)
        elif protocol in ['http', 'https']:
            return _normal_http_read(url, http_headers, http_post_data)
        else:
            print("Only http(s) is supported at this moment.")
            return None
    else:
        print("NOT AN URL: %s" % url)
        return None

def _write(s, text):
    """Send *text* on socket *s*, followed by a CRLF line terminator."""
    s.send(text)
    s.send("\r\n")
class _http_get_request:
    """Minimal HTTP GET request builder for the legacy raw-socket path."""

    def __init__(self, file):
        # Path component of the URL (e.g. '/index.html').
        self.file = file
        # (name, value) header tuples, sent in insertion order.
        self.headers = []

    def add_header(self, name, value):
        """Queue a header to be sent with the request."""
        self.headers.append((name, value))

    def send(self, s):
        """Write the complete HTTP/1.0 GET request to socket *s*."""
        _write(s, "GET %s HTTP/1.0" % self.file)
        _write(s, "\r\n".join(map(lambda x: "%s: %s" % x, self.headers)))
        _write(s, "")
        _write(s, "")

def _read_line(s):
    """Read one line from socket *s*, one byte at a time.

    Blocks until a newline byte arrives; the trailing newline and an
    optional preceding carriage return are stripped from the result.
    """
    line = ""

    while True:
        line += s.recv(1)

        if line and line[-1:] == "\n":
            line = line[0:-1]
            if len(line) and line[-1] == "\r":
                line = line[0:-1]

            return line

def _read_http_headers(s):
    """Parse the status line and headers of an HTTP response from *s*.

    Returns (protocol, status_code_as_int, reason_phrase, header_dict).
    Header names are stored without their trailing colon.
    """
    status_match = re.match("^(.+?) (.+?) (.+)$", _read_line(s))
    protocol, status_code, reason = status_match.groups()
    header_map = {}

    line = _read_line(s)
    while line:
        header_match = re.match("^(.+?) (.+)$", line)
        if header_match:
            name, value = header_match.groups()
            # name still carries the ':' separator; strip it.
            header_map[name[0:-1]] = value
        line = _read_line(s)

    return (protocol, int(status_code), reason, header_map)

def _read_http_data(s, length):
    """Receive up to *length* bytes of response body from socket *s*.

    A falsy *length* means "read until the peer closes the connection".
    """
    chunks = []
    received = 0

    while not length or received < length:
        request_size = min(length - received, 1024) if length else 1024
        chunk = s.recv(request_size)
        if not chunk:
            # Connection closed by the peer.
            break
        chunks.append(chunk)
        received += len(chunk)

    return ''.join(chunks)

def _legacy_http_read(url, protocol, address, port, file):
    """Fetch an http:// URL over a raw socket (legacy code path).

    Sends a plain HTTP/1.0 GET, follows 301/302 redirects (with simple
    self-redirect loop detection), and returns {"url": ..., "data": ...}
    on a 200 response, or None on failure. Bodies are capped at 1 MB.
    """
    if not port:
        port = 80
    else:
        # Bug fix: the URL regex captures the port with its leading ':'
        # and as a string; normalize so socket.connect() gets an int.
        port = int(str(port).lstrip(':'))
    if not file:
        file = '/'

    request = _http_get_request(file)
    request.add_header("User-Agent", USER_AGENT)
    request.add_header("Accept", "*/*")
    request.add_header("Host", address)

    s = socket(AF_INET, SOCK_STREAM)

    s.connect((address, port))
    request.send(s)

    protocol, response_num, response_string, headers = _read_http_headers(s)

    if response_num == 301 or response_num == 302:
        s.close()

        # Simple loop detection: refuse a redirect pointing right back
        # at the URL we just requested.
        # NOTE(review): assumes a Location header always accompanies a
        # 301/302 -- a missing one raises KeyError. Confirm acceptable.
        if url == headers['Location']:
            print("Redirect loop discovered at: %s" % headers['Location'])
            return None
        else:
            print("Site moved to: %s" % headers['Location'])
            return read_url(headers['Location'])
    elif response_num == 200:
        length = 1024 * 1024  # max one megabyte
        if "Content-Length" in headers:
            length = min(length, int(headers["Content-Length"]))

        data = _read_http_data(s, length)

        s.close()

        return { "url": url, "data": data }
    else:
        # Bug fix: the socket used to be leaked on unhandled codes.
        s.close()
        print("Got unhandled response code: %s" % response_num)
        return None

def _normal_http_read(url, http_headers, http_post_data):
    """Fetch *url* via urllib2 and return {'url', 'data', 'info'}.

    *http_post_data*, when not None, is a mapping that gets form-encoded
    and POSTed; otherwise a plain GET is performed. Returns None on
    network errors (IOError). The body is capped at one megabyte.
    """
    encoded_post = None
    if http_post_data is not None:
        encoded_post = urllib.urlencode(http_post_data)

    request = urllib2.Request(url, headers=http_headers, data=encoded_post)
    request.add_header('User-Agent', USER_AGENT)

    try:
        response = urllib2.urlopen(request)
    except IOError:
        return None

    result = {"url": response.geturl(),
              "data": response.read(1024 * 1024),
              "info": response.info()}

    response.close()
    return result
92 changes: 60 additions & 32 deletions plugins/metacritic.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,76 @@
# -*- coding: utf-8 -*-

### teetow stole googlefight lol

import re
import json
import utility
from commands import Command

class Metacritic(Command):
    """IRC command plugin that searches Metacritic via its autosearch API."""

    USAGE = u"Usage: .metacritic <title>"

    URL_BASE = 'http://www.metacritic.com'
    URL_API = URL_BASE + '/autosearch'
    URL_MANUAL_SEARCH = URL_BASE + '/search/all/%s/results'

    def __init__(self):
        pass

    def trig_metacritic(self, bot, source, target, trigger, argument):
        """Command used to search the review aggregation site Metacritic.com"""
        return self._run_command(argument.strip()).encode('utf-8')

    def _run_command(self, search_term):
        """Run a search and return a unicode reply line for IRC."""
        if not search_term:
            return self.USAGE

        raw_result = self._get_raw_result(search_term)
        if not raw_result:
            return u"Could not retrieve data from Metacritic :("

        # The autosearch endpoint answers with JSON; matches live under
        # the 'autoComplete' key.
        decoded_result = json.loads(raw_result['data'])
        items = decoded_result['autoComplete']

        if len(items) == 0:
            return u"No item found. Manual search: " + self._manual_search_url(search_term)
        else:
            return self._formatted_search_result(items[0], search_term)

    def _get_raw_result(self, search_term):
        """POST the search term to the autosearch API; None on failure."""
        # Mimic the site's own AJAX search request -- presumably required
        # by the API to answer at all (TODO confirm).
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Referer': self.URL_BASE}
        post_data = {'search_term': search_term}

        return utility.read_url(self.URL_API, headers, post_data)

    def _manual_search_url(self, search_term):
        """Return the URL of Metacritic's regular search results page."""
        return self.URL_MANUAL_SEARCH % utility.escape(search_term)

    def _formatted_search_result(self, item, search_term):
        """Format the best-matching item plus a link to all results."""
        template = u"{name} ({formatted_info}), {formatted_score}, {item_url}" + \
                   u" | All results: {manual_search_url}"

        data = {'name': item['name'],
                'formatted_info': self._formatted_item_info(item),
                'formatted_score': self._formatted_item_score(item),
                'item_url': self.URL_BASE + item['url'],
                'manual_search_url': self._manual_search_url(search_term)}

        return template.format(**data)

    def _formatted_item_info(self, item):
        """Describe an item's type and, when present, its date."""
        result = u"%s" % item['refType']

        if item['itemDate']:
            result += u", %s" % str(item['itemDate'])

        return result

    def _formatted_item_score(self, item):
        """Render the metascore, e.g. '87/100 (generally favorable)'."""
        if item['metaScore']:
            result = u"%s/100" % str(item['metaScore'])
        else:
            result = u"No score"

        if item['scoreWord']:
            result += u" (%s)" % item['scoreWord']

        return result
Loading

0 comments on commit 5f8633f

Please sign in to comment.