-
Notifications
You must be signed in to change notification settings - Fork 0
/
imdb2.py
executable file
·85 lines (73 loc) · 2.43 KB
/
imdb2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
"""Get the rating of a movie on IMDB."""
# ImdbRating
# Laszlo Szathmary, 2011 (jabba.laci@gmail.com)
#
# Project's home page:
# https://pythonadventures.wordpress.com/2011/03/25/get-the-imdb-rating-of-a-movie/
#
# Version: 0.2
# Date: 2011-03-29 (yyyy-mm-dd)
#
# Inspired by the script of Rag Sagar:
# https://ragsagar.wordpress.com/2010/11/20/python-script-to-find-imdb-rating/
#
# This free software is copyleft licensed under the same terms as Python, or,
# at your option, under version 2 of the GPL license.
import sys
import re
import urllib
import urlparse
from mechanize import Browser
from BeautifulSoup import BeautifulSoup
class MyOpener(urllib.FancyURLopener):
"""Tricking web servers."""
version = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15'
class ImdbRating:
    """Look up a movie on IMDB and expose its URL, title and rating.

    The lookup is performed by the constructor.  Afterwards the instance
    carries:

    found  -- True when a movie page was located and parsed without error.
    url    -- URL of the movie page (None if no page was located).
    title  -- initially the search string; replaced by the stripped first
              child of the page's first <h1> once that has been parsed.
    rating -- first child of the first <span> whose ``itemprop`` attribute
              is ``ratingValue``; stays None if the page has no such span.
    """
    # title of the movie
    title = None
    # IMDB URL of the movie
    url = None
    # IMDB rating of the movie
    rating = None
    # Did we find a result?
    found = False
    # constant
    BASE_URL = 'http://www.imdb.com'
    # Matches URLs that point at a movie page; compiled once and reused for
    # both the redirect check and the link search.
    _TITLE_URL = re.compile(r'/title/tt.*')

    def __init__(self, title):
        """Remember the search string and run the lookup immediately.

        title -- movie title to search for
        """
        self.title = title
        self._process()

    def _process(self):
        """Search IMDB for ``self.title`` and fill in the result attributes.

        Sets ``url``, ``title``, ``rating`` and ``found``.  When the search
        yields no movie link, or the movie page cannot be parsed, ``found``
        is left False instead of raising.  Errors raised while opening the
        URLs themselves are not handled here and propagate to the caller.
        """
        # Function-scope import: only this method needs the exception class.
        from mechanize import LinkNotFoundError

        # Collapse runs of whitespace, then percent-encode so that characters
        # such as '&', '#', '?' or '+' in the title cannot corrupt the query.
        query = ' '.join(self.title.split())
        if isinstance(query, unicode):
            # Python 2's quote_plus() fails on non-ASCII unicode objects.
            query = query.encode('utf-8')
        movie = urllib.quote_plus(query)

        br = Browser()
        url = "%s/find?s=tt&q=%s" % (self.BASE_URL, movie)
        br.open(url)

        if self._TITLE_URL.search(br.geturl()):
            # The search landed directly on a movie page.  Keep only the
            # scheme, host and path of the final URL.
            self.url = "%s://%s%s" % urlparse.urlparse(br.geturl())[:3]
            # NOTE(review): this requests the *search* URL a second time via
            # MyOpener rather than reusing the browser's response; kept as
            # in the original -- confirm whether that is intentional.
            soup = BeautifulSoup(MyOpener().open(url).read())
        else:
            # A result list was returned: follow the first movie link.
            try:
                link = br.find_link(url_regex=self._TITLE_URL)
            except LinkNotFoundError:
                # No movie link on the page; report "not found" via the
                # ``found`` flag instead of crashing.
                return
            res = br.follow_link(link)
            self.url = urlparse.urljoin(self.BASE_URL, link.url)
            soup = BeautifulSoup(res.read())

        try:
            self.title = soup.find('h1').contents[0].strip()
            for span in soup.findAll('span'):
                if span.get('itemprop') == 'ratingValue':
                    self.rating = span.contents[0]
                    break
            self.found = True
        except (AttributeError, IndexError, TypeError):
            # Missing <h1>, an empty tag, or a first child that is not plain
            # text: treat the page as unparseable and leave ``found`` False.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            pass
# class ImdbRating
# Command-line entry point: print the URL, title and rating of the movie
# named on the command line, one per line.
if __name__ == "__main__":
    if len(sys.argv) == 1:
        print("Usage: %s 'Movie title'" % (sys.argv[0]))
    else:
        # Join every argument so an unquoted multi-word title is searched
        # in full instead of being cut down to its first word.
        wanted = ' '.join(sys.argv[1:])
        imdb = ImdbRating(wanted)
        if imdb.found:
            print(imdb.url)
            print(imdb.title)
            print(imdb.rating)
        else:
            # Tell the user (and calling scripts, via the exit status) that
            # the lookup failed rather than exiting silently with success.
            sys.stderr.write("No IMDB result found for '%s'\n" % wanted)
            sys.exit(1)