@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from setuptools.command.easy_install import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from setuptools.command.easy_install import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,56 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_character.py
Usage: get_character "characterID"
Show some info about the character with the given characterID (e.g. '0000001'
for "Jesse James", using 'http' or 'mobile').
Notice that characterID, using 'sql', are not the same IDs used on the web.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "characterID"' % sys.argv[0]
sys.exit(2)

characterID = sys.argv[1]

i = imdb.IMDb()

out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

try:
# Get a character object with the data about the character identified by
# the given characterID.
character = i.get_character(characterID)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)


if not character:
print 'It seems that there\'s no character with characterID "%s"' % characterID
sys.exit(4)

# XXX: this is the easier way to print the main info about a character;
# calling the summary() method of a character object will returns a string
# with the main information about the character.
# Obviously it's not really meaningful if you want to know how
# to access the data stored in a character object, so look below; the
# commented lines show some ways to retrieve information from a
# character object.
print character.summary().encode(out_encoding, 'replace')


@@ -0,0 +1,56 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_company.py
Usage: get_company "companyID"
Show some info about the company with the given companyID (e.g. '0071509'
for "Columbia Pictures [us]", using 'http' or 'mobile').
Notice that companyID, using 'sql', are not the same IDs used on the web.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "companyID"' % sys.argv[0]
sys.exit(2)

companyID = sys.argv[1]

i = imdb.IMDb()

out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

try:
# Get a company object with the data about the company identified by
# the given companyID.
company = i.get_company(companyID)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)


if not company:
print 'It seems that there\'s no company with companyID "%s"' % companyID
sys.exit(4)

# XXX: this is the easier way to print the main info about a company;
# calling the summary() method of a company object will returns a string
# with the main information about the company.
# Obviously it's not really meaningful if you want to know how
# to access the data stored in a company object, so look below; the
# commented lines show some ways to retrieve information from a
# company object.
print company.summary().encode(out_encoding, 'replace')


@@ -0,0 +1,59 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_first_character.py
Usage: get_first_character "character name"
Search for the given name and print the best matching result.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "character name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of character objects).
results = i.search_character(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

if not results:
print 'No matches for "%s", sorry.' % name.encode(out_encoding, 'replace')
sys.exit(0)

# Print only the first result.
print ' Best match for "%s"' % name.encode(out_encoding, 'replace')

# This is a character instance.
character = results[0]

# So far the character object only contains basic information like the
# name; retrieve main information:
i.update(character)

print character.summary().encode(out_encoding, 'replace')



@@ -0,0 +1,59 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_first_company.py
Usage: get_first_company "company name"
Search for the given name and print the best matching result.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "company name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of company objects).
results = i.search_company(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

if not results:
print 'No matches for "%s", sorry.' % name.encode(out_encoding, 'replace')
sys.exit(0)

# Print only the first result.
print ' Best match for "%s"' % name.encode(out_encoding, 'replace')

# This is a company instance.
company = results[0]

# So far the company object only contains basic information like the
# name; retrieve main information:
i.update(company)

print company.summary().encode(out_encoding, 'replace')



@@ -0,0 +1,59 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_first_movie.py
Usage: get_first_movie "movie title"
Search for the given title and print the best matching result.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "movie title"' % sys.argv[0]
sys.exit(2)

title = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

title = unicode(title, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of Movie objects).
results = i.search_movie(title)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

if not results:
print 'No matches for "%s", sorry.' % title.encode(out_encoding, 'replace')
sys.exit(0)

# Print only the first result.
print ' Best match for "%s"' % title.encode(out_encoding, 'replace')

# This is a Movie instance.
movie = results[0]

# So far the Movie object only contains basic information like the
# title and the year; retrieve main information:
i.update(movie)

print movie.summary().encode(out_encoding, 'replace')



@@ -0,0 +1,59 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_first_person.py
Usage: get_first_person "person name"
Search for the given name and print the best matching result.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "person name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of Person objects).
results = i.search_person(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

if not results:
print 'No matches for "%s", sorry.' % name.encode(out_encoding, 'replace')
sys.exit(0)

# Print only the first result.
print ' Best match for "%s"' % name.encode(out_encoding, 'replace')

# This is a Person instance.
person = results[0]

# So far the Person object only contains basic information like the
# name; retrieve main information:
i.update(person)

print person.summary().encode(out_encoding, 'replace')



@@ -0,0 +1,53 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_keyword.py
Usage: get_keyword "keyword"
Search for movies tagged with the given keyword and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "keyword"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of movies).
results = i.get_keyword(name, results=20)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print ' : movie title'

# Print the long imdb title for every movie.
for idx, movie in enumerate(results):
outp = u'%d: %s' % (idx+1, movie['long imdb title'])
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,106 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_movie.py
Usage: get_movie "movieID"
Show some info about the movie with the given movieID (e.g. '0133093'
for "The Matrix", using 'http' or 'mobile').
Notice that movieID, using 'sql', are not the same IDs used on the web.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "movieID"' % sys.argv[0]
sys.exit(2)

movieID = sys.argv[1]

i = imdb.IMDb()

out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

try:
# Get a Movie object with the data about the movie identified by
# the given movieID.
movie = i.get_movie(movieID)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)


if not movie:
print 'It seems that there\'s no movie with movieID "%s"' % movieID
sys.exit(4)

# XXX: this is the easier way to print the main info about a movie;
# calling the summary() method of a Movie object will returns a string
# with the main information about the movie.
# Obviously it's not really meaningful if you want to know how
# to access the data stored in a Movie object, so look below; the
# commented lines show some ways to retrieve information from a
# Movie object.
print movie.summary().encode(out_encoding, 'replace')

# Show some info about the movie.
# This is only a short example; you can get a longer summary using
# 'print movie.summary()' and the complete set of information looking for
# the output of the movie.keys() method.
#print '==== "%s" / movieID: %s ====' % (movie['title'], movieID)
# XXX: use the IMDb instance to get the IMDb web URL for the movie.
#imdbURL = i.get_imdbURL(movie)
#if imdbURL:
# print 'IMDb URL: %s' % imdbURL
#
# XXX: many keys return a list of values, like "genres".
#genres = movie.get('genres')
#if genres:
# print 'Genres: %s' % ' '.join(genres)
#
# XXX: even when only one value is present (e.g.: movie with only one
# director), fields that can be multiple are ALWAYS a list.
# Note that the 'name' variable is a Person object, but since its
# __str__() method returns a string with the name, we can use it
# directly, instead of name['name']
#director = movie.get('director')
#if director:
# print 'Director(s): ',
# for name in director:
# sys.stdout.write('%s ' % name)
# print ''
#
# XXX: notice that every name in the cast is a Person object, with a
# currentRole instance variable, which is a string for the played role.
#cast = movie.get('cast')
#if cast:
# print 'Cast: '
# cast = cast[:5]
# for name in cast:
# print ' %s (%s)' % (name['name'], name.currentRole)
# XXX: some information are not lists of strings or Person objects, but simple
# strings, like 'rating'.
#rating = movie.get('rating')
#if rating:
# print 'Rating: %s' % rating
# XXX: an example of how to use information sets; retrieve the "trivia"
# info set; check if it contains some data, select and print a
# random entry.
#import random
#i.update(movie, info=['trivia'])
#trivia = movie.get('trivia')
#if trivia:
# rand_trivia = trivia[random.randrange(len(trivia))]
# print 'Random trivia: %s' % rand_trivia


@@ -0,0 +1,90 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_person.py
Usage: get_person "personID"
Show some info about the person with the given personID (e.g. '0000210'
for "Julia Roberts").
Notice that personID, using 'sql', are not the same IDs used on the web.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "personID"' % sys.argv[0]
sys.exit(2)

personID = sys.argv[1]

i = imdb.IMDb()

out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

try:
# Get a Person object with the data about the person identified by
# the given personID.
person = i.get_person(personID)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)


if not person:
print 'It seems that there\'s no person with personID "%s"' % personID
sys.exit(4)

# XXX: this is the easier way to print the main info about a person;
# calling the summary() method of a Person object will returns a string
# with the main information about the person.
# Obviously it's not really meaningful if you want to know how
# to access the data stored in a Person object, so look below; the
# commented lines show some ways to retrieve information from a
# Person object.
print person.summary().encode(out_encoding, 'replace')

# Show some info about the person.
# This is only a short example; you can get a longer summary using
# 'print person.summary()' and the complete set of information looking for
# the output of the person.keys() method.
#print '==== "%s" / personID: %s ====' % (person['name'], personID)
# XXX: use the IMDb instance to get the IMDb web URL for the person.
#imdbURL = i.get_imdbURL(person)
#if imdbURL:
# print 'IMDb URL: %s' % imdbURL
# XXX: print the birth date and birth notes.
#d_date = person.get('birth date')
#if d_date:
# print 'Birth date: %s' % d_date
# b_notes = person.get('birth notes')
# if b_notes:
# print 'Birth notes: %s' % b_notes
# XXX: print the last five movies he/she acted in, and the played role.
#movies_acted = person.get('actor') or person.get('actress')
#if movies_acted:
# print 'Last roles played: '
# for movie in movies_acted[:5]:
# print ' %s (in "%s")' % (movie.currentRole, movie['title'])
# XXX: example of the use of information sets.
#import random
#i.update(person, info=['awards'])
#awards = person.get('awards')
#if awards:
# rand_award = awards[random.randrange(len(awards))]
# s = 'Random award: in year '
# s += rand_award.get('year', '')
# s += ' %s "%s"' % (rand_award.get('result', '').lower(),
# rand_award.get('award', ''))
# print s


@@ -0,0 +1,39 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
get_top_bottom_movies.py
Usage: get_top_bottom_movies
Return top and bottom 10 movies, by ratings.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 1:
print 'No arguments are required.'
sys.exit(2)

i = imdb.IMDb()

top250 = i.get_top250_movies()
bottom100 = i.get_bottom100_movies()

out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

for label, ml in [('top 10', top250[:10]), ('bottom 10', bottom100[:10])]:
print ''
print '%s movies' % label
print 'rating\tvotes\ttitle'
for movie in ml:
outl = u'%s\t%s\t%s' % (movie.get('rating'), movie.get('votes'),
movie['long imdb title'])
print outl.encode(out_encoding, 'replace')

Large diffs are not rendered by default.

@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from migrate.versioning.shell import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from migrate.versioning.migrate_repository import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from pbr.cmd.main import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from pip import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from pip import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
@@ -0,0 +1,11 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# -*- coding: utf-8 -*-
import re
import sys

from pip import main

if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" launcher suffix from the
    # program name, then exit with whatever main() returns.
    launcher_suffix = r'(-script\.pyw|\.exe)?$'
    sys.argv[0] = re.sub(launcher_suffix, '', sys.argv[0])
    exit_status = main()
    sys.exit(exit_status)
Binary file not shown.
@@ -0,0 +1,54 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
search_character.py
Usage: search_character "character name"
Search for the given name and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "character name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of character objects).
results = i.search_character(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print 'characterID\t: imdbID : name'

# Print the long imdb name for every character.
for character in results:
outp = u'%s\t\t: %s : %s' % (character.characterID, i.get_imdbID(character),
character['long imdb name'])
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,54 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
search_company.py
Usage: search_company "company name"
Search for the given name and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "company name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of company objects).
results = i.search_company(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print 'companyID\t: imdbID : name'

# Print the long imdb name for every company.
for company in results:
outp = u'%s\t\t: %s : %s' % (company.companyID, i.get_imdbID(company),
company['long imdb name'])
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,53 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
search_keyword.py
Usage: search_keyword "keyword"
Search for keywords similar to the given one and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "keyword name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of keyword strings).
results = i.search_keyword(name, results=20)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print ' : keyword'

# Print every keyword.
for idx, keyword in enumerate(results):
outp = u'%d: %s' % (idx+1, keyword)
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,54 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
search_movie.py
Usage: search_movie "movie title"
Search for the given title and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "movie title"' % sys.argv[0]
sys.exit(2)

title = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

title = unicode(title, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of Movie objects).
results = i.search_movie(title)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
title.encode(out_encoding, 'replace'))
print 'movieID\t: imdbID : title'

# Print the long imdb title for every movie.
for movie in results:
outp = u'%s\t: %s : %s' % (movie.movieID, i.get_imdbID(movie),
movie['long imdb title'])
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,54 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
"""
search_person.py
Usage: search_person "person name"
Search for the given name and print the results.
"""

import sys

# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)


if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "person name"' % sys.argv[0]
sys.exit(2)

name = sys.argv[1]


i = imdb.IMDb()

in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()

name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of Person objects).
results = i.search_person(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)

# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print 'personID\t: imdbID : name'

# Print the long imdb name for every person.
for person in results:
outp = u'%s\t: %s : %s' % (person.personID, i.get_imdbID(person),
person['long imdb name'])
print outp.encode(out_encoding, 'replace')


@@ -0,0 +1,109 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.

import optparse
import os
import sys

import sqlparse
from sqlparse.exceptions import SQLParseError


# Values accepted by the keyword-case and identifier-case options.
_CASE_CHOICES = ['upper', 'lower', 'capitalize']


# Command line parser.  General options (-v, -o) are added to the parser
# itself; everything that influences formatting goes into its own group.
parser = optparse.OptionParser(usage='%prog [OPTIONS] FILE, ...',
                               version='%%prog %s' % sqlparse.__version__)
parser.set_description(('Format FILE according to OPTIONS. Use "-" as FILE '
                        'to read from stdin.'))
parser.add_option('-v', '--verbose', dest='verbose', action='store_true')
parser.add_option('-o', '--outfile', dest='outfile', metavar='FILE',
                  help='write output to FILE (defaults to stdout)')
group = parser.add_option_group('Formatting Options')
group.add_option('-k', '--keywords', metavar='CHOICE',
                 dest='keyword_case', choices=_CASE_CHOICES,
                 help=('change case of keywords, CHOICE is one of %s'
                       % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
group.add_option('-i', '--identifiers', metavar='CHOICE',
                 dest='identifier_case', choices=_CASE_CHOICES,
                 help=('change case of identifiers, CHOICE is one of %s'
                       % ', '.join('"%s"' % x for x in _CASE_CHOICES)))
group.add_option('-l', '--language', metavar='LANG',
                 dest='output_format', choices=['python', 'php'],
                 help=('output a snippet in programming language LANG, '
                       'choices are "python", "php"'))
group.add_option('--strip-comments', dest='strip_comments',
                 action='store_true', default=False,
                 help='remove comments')
group.add_option('-r', '--reindent', dest='reindent',
                 action='store_true', default=False,
                 help='reindent statements')
# NOTE(review): no type= is given, so a value passed on the command line
# is presumably stored as a string while the default is the int 2 -- confirm
# that the consumer of 'indent_width' converts it.
group.add_option('--indent_width', dest='indent_width', default=2,
                 help='indentation width (defaults to 2 spaces)')

# Kept under a private name so _build_formatter_opts() can enumerate the
# options of this group.
_FORMATTING_GROUP = group


def _error(msg, exit_=None):
"""Print msg and optionally exit with return code exit_."""
sys.stderr.write('[ERROR] %s\n' % msg)
if exit_ is not None:
sys.exit(exit_)


def _build_formatter_opts(options):
    """Collect the formatting options into a dictionary.

    For every option registered in the formatting option group, the value
    stored on options under that option's dest is copied into the result,
    keyed by the dest name.
    """
    dests = [option.dest for option in _FORMATTING_GROUP.option_list]
    return dict((dest, getattr(options, dest)) for dest in dests)


def main():
    """Format one SQL input according to the command line options.

    Reads the single FILE argument ("-" means stdin), formats it with
    sqlparse and writes the result to the --outfile target, or to stdout
    when no output file is given.  Usage problems, unreadable input, an
    output file that cannot be opened and invalid formatting options are
    reported on stderr and end the program with exit status 1.
    """
    options, args = parser.parse_args()
    if options.verbose:
        sys.stderr.write('Verbose mode\n')

    if len(args) != 1:
        _error('No input data.')
        parser.print_usage()
        sys.exit(1)

    if '-' in args:  # read from stdin
        data = sys.stdin.read()
    else:
        # On Python 2 a failing open()/read() raises IOError, which is not
        # an OSError there, so both have to be caught.
        try:
            infile = open(args[0])
            try:
                data = infile.read()
            finally:
                infile.close()
        except (IOError, OSError):
            err = sys.exc_info()[1]  # Python 2.5 compatibility
            _error('Failed to read %s: %s' % (args[0], err), exit_=1)

    if options.outfile:
        try:
            stream = open(options.outfile, 'w')
        except (IOError, OSError):
            err = sys.exc_info()[1]  # Python 2.5 compatibility
            _error('Failed to open %s: %s' % (options.outfile, err), exit_=1)
    else:
        stream = sys.stdout

    try:
        formatter_opts = _build_formatter_opts(options)
        try:
            formatter_opts = sqlparse.formatter.validate_options(
                formatter_opts)
        except SQLParseError:
            err = sys.exc_info()[1]  # Python 2.5 compatibility
            _error('Invalid options: %s' % err, exit_=1)

        s = sqlparse.format(data, **formatter_opts)
        if sys.version_info < (3,):
            # Python 2 streams take byte strings.
            s = s.encode('utf-8', 'replace')
        stream.write(s)
        stream.flush()
    finally:
        # Close a file opened here, also when exiting early; never stdout.
        if stream is not sys.stdout:
            stream.close()


if __name__ == '__main__':
    main()
@@ -0,0 +1,35 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2
import sys
import os

try:
import pkg_resources
pkg_resources.require('SQLObject>0.6.1')
except (ImportError, pkg_resources.DistributionNotFound):
# Oh well, we tried...
pass

try:
import sqlobject.manager
except ImportError:
try:
here = __file__
except NameError:
here = sys.argv[0]
updir = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(here))),
'sqlobject')
if os.path.exists(updir):
sys.path.insert(0, os.path.dirname(updir))
else:
print 'I cannot find the sqlobject module'
print 'If SQLObject is installed, you may need to set $PYTHONPATH'
sys.exit(3)
# Now we have to get rid of possibly stale modules from that import
# up there
for name, value in sys.modules.items():
if name.startswith('sqlobject'):
del sys.modules[name]

from sqlobject.manager import command
command.the_runner.run(sys.argv)
@@ -0,0 +1,18 @@
#!/home/zaphod/workspace/trek_predict/trekenv/bin/python2

import sys

try:
uri = sys.argv[1]
except IndexError:
sys.exit("Usage: %s old-style-URI" % sys.argv[0])

try:
import pkg_resources
pkg_resources.require('SQLObject>=1.0.0')
except (ImportError, pkg_resources.DistributionNotFound):
pass

from sqlobject import connectionForURI
conn = connectionForURI(uri, oldUri=True)
print conn.uri()
@@ -0,0 +1,22 @@

NOTE: see also CONTRIBUTORS.txt for a list of developers of important
portions of the code and CREDITS.txt for a list of people who
contributed with hints, patches and so on.


AUTHOR: Davide Alberani

e-mail: da@erlug.linux.it or alberanid@libero.it
IMDbPY page: http://imdbpy.sf.net/
my homepage: http://erlug.linux.it/~da/ (Italian)
my cine-blog: http://cinemelma.blogspot.com/ (Italian)
Jabber ID: alberanid@jabber.linux.it (I'm on-line only from time to time)
ICQ UIN: 83641305 (nick 'Mad77') (I'm on-line only from time to time)
PGP KeyID: 0x465BFD47 (the key is available on my homepage and
through every keyserver)

Sometimes (very seldom) you can find me in IRC on channels #python,
#linux-it and so on over irc.freenode.net with the nick 'Mad77'.

Feel free to contact me for any ideas, bug reports and everything else. :-)

@@ -0,0 +1,72 @@
OTHER AUTHORS
=============

People who contributed a substantial amount of work and who
share the copyright over some portions of the code:

NAME: H. Turgut Uyar
EMAIL: <uyar --> tekir.org>
CONTRIBUTION: the whole new "http" data access system (using a DOM and
XPath-based approach) is based on his work. The imdbpykit interface
was mostly written by him and he holds the copyright over the whole
code (with some portions shared with others).


NAME: Giuseppe "Cowo" Corbelli
EMAIL: <cowo --> lugbs.linux.it>
CONTRIBUTION: provided a lot of code and hints to integrate IMDbPY
with SQLObject, working on the imdbpy2sql.py script and the dbschema.py
module.


Actually, besides Turgut, Giuseppe and me, these other people are
listed as developers for the IMDbPY project on sourceforge and may
share copyright on some (minor) portions of the code:

NAME: Alberto Malagoli
CONTRIBUTION: developed the new web site (and holds the copyright on it),
and provided helper functions and other code.


NAME: Martin Kirst
EMAIL: <martin.kirst --> s1998.tu-chemnitz.de>
CONTRIBUTION: has done an important refactoring of the imdbpyweb
program and shares with me the copyright on the whole program.


NAME: Jesper Nøhr
EMAIL: <jesper --> noehr.org>
CONTRIBUTION: provided extensive testing and some patches for
the 'http' data access system.


NAME: Joachim Selke
EMAIL: <j.selke --> tu-bs.de>
CONTRIBUTION: many tests on IBM DB2 and work on the CSV support.


NAME: Timo Schulz
EMAIL: <gnuknight --> users.sourceforge.net>
CONTRIBUTION: did a lot of work on 'sql', DB2 and CSV support and
extensive analysis aimed at diff files support.

NAME: Roy Stead
EMAIL: <roystead247 --> gmail.com>
CONTRIBUTION: provided the download_applydiffs.py script.


TRANSLATORS
===========

Additional translations were provided by:
- strel (Spanish)
- Stéphane Aulery (French)


DONATIONS
=========

We'd like to thank these persons for their donations:
- Fabian Winter
- Diego Sarmentero

@@ -0,0 +1,302 @@

CREDITS
=======

See also CONTRIBUTORS.txt for a list of the most important developers
who share the copyright on some portions of the code.

First of all, I want to thank all the maintainers of the
packages, listed on http://imdbpy.sf.net/?page=download#otherpkg,
and especially Ana Guerrero.
Another big thank-you to the developers who used IMDbPY for their
projects and research; they can be found here:
http://imdbpy.sf.net/?page=programs

Other very special thanks go to some people who followed very
closely the development of IMDbPY, providing hints and insights:
Ori Cohen, James Rubino, Tero Saarni and Jesper Nøhr (for a lot
of help, and also for the wonderful http://bitbucket.org)


Below, a list of persons who contributed with bug reports, small
patches and hints (kept in a reverse order since IMDbPY 4.5):

* all the translators on transifex.

* Troy Deck for a patch for MySQL.

* miles82 for a patch on metascore parsing.

* Albert Claret for the parser of the critic reviews page.

* Shobhit Singhal for fixes in parsing biographies and plots.

* Dan Poirier for documentation improvements.

* Frank Braam for a fix for MSSQL.

* Darshana Umakanth for a bug report about the search functions.

* Osman Boyaci for a bug report on movie quotes.

* Mikko Matilainen for a patch on encodings.

* Roy Stead for the download_applydiffs.py script.

* Matt Keenan for a report about i18n in search results.

* belgabor for a patch in the goofs parser.

* Ian Havelock for a bug report on charset identification.

* Mikael Puhakka for a bug report about foreign languages results doing a search.

* Wu Mao for a bug report on the GAE environment.

* legrostdg for a bug report on the new search pages.

* Haukur Páll Hallvarðsson for a patch on query parameters.

* Arthur de Peretti-Schlomoff for a list of French articles and
fixes to Spanish articles.

* John Lambert, Rick Summerhill and Maciej for reports and fixes
for the search query.

* Kaspars "Darklow" Sprogis for an impressive amount of tests and reports about
bugs parsing the plain text data files and many new ideas.

* Damien Stewart for many bug reports about the Windows environment.

* Vincenzo Ampolo for a bug report about the new imdbIDs save/restore queries.

* Tomáš Hnyk for the idea of an option to reraise caught exceptions.

* Emmanuel Tabard for ideas, code and testing on restoring imdbIDs.

* Fabian Roth for a bug report about the new style of episodes list.

* Y. Josuin for a bug report on missing info in crazy credits file.

* Arfrever Frehtes Taifersar Arahesis for a patch for locales.

* Gustaf Nilsson for bug reports about BeautifulSoup.

* Jernej Kos for patches to handle "in production" information
and birth/death years.

* Saravanan Thirumuruganathan for a bug report about genres in mobile.

* Paul Koan, for a bug report about DVD pages and movie references.

* Greg Walters for a report about a bug with queries with too
many results.

* Olav Kolbu for tests and report about how the IMDb.com servers
reply to queries made with and without cookies.

* Jef "ofthelit", for a patch for the reduce.sh script and bug
reports for Windows.

* Reiner Herrmann for benchmarks using SSD hard drives.

* Thomas Stewart for some tests and reports about a bug
with charset in the plain text data files.

* Ju-Hee Bae for an important series of bug reports about
the problems derived by the last IMDb's redesign.

* Luis Liras and Petite Abeille for a report and a bugfix about
imdbpy2sql.py used with SQLite and SQLObject.

* Kevin S. Anthony for a bug report about episodes list.

* Bhupinder Singh for a bug report about exception handling in Python 2.4.

* Ronald Hatcher for a bug report on the GAE environment.

* Ramusus for a lot of precious bug reports.

* Laurent Vergne for a hint about InnoDB, MyISAM and foreign keys.

* Israel Fruch for patches to support the new set of parsers.

* Inf3cted MonkeY, for a bug report about 'vote details'.

* Alexmipego, for suggesting to add a md5sum to titles and names.

* belgabortm for a bug report about movies with multiple 'countries'.

* David Kaufman for an idea to make the 'update' method more robust.

* Dustin Wyatt for a bug with SQLite of Python 2.6.

* Julian Scheid for bug reports about garbage in the ptdf.

* Adeodato Simó for a bug report about the new imdb.com layout.

* Josh Harding for a bug report about the new imdb.com layout.

* Xavier Naidoo for a bug report about top250 and BeautifulSoup.

* Basil Shubin for hints about a new helper function.

* Mark Jeffery, for some help debugging a lxml bug.

* Hieu Nguyen for a bug report about fetching real imdbIDs.

* Rdian06 for a patch for movies without plot authors.

* Tero Saarni, for the series 60 GUI and a lot of testing and
debugging.

* Ana Guerrero, for maintaining the official debian package.

* H. Turgut Uyar for a number of bug reports and a lot of work on
the test-suite.

* Ori Cohen for some code and various hints.

* Jesper Nøhr for a lot of testing, especially on 'sql'.

* James Rubino for many bug reports.

* Cesare Lasorella for a bug report about newer versions of SQLObject.

* Andre LeBlanc for a bug report about airing date of tv series episodes.

* aow for a note about some misleading descriptions.

* Sébastien Ragons for tests and reports.

* Sridhar Ratnakumar for info about PKG-INF.

* neonrush for a bug parsing Malcolm McDowell filmography!

* Alen Ribic for some bug reports and hints.

* Joachim Selke for some bug reports with SQLAlchemy and DB2 and a lot
of testing and debugging of the ibm_db driver (plus a lot of hints
about how to improve the imdbpy2sql.py script).

* Karl Newman for bug reports about the installer of version 4.5.

* Saruke Kun and Treas0n for bug reports about 'Forbidden' errors
from the imdb.com server.

* Chris Thompson for some bug reports about summary() methods.

* Mike Castle for performance tests with SQLite and numerous hints.

* Indy (indyx) for a bug about series cast parsing using BeautifulSoup.

* Yoav Aviram for a bug report about tv mini-series.

* Arjan Gijsberts for a bug report and patch for a problem with
movies listed in the Bottom 100.

* Helio MC Pereira for a bug report about unicode.

* Michael Charclo for some bug reports performing 'http' queries.

* Amit Belani for bug reports about plot outline and other changes.

* Matt Warnock for some tests with MySQL.

* Mark Armendariz for a bug report about too long field in MySQL db
and some tests/analyses.

* Alexy Khrabrov, for a report about a subtle bug in imdbpy2sql.py.

* Clark Bassett for bug reports and fixes about the imdbpy2sql.py
script and the cutils.c C module.

* mumas for reporting a bug in summary methods.

* Ken R. Garland for a bug report about 'cover url' and a lot of
other hints.

* Steven Ovits for hints and tests with Microsoft SQL Server, SQLExpress
and preliminary work on supporting diff files.

* Fredrik Arnell for tests and bug reports about the imdbpy2sql.py script.

* Arnab for a bug report in the imdbpy2sql.py script.

* Elefterios Stamatogiannakis for the hint about transactions and SQLite,
to obtain an impressive improvement in performances.

* Jon Sabo for a bug report about unicode and the imdbpy2sql.py script
and some feedback.

* Andrew Pendleton for a report about a very hideous bug in
the imdbpy2sql.py (garbage in the plain text data files + programming
errors + utf8 strings + postgres).

* Ataru Moroboshi ;-) for a bug report about role/duty and notes.

* Ivan Kedrin for a bug report about the analyze_title function.

* Hadley Rich for reporting bugs and providing patches for troubles
parsing tv series' episodes and searching for tv series' titles.

* Jamie R. Rytlewski for a suggestion about saving imdbIDs in 'sql'.

* Vincent Crevot, for a bug report about unicode support.

* Jay Klein for a bug report and testing to fix a nasty bug in the
imdbpy2sql.py script (splitting too large data sets).

* Ivan Garcia for an important bug report about the use of IMDbPY
within wxPython programs.

* Kessia Pinheiro for a bug report about tv series list of episodes.

* Michael G. Noll for a bug report and a patch to fix a bug
retrieving 'plot keywords'.

* Alain Michel, for a bug report about search_*.py and get_*.py scripts.

* Martin Arpon and Andreas Schoenle for bug reports (and patches)
about "runtime", "aka titles" and "production notes" information
not being parsed.

* none none (dclist at gmail.com) for a useful hint and code to
retrieve a movie/person object, given an URL.

* Sebastian Pölsterl, for a bug report about the cover url for
tv (mini) series, and another one about search_* methods.

* Martin Kirst for many hints and the work on the imdbpyweb program.

* Julian Mayer, for a bug report and a patch about non-ascii chars.

* Wim Schut and "eccentric", for bug reports and patches about
movies' cover url.

* Alfio Ferrara, for a bug report about the get_first_movie.py script.

* Magnus Lie Hetland for a hint about the searches in the sql package.

* Thomas Jadjewski for a bug report about the imdbpy2sql.py script.

* Trevor MacPhail, for a bug report about search_* methods and
the ParserBase.parse method.

* Guillaume Wisniewski, for a bug report.

* Kent Johnson, for a bug report.

* Andras Bali, for the hint about the "plot outline" information.

* Nick S. Novikov, who provided the Windows installer until I've
managed to set up a Windows development environment.

* Simone Bacciglieri, who downloaded the plain text data files for me.

* Carmine Noviello, for some design hints.

* "Basilius" for a bug report.

* Davide for a bug report.

Large diffs are not rendered by default.

@@ -0,0 +1,15 @@
DISCLAIMER
==========

IMDbPY (and the author) is not affiliated with Internet Movie Database Inc.

IMDb is a trademark of Internet Movie Database Inc. and all contents
and data included on the IMDb's site is the property of IMDb or its
content suppliers and protected by United States and international
copyright laws.

Please, read the IMDb's conditions of use in their website:
- http://www.imdb.com/help/show_article?conditions
- http://www.imdb.com/help/show_leaf?usedatasoftware
- any other notice in the http://www.imdb.com/ site.

@@ -0,0 +1,132 @@
IMDbPY FAQS
===========

Q1: Since version 3.7, parsing the data from the IMDb web site is slow,
sloow, slooow! Why?

A1: if python-lxml is not installed in your system, IMDbPY uses the
pure-python BeautifulSoup module as a fall-back; BeautifulSoup does
an impressive job, but it can't be as fast as a parser written in C.
You can install python-lxml following the instructions in the
README.newparsers file.


Q2: why are the movieID (and other IDs) used in the 'sql' database not
the same as those used on the IMDb.com site?

A2: first, a bit of nomenclature: we'll call "movieID" (or things like
"personID", for instance of the Person class) a unique identifier used
by IMDbPY to manage a single movie (or other kinds of object).
We'll call "imdbID" a unique identifier used, for the same kind
of data, by the IMDb.com site (i.e.: the 7-digit number in tt0094226,
as seen in the URL for "The Untouchables").

Using IMDbPY to access the web ('http' and 'mobile' data access
systems), movieIDs and imdbIDs are the same thing - beware that
in this case a movieID is a string, with the leading zeroes.

Unfortunately, populating a sql database with data from the plain
text data files, we don't have access to imdbIDs - since they are
not distributed at all - and so we have to make them up ourselves
(they are the 'id' column in tables like 'title' or 'name').
This means that these values are valid only for your current database:
if you update it with a newer set of plain text data files, these IDs
will surely change (and, by the way, they are integers).
It's also obvious, now, that you can't exchange IDs between the
'http' (or 'mobile') data access system and 'sql', and in the same
way you can't use imdbIDs with your local database or vice-versa.


Q3: using a sql database, what's the imdb_id (or something like that)
column in tables like 'title', 'name' and so on?

A3: it's internally used by IMDbPY to remember the imdbID (the one
used by the web site - accessing the database you'll use the numeric
value of the 'id' column, as movieID) of a movie, once it has stumbled
upon it. This way, if IMDbPY is asked again about the imdbID of
a movie (or person, or ...), it doesn't have to contact the web site
again. Notice that you have to access the sql database using
a user with write permission, to update it.

As a bonus, when possible, the values of these imdbIDs are saved
between updates of the sql database (using the imdbpy2sql.py script).
Beware that it's tricky and not always possible, but the script does
its best to succeed.


Q4: but what if I really need the imdbIDs, to use my database?

A4: no, you don't. Search for a title, get its information. Be happy!


Q5: I have a great idea: write a script to fetch all the imdbID from the
web site! Can't you do it?

A5: yeah, I can. But I won't. :-)
It would be somewhat easy to map every title on the web to its
imdbID, but there are still a lot of problems.
First of all, every user will end up doing it for their own copy
of the plain text data files (and this will make the imdbpy2sql.py
script painfully slow and prone to all sorts of problems).
Moreover, the imdbIDs are unique and never reused, true, but movie
titles _do_ change: to fix typos, override working titles, to cope
with a new movie with the same title released in the same year (not
to mention cancelled or postponed movies).

Besides that, we'd have to do the same for persons, characters and
companies. Believe me: it doesn't make sense.
Work on your local database using your movieIDs (or even better:
don't mind about movieIDs and think in terms of searches and Movie
instances!) and retrieve the imdbID only in the rare circumstances
when you really need them (see the next FAQ).
Repeat with me: I DON'T NEED ALL THE imdbIDs. :-)


Q6: using a sql database, how can I convert a movieID (whose value
is valid only locally) to an imdbID (the ID used by the imdb.com site)?

A6: various functions can be used to convert a movieID (or personID or
other IDs) to the imdbID used by the web site.
Example of code:

from imdb import IMDb
ia = IMDb('sql', uri=URI_TO_YOUR_SQL_DATABASE)
movie = ia.search_movie('The Untouchables')[0] # a Movie instance.
print 'The movieID for The Untouchables:', movie.movieID
print 'The imdbID used by the site:', ia.get_imdbMovieID(movie.movieID)
print 'Same ID, smarter function:', ia.get_imdbID(movie)

It goes without saying that get_imdbMovieID has some sibling
methods: get_imdbPersonID, get_imdbCompanyID and get_imdbCharacterID.
Also notice that the get_imdbID method is smarter, and takes any kind
of instance (the other functions need a movieID, personID, ...)

Another method that will try to retrieve the imdbID is get_imdbURL,
which works like get_imdbID but returns an URL.

In case of problems, these methods will return None.


Q7: I have a movie title (in the format used by the plain text data files)
or other kind of data (like a person/character/company name) and I want
to get its imdbID. How can I do it?

A7: the safest thing, is probably to do a normal search on IMDb (using the
'http' or 'mobile' data access system of IMDbPY) and see if the first
item is the correct one.
You can also try the 'title2imdbID' method (and similar) of the IMDb
instance (no matter if you're using 'http', 'mobile' or 'sql'), but
expect some failures - it returns None in this case.


Q8: I have an URL (of a movie, person or something else); how can I
get a Movie/Person/... instance?

A8: import the imdb.helpers module and use the get_byURL function.


Q9: I'm writing an interface based on IMDbPY and I have problems handling
encoding, chars conversions, replacements of references and so on.

A9: see the many functions in the imdb.helpers module.

Large diffs are not rendered by default.

@@ -0,0 +1,8 @@
INSTALLATION
============

See the "README.txt" file.

You have to read it anyway, don't you? <g>


@@ -0,0 +1,29 @@
IMDbPY

NOTE: see also the recommendations in the "DISCLAIMER.txt" file.

NOTE: for a list of other persons who share with me the copyright over
specific portions of code, see the "CONTRIBUTORS.txt" file.

NOTE: IMDbPY includes an unmodified version of BeautifulSoup,
renamed _bsoup.py; that code is copyrighted by its author,
Leonard Richardson <leonardr at segfault.org> and is released
under a New-style BSD license.


Copyright 2004-2009 Davide Alberani <da@erlug.linux.it>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

@@ -0,0 +1,73 @@
IMDbPY for (too) sensitive people
=================================

Since version 2.0 (shame on me! I've noticed this only after more
than a year of development!!!) by default adult movies are included
in the result of the search_movie(), search_episode() and search_person()
methods.

If for some unintelligible reason you don't want classics
like "Debbie Does Dallas" to show up in your list of results,
you can disable this feature initializing the IMDb class with
the 'adultSearch' argument set to 0 (or other "False" value).

E.g.:
from imdb import IMDb
ia = IMDb(accessSystem='http', adultSearch=0)


The behavior of an instance of the IMDb class can be modified at
runtime, calling the do_adult_search() method.

E.g.:
from imdb import IMDb

# By default in the horny-mode.
ia = IMDb(accessSystem='http')

# Just for this example, be sure to exclude the proxy.
ia.set_proxy(None)

results = ia.search_movie('debby does dallas', results=5)
for movie in results:
print movie['long imdb title'], movie.movieID
# It will print:
# Debbie Does Dallas (1978) 0077415
# Debbie Does Dallas Part II (1981) 0083807
# Debbie Does Dallas: The Next Generation (1997) (V) 0160174
# Debbie Does Dallas '99 (1999) (V) 0233539
# Debbie Does Dallas 3 (1985) 0124352

# You can now revert to the old puritan behavior.
ia.do_adult_search(0)

results = ia.search_movie('debby does dallas', results=5)
for movie in results:
print movie['long imdb title'], movie.movieID
# It will print only:
# Pauly Does Dallas (1993) (TV) 0208347


The do_adult_search() method of the http and mobile data access system
also takes another couple of arguments: "cookie_id" and "cookie_uu", so
that you can select _your own_ IMDb's account; if cookie_id is set to
None, no cookies are sent. These parameters can also be set in
the imdbpy.cfg configuration file.
For the strings to use, see your "cookie" or "cookie.txt" file.
Obviously you need to activate the "adult movies" option for
your account; see http://imdb.com/find/preferences?_adult=1


OTHER DATA ACCESS SYSTEMS
=========================

Since version 2.2 every other data access system (sql)
supports the same behavior as the http and mobile data access
systems (i.e.: you can set the 'adultSearch' argument and use
the 'do_adult_search' method).

Notice that for the sql data access system only results from the
search_movie() and search_episode() methods are filtered: there's no
easy (and fast) way to tell that an actor/actress is a porn-star.


@@ -0,0 +1,18 @@

COMPANIES DATA
==============

Since IMDbPY 3.6, companies have their own class and every reference
to a company is now an instance of the Company class.

NOTES
=====

- no references (inside text field) about companies are retrieved,
so far; I don't even think there are any in the IMDb web site, but
I may be wrong.

- 'mobile' data access system collects company information through
the 'httpThin' method; it should be fast enough.


@@ -0,0 +1,97 @@

THE currentRole ATTRIBUTE AND THE Character CLASS
=================================================

Since version 3.3, IMDbPY supports the character pages of the IMDb
database; this required some substantial changes to how actors'
and actresses' roles were handled.
Starting with release 3.4, "sql" data access system is supported,
too - but it works a bit differently from "http" and "mobile".
See "SQL" below.

The currentRole instance attribute can be found in every instance
of Person, Movie and Character classes, even if actually the Character
never uses it.

The currentRole of a Person object is set to a Character instance,
inside a list of persons who acted in a given movie.
The currentRole of a Movie object is set to a Character instance,
inside a list of movies played by a given person.
The currentRole of a Movie object is set to a Person instance,
inside a list of movies in which a given character was portrayed.

Schema:
movie['cast'][0].currentRole -> a Character object.
|
+-> a Person object.

person['actor'][0].currentRole -> a Character object.
|
+-> a Movie object.

character['filmography'][0].currentRole -> a Person object.
|
+-> a Movie object.

The roleID attribute can be used to access/set the characterID
or personID instance attribute of the current currentRole.
Building Movie or Person objects, you can pass the currentRole
parameter and the roleID parameter (to set the ID).
The currentRole parameter can be an object (Character or Person),
an unicode string (in which case a Character or Person object is
automatically instanced) or a list of objects or strings (to
handle multiple characters played by the same actor/actress in
a movie, or character played by more than a single actor/actress
in the same movie).

Anyway, currentRole objects (Character or Person instances) can
be pretty-printed easily: calling unicode(CharacterOrPersonObject)
will return a good-old-unicode string, like expected in the previous
version of IMDbPY.


SQL
===

Fetching data from the web, only characters with an active page
on the web site will have their characterID; we don't have this
information accessing "sql", so _every_ character will have an
associated characterID.
This way, every character with the same name will share the same
characterID, even if - in fact - they may not be portraying the
same character.


GOODIES
=======

To help getting the required information from Movie, Person and
Character objects, in the "helpers" module there's a new factory
function, makeObject2Txt, which can be used to create your
pretty-printing function.
It takes some optional parameters: movieTxt, personTxt, characterTxt
and companyTxt; in these strings %(value)s items are replaced with
object['value'] or with obj.value (if the first is not present).

E.g.:
import imdb
myPrint = imdb.helpers.makeObject2Txt(personTxt=u'%(name)s ... %(currentRole)s')
i = imdb.IMDb()
m = i.get_movie('0057012')
ps = m['cast'][0]
print myPrint(ps)
# The output will be something like:
Peter Sellers ... Group Captain Lionel Mandrake / President Merkin Muffley / Dr. Strangelove


Portions of the formatting string can be stripped conditionally: if
the specified condition is false, they will be cancelled.

E.g.:
myPrint = imdb.helpers.makeObject2Txt(personTxt='<if personID><a href=/person/%(personID)s></if personID>%(long imdb name)s<if personID></a></if personID><if currentRole> ... %(currentRole)s<if notes> %(notes)s</if notes></if currentRole>')


Another useful argument is 'applyToValues': if set to a function,
it will be applied to every value before the substitution; it can
be useful to format strings for html output.

@@ -0,0 +1,274 @@
DEVELOPMENT OF IMDbPY
=====================

A lot of other information useful to IMDbPY developers is available
in the "README.package" file.

Sections in this file:
* STRUCTURE OF THE IMDbPY PACKAGE
* GENERIC DESCRIPTION
* HOW TO EXTEND


STRUCTURE OF THE IMDbPY PACKAGE
===============================

imdb (package)
|
+-> _compat
+-> _exceptions
+-> _logging
+-> linguistics
+-> Movie
+-> Person
+-> Character
+-> Company
+-> utils
+-> helpers
+-> parser (package)
|
+-> http (package)
| |
| +-> movieParser
| +-> personParser
| +-> characterParser
| +-> companyParser
| +-> searchMovieParser
| +-> searchPersonParser
| +-> searchCharacterParser
| +-> searchCompanyParser
| +-> searchKeywordParser
| +-> topBottomParser
| +-> utils
| +-> bsouplxml
| |
| +-> _bsoup.py
| +-> etree.py
| +-> html.py
| +-> bsoupxpath.py
|
+-> mobile (package)
|
+-> sql (package)
|
+-> dbschema
+-> alchemyadapter
+-> objectadapter
+-> cutils (C module)


Description:
imdb (package): contains the IMDb function, the IMDbBase class and imports
the IMDbError exception class.
_compat: compatibility functions and class for some strange environments
(internally used).
_exceptions: defines the exceptions internally used.
_logging: provides the logging facility used by IMDbPY.
linguistics: defines some functions and data useful to smartly guess the
language of a movie title (internally used).
Movie: contains the Movie class, used to describe and manage a movie.
Person: contains the Person class, used to describe and manage a person.
Character: contains the Character class, used to describe and manage
a character.
Company: contains the Company class, used to describe and manage a company.
utils: miscellaneous utilities used by many IMDbPY modules.
parser (package): a package containing a package for every data access system
implemented.
http (package): contains the IMDbHTTPAccessSystem class which is a subclass
of the imdb.IMDbBase class; it provides the methods used to
retrieve and manage data from the web server (using,
in turn, the other modules in the package).
It defines methods to get a movie and to search for a title.
http.movieParser: parse html strings from the pages on the IMDb web server about
a movie; returns dictionaries of {key: value}
http.personParser: parse html strings from the pages on the IMDb web server
about a person; returns dictionaries.
http.characterParser: parse html strings from the pages on the IMDb web server
about a character; returns dictionaries.
http.companyParser: parse html strings from the pages on the IMDb web server
about a company; returns dictionaries.
http.searchMovieParser: parse an html string, result of a query for a movie
title.
http.searchPersonParser: parse an html string, result of a query for a person
name.
http.searchCharacterParser: parse an html string, result of a query for a
character name.
http.searchCompanyParser: parse an html string, result of a query for a
company name.
http.searchKeywordParser: parse an html string, result of a query for a keyword.
http.topBottomParser: parse an html string, result of a query for top250
and bottom100 movies.
http.utils: miscellaneous utilities used only by the http package.
http.bsouplxml (package): adapter to make BeautifulSoup behave like lxml
(internally, the API of lxml is always used).
http.bsouplxml._bsoup: just a copy of the BeautifulSoup module, so that it's not
an external dependency.
http.bsouplxml.etree: adapter for the lxml.etree module.
http.bsouplxml.html: adapter for the lxml.html module.
http.bsouplxml.bsoupxpath: xpath support for beautifulsoup.


The parser.sql package manages the access to the data in the SQL
database, created with the imdbpy2sql.py script; see the README.sqldb file.
The dbschema module contains tables definitions and some useful functions;
The alchemyadapter adapts the SQLAlchemy ORM to the internal mechanisms
of IMDbPY, and the objectadapter does the same for the SQLObject ORM
(internally the API of SQLObject is always used).
The cutils module is a C module containing C functions to speed up the
'sql' data access system; if it can't be compiled, a set of fall-back
functions will be used.

The class in the parser.mobile package is a subclass of the one found
in parser.http, with some methods overridden to be many times faster (from
2 to 20 times); it's useful for systems with slow bandwidth and not
much CPU power.

The helpers module contains functions and other goodies not directly
used by the IMDbPY package, but that can be useful to develop
IMDbPY-based programs.


GENERIC DESCRIPTION
===================

I wanted to stay independent from the source of the data for a given
movie/person/character/company, and so the imdb.IMDb function returns
an instance of a class that provides specific methods to access a given
data source (web server, SQL database, etc.)

Unfortunately that means that the movieID in the Movie class, the
personID in the Person class and the characterID in the Character class
are dependent on the data access system used.
So, when a Movie, a Person or a Character object is instantiated, the
accessSystem instance variable is set to a string used to identify the
used data access system.


HOW TO EXTEND
=============

To introduce a new data access system, you've to write a new package
inside the "parser" package; this new package must provide a subclass
of the imdb.IMDbBase class which must define at least the following methods:
_search_movie(title) - to search for a given title; must return a
list of (movieID, {movieData}) tuples.
_search_episode(title) - to search for a given episode title; must return a
list of (movieID, {movieData}) tuples.
_search_person(name) - to search for a given name; must return a
list of (personID, {personData}) tuples.
_search_character(name) - to search for a given character's name; must
return a list of (characterID, {characterData})
tuples.
_search_company(name) - to search for a given company's name; must
return a list of (companyID, {companyData})
tuples.
get_movie_*(movieID) - a set of methods, one for every set of information
defined for a Movie object; should return
a dictionary with the relative information.
This dictionary can contain some optional keys:
'data': must be a dictionary with the movie info.
'titlesRefs': a dictionary of 'movie title': movieObj
pairs.
'namesRefs': a dictionary of 'person name': personObj
pairs.
get_person_*(personID) - a set of methods, one for every set of information
defined for a Person object; should return
a dictionary with the relative information.
get_character_*(characterID) - a set of methods, one for every set of
information defined for a character object;
should return a dictionary with the relative
information.
get_company_*(companyID) - a set of methods, one for every set of
information defined for a company object;
should return a dictionary with the relative
information.
_get_top_bottom_movies(kind) - kind can be one of 'top' and 'bottom';
returns the related list of movies.
_get_keyword(keyword) - return a list of Movie objects with the given keyword.
_search_keyword(key) - return a list of keywords similar to the given key.
get_imdbMovieID(movieID) - must convert the given movieID to a string
representing the imdbID, as used by the IMDb web
server (e.g.: '0094226' for Brian De Palma's
"The Untouchables").
get_imdbPersonID(personID) - must convert the given personID to a string
representing the imdbID, as used by the IMDb web
server (e.g.: '0000154' for "Mel Gibson").
get_imdbCharacterID(characterID) - must convert the given characterID to a
string representing the imdbID, as used by
the IMDb web server (e.g.: '0000001' for
"Jesse James").
get_imdbCompanyID(companyID) - must convert the given companyID to a
string representing the imdbID, as used by
the IMDb web server (e.g.: '0071509' for
"Columbia Pictures [us]").
_normalize_movieID(movieID) - must convert the provided movieID in a
format suitable for internal use (e.g.:
convert a string to a long int).
NOTE: as a rule of thumb you _always_ need
to provide a way to convert a "string
representation of the movieID" into the
internally used format, and the internally
used format should _always_ be converted to
a string, in a way or another.
Rationale: a movieID can be passed from the
command line, or from a web browser.
_normalize_personID(personID) - idem.
_normalize_characterID(characterID) - idem.
_normalize_companyID(companyID) - idem.
_get_real_movieID(movieID) - return the true movieID; useful to handle
title aliases.
_get_real_personID(personID) - idem.
_get_real_characterID(characterID) - idem.
_get_real_companyID(companyID) - idem.

The class should raise the appropriate exceptions, when needed;
IMDbDataAccessError must be raised when you cannot access the resource
you need to retrieve movie info or you're unable to do a query (this is
_not_ the case when a query returns zero matches: in this situation an
empty list must be returned); IMDbParserError should be raised when an
error occurred parsing some data.

Now you've to modify the imdb.IMDb function so that, when the right
data access system is selected with the "accessSystem" parameter, an
instance of your newly created class is returned.

NOTE: this is a somewhat misleading example: we already have a
data access system for SQL databases (it's called 'sql' and it also
supports MySQL, amongst others). Maybe I'll find a better example...
E.g.: if you want to call your new data access system "mysql" (meaning
that the data are stored in a mysql database), you've to add to the imdb.IMDb
function something like:
if accessSystem == 'mysql':
from parser.mysql import IMDbMysqlAccessSystem
return IMDbMysqlAccessSystem(*arguments, **keywords)

where "parser.mysql" is the package you've created to access the
local installation, and "IMDbMysqlAccessSystem" is the subclass of
imdb.IMDbBase.
Then it's possible to use the new data access system like:
from imdb import IMDb
i = IMDb(accessSystem='mysql')
results = i.search_movie('the matrix')
print results

A specific data access system implementation can define its own
methods.
As an example, the IMDbHTTPAccessSystem that is in the parser.http package
defines the method set_proxy() to manage the use of a web proxy; you
can use it this way:
from imdb import IMDb
i = IMDb(accessSystem='http') # the 'accessSystem' argument is not
# really needed, since "http" is the default.
i.set_proxy('http://localhost:8080/')

A list of special methods provided by the imdb.IMDbBase subclass, along
with their description, is always available by calling the
get_special_methods() method of the IMDb class.
E.g.:
i = IMDb(accessSystem='http')
print i.get_special_methods()

will print a dictionary with the format:
{'method_name': 'method_description', ...}


@@ -0,0 +1,20 @@
IMDbPY HTTP CONNECTION
======================

HTTP is the default data access system of IMDbPY, meaning that by default
data are requested at the IMDb web servers.
For other kinds of data access, see README.sqldb and README.mobile.

By default IMDbPY uses its own account to access the IMDb web server (this
is done to enable searches on adult titles); if you want to use your own
account, see README.adult.

CONNECTION PROBLEMS
===================

Some problems connecting to the IMDb servers have been reported;
they seem to be related to the use of our cookie and the geographical
location of the user.
If you experience such a problem, report it and try to disable the use of the
cookie (to do so, see README.adult).

@@ -0,0 +1,100 @@

INFORMATION IN XML FORMAT
=========================

Since version 4.0, IMDbPY can output information of Movie, Person,
Character and Company instances in XML format.
It's possible to get a single piece of information (a key) in XML format,
using the getAsXML(key) method (it will return None if the key is
not found).
E.g.:
from imdb import IMDb
ia = IMDb('http')
movie = ia.get_movie(theMovieID)
print movie.getAsXML('keywords')

It's also possible to get a representation of a whole object,
using the asXML() method:
print movie.asXML()

The returned strings are unicode. The _with_add_keys argument
of the asXML() method can be set to False (default: True) to
exclude the dynamically generated keys (like 'smart canonical title'
and so on).


XML FORMAT
==========

Keywords are converted to tags, items in lists are enclosed in
an 'item' tag. E.g.:
<keywords>
<item>a keyword</item>
<item>another keyword</item>
</keywords>

Except when keys are known to be not fixed (e.g.: a list of keywords),
in which case this schema is used:
<item key="EscapedKeyword">
...
</item>

In general, the 'key' attribute is present whenever the used tag
doesn't match the key name.

Movie, Person, Character and Company instances are converted like
that (portions enclosed in square brackets are optional):
<movie id="movieID" access-system="accessSystem">
<title>A Long IMDb Movie Title (YEAR)</title>
[<current-role>
<person id="personID" access-system="accessSystem">
<name>Name Surname</name>
[<notes>A Note About The Person</notes>]
</person>
</current-role>]
[<notes>A Note About The Movie</notes>]
</movie>

Every 'id' can be empty.

Currently the returned XML is mostly not pretty-printed.


REFERENCES
==========

Some text keys can contain references to other movies, persons
and characters. The user can provide the defaultModFunct function (see
the "MOVIE TITLES AND PERSON/CHARACTER NAMES REFERENCES" section of
the README.package file), to replace these references with their own
strings (e.g.: a link to a web page); it's up to the user, to be sure
that the output of the defaultModFunct function is valid XML.


DTD
===

Since version 4.1 a DTD is available; it can be found in this
directory or on the web, at:
http://imdbpy.sf.net/dtd/imdbpy41.dtd

The version number changes with the IMDbPY version.


LOCALIZATION
============

Since version 4.1 it's possible to translate the XML tags;
see README.locale.


FROM XML TO OBJECTS
===================

Since version 4.6, you can dump the generated XML in a string or
in a file, using it - later - to rebuild the original object.
In the imdb.helpers module there's the parseXML() function which
takes a string as input and returns - if possible - an instance of
the Movie, Person, Character or Company classes.


@@ -0,0 +1,41 @@

KEYWORDS
========

Since version 4.0, it's possible (for every data access system) to
search for movies' keywords.
People's keywords are not supported.


SEARCH FOR A KEYWORD SIMILAR TO A GIVEN STRING
==============================================

The search_keyword(unicode_string) can be used to search amongst
keywords: a list of keywords similar to the given string will be
returned, sorted by similarity. Notice that the keywords in the
returned list are plain unicode strings, and not instances of
some class (like the ones returned by other search_SOMETHING methods).

E.g.:
from imdb import IMDb
ia = IMDb('http')
print ia.search_keyword(u'alabama')


GET A LIST OF MOVIES FOR A GIVEN KEYWORD
========================================

To get a list of movies that are tagged with the given keyword,
use the get_keyword(unicode_string) method.
E.g.:
from imdb import IMDb
ia = IMDb('http')
print ia.get_keyword(u'alabama')

Beware that by default the list is limited to 100 movies, and
it's not possible to get more results, using 'http'.
Moreover, the lists returned using 'sql' are not sorted in any way.
Another limit is that currently (as of February 2009), the IMDb's
web server is unable to serve pages about non-ascii keywords.
It's a known problem of their systems.

@@ -0,0 +1,18 @@
LOCAL INSTALLATION
==================

Simple instruction: switch to 'sql' (see the README.sqldb file).

The 'local' data access system was removed since IMDbPY 4.2, for
a series of good reasons:
- the moviedb program was no longer distributed by IMDb.
- the new format for movie titles ("The Title" instead of "Title, The")
created way too many problems almost impossible to fix, since a lot
of damage was done by the - never updated - moviedb program.
- it was slower and less complete than 'sql'.
- there were very few users of it.


If you are veeery willing to resuscitate it, you can write in
the mailing list about your crazy idea. :-)

@@ -0,0 +1,109 @@

LOCALIZATION FOR IMDbPY
=======================

Since version 4.1 it's easy to translate the labels that describe
sets of information.


LIMITATION
==========

So far no internal message or exception is translated, the
internationalization is limited to the "tags" returned
by the getAsXML and asXML methods of the Movie, Person, Character
or Company classes. Beware that in many cases these "tags" are not
the same as the "keys" used to access information in the same
classes, as if they are dictionaries.
E.g.: you can translate "long-imdb-name" - the tag returned by
the call person.getAsXML('long imdb name') - but not "long imdb name"
directly.
To translate keys, you can use the helpers.translateKey function in
the 'helpers' module.


USAGE
=====

If you want to add i18n to your IMDbPY-based application, all you need
to do is to switch to the 'imdbpy' text domain.

E.g.:
import imdb.locale

# Standard gettext stuff.
import gettext
from gettext import gettext as _

# Switch to the imdbpy domain.
gettext.textdomain('imdbpy')

# Request a translation.
print _(u'long-imdb-name')


ADD A NEW LANGUAGE
==================

You can (but you're not forced to) use Transifex to manage/coordinate
your translations; see: http://www.transifex.net/projects/p/imdbpy/c/default/
Below are the generic instructions on how translation works.

In the imdb.locale package, you'll find some scripts useful to build
your own internationalization files.
If you create a new translation or update an existing one, you can send
it to the <imdbpy-devel@lists.sourceforge.net> mailing list, for
inclusion in the next releases.

- the generatepot.py should be used only when the DTD is changed; it's
used to create the imdbpy.pot file (the one shipped is always
up-to-date).
- you can copy the imdbpy.pot file to your language's .po file (e.g.
imdbpy-fr.po for French) and modify it according to your needs.
- then you must run rebuildmo.py (which is automatically called
at install time, by the setup.py script) to create the .mo files.

If you need to upgrade an existing .po file, after changes to the .pot
file (usually because the DTD was changed), you can use the msgmerge
tool, part of the GNU gettext suite.
E.g.:
msgmerge -N imdbpy-fr.po imdbpy.pot > new-imdbpy-fr.po



ARTICLES IN TITLES
==================

Converting a title to its 'Title, The' canonical format, IMDbPY makes
some assumptions about what is an article and what not, and this could
lead to some wrong canonical titles. E.g.: "Hard, Die" instead of
"Die Hard", since 'Die' is guessed as an article (and it is, in Germany...)
To solve this problem, there are other keys: "smart canonical title",
"smart long imdb canonical title", "smart canonical series title",
"smart canonical episode title" which can be used to do a better job
converting a title into its canonical format.

It works, but it needs to know something about articles in various
languages: if you want to help, see the LANG_ARTICLES and LANG_COUNTRIES
dictionaries in the 'linguistics' module.

To know the language in which a movie title is assumed to be,
call its 'guessLanguage' method (it will return None, if unable to guess).
If you want to force a given language instead of the guessed one, you
can call its 'smartCanonicalTitle' method, setting the 'lang' argument
appropriately.


TITLE AKAS
==========

Sometimes it's useful to manage title's AKAs knowing their languages.
In the 'helpers' module there are some (hopefully) useful functions:
akasLanguages(movie) - given a movie, return a list of tuples
in (lang, AKA) format (lang can be None, if unable to detect).
sortAKAsBySimilarity(movie, title) - sorts the AKAs on a movie considering
how much they are similar to a given title (see
the code for more options).
getAKAsInLanguage(movie, lang) - return a list of AKAs of the movie in the given
language (see the code for more options).

@@ -0,0 +1,15 @@

LOGGING
=======

Since version 4.4 IMDbPY provides a logging facility, using the
powerful "logging" module.
You can find documentation about it here:
http://docs.python.org/library/logging.html

By default information is logged to standard error; you can read
in the module documentation how to stream it elsewhere.

The default logging level is "warning"; this can be changed
modifying the "loggingLevel" key of your imdbpy.cfg file.

@@ -0,0 +1,114 @@
IMDbPY FOR SMALL SYSTEMS
========================

Since version 1.8, IMDbPY tries to be usable even on
systems with very limited storage space, bandwidth and
CPU power, like PDA, hand-held devices and mobile phones.

Sections in this file:
* INSTALLATION OPTIONS
how to save a little space installing IMDbPY.
* THE "MOBILE" DATA ACCESS SYSTEM
useful for systems with very little CPU power and bandwidth.
* THE "HTTPTHIN" DATA ACCESS SYSTEM
for systems with normal CPU power, but insufficient bandwidth.
* OTHER TIPS

Please read all the following sections.


INSTALLATION OPTIONS
====================

You can call the setup.py script with some arguments:

The --without-sql argument, if used, will exclude the parser.sql
package; you don't need it if your system does not have any of the
SQLObject or SQLAlchemy packages and/or you don't want to store the
whole IMDb's plain text database files in a SQL database.

Now, if you're installing IMDbPY (using ./setup.py install), you
should take a look at some options, like "--no-compile" and "-O0"
to exclude pyc and pyo files, saving hundreds of KBs.

Moreover, if you're creating a package (rpm, deb or whatever),
in the setup.cfg you can exclude from your package things
like the documentation (more than 200Kb) and the scripts in the
./bin/ directory.


THE "MOBILE" DATA ACCESS SYSTEM
===============================

Intended to be used with PDA, smart phones and hand-held devices,
the "mobile" data access system is a subclass of the default
"httpThin" data access system, with some methods replaced with faster
string methods, instead of the html parser. Moreover, for
the movies, only the main information is retrieved (see the 'httpThin'
notes). It should be, at usage time, from 2 to 20 times faster than
the "http"/"httpThin" data access system.

This code still needs tests on mobile phones!
Please report any bugs/ideas/hints...

Usage:
from imdb import IMDb
i = IMDb('mobile')
sp = i.search_person('mel gibson', results=10)
p = sp[0]
i.update(p)
sm = i.search_movie('mystic river', results=15)
m = sm[0]
i.update(m)
...and so on...


A GUI for Series 60 smart phones is available at:
http://imdbpy.sourceforge.net/?page=mobile


THE "HTTPTHIN" DATA ACCESS SYSTEM
=================================

Instead of the default data access system ('http'), you can
also use 'httpThin' (or 'webThin' or 'htmlThin').

I.e.:
from imdb import IMDb
i = IMDb('httpThin')
sp = i.search_person('mel gibson', results=10)
sm = i.search_movie('mystic river', results=15)
...and so on...


The main difference is that, parsing movies' information,
the "maindetails" page is parsed, in place of the "combined" page.
This reduces the required bandwidth and the CPU power needed.
Obviously a lot of information is lost (and only the first 15
people of the cast are listed), but it still retrieves everything
you usually need (director, writer, runtime, country, language, akas,
etc.)

Another difference is that, if the "defaultModFunct" parameter is not
provided (as default) calling the IMDb() function, no references
to people or movie are collected from textual information (like
the plot of a movie).


OTHER TIPS
==========

Remember that, calling the search_movie(), search_episode() and
search_person() methods of the "IMDb" object, you can provide a "results"
parameter, to download only a limited amount of results (20,
by default).

With the http, httpThin and mobile data access systems you can
set a proxy with the set_proxy() method; e.g.:
i = IMDb('http')
i.set_proxy('http://localhost:8080/')

Remember that the proxy is automatically used if the $HTTP_PROXY
environment variable is set.


@@ -0,0 +1,81 @@
IMDbPY'S NEW HTML PARSERS
=========================

Since version 3.7, IMDbPY has moved its parsers for the HTML of
the IMDb's website from a set of subclasses of SGMLParser (they
were finite-state machines, SGMLParser being a SAX parser) to
a set of parsers based on the libxml2 library or on the BeautifulSoup
module (and so, using a DOM/XPath-based approach).
The idea and the implementation of these new parsers are mostly the
work of H. Turgut Uyar, and can lead to parsers that are shorter,
easier to write and maybe even faster.

The old set of parsers was removed since IMDbPY 4.0.


LIBXML AND/OR BEAUTIFULSOUP
===========================

To use "lxml", you need the libxml2 library installed (and its
python-lxml binding). If it's not present on your system, you'll
fall-back to BeautifulSoup - distributed alongside IMDbPY, and so
you don't need to install anything.
However, beware that being pure-Python, BeautifulSoup is much
slower than lxml, so install lxml, if you can.

If for some reason you can't get lxml and BeautifulSoup is too
slow for your needs, consider the use of the 'mobile' data
access system.


GETTING LIBXML, LIBXSLT AND PYTHON-LXML
=======================================

If you're in a Microsoft Windows environment, all you need is
python-lxml (it includes all the required libraries), which can
be downloaded from here:
http://pypi.python.org/pypi/lxml/

Otherwise, if you're in a Unix environment, you can download libxml2
and libxslt from here (you need both, to install python-lxml):
http://xmlsoft.org/downloads.html
http://xmlsoft.org/XSLT/downloads.html

The python-lxml package can be found here:
http://codespeak.net/lxml/index.html#download

Obviously you should first check if these libraries are already
packaged for your distribution/operating system.

IMDbPY was tested with libxml2 2.7.1, libxslt 1.1.24 and
python-lxml 2.1.1. Older versions can work, too; if
you have problems, submit a bug report specifying your versions.

You can also get the latest version of BeautifulSoup from here:
http://www.crummy.com/software/BeautifulSoup/
but since it's distributed with IMDbPY, you don't need it (or
you have to override the '_bsoup.py' file in the imdb/parser/http
directory), and this is probably not a good idea, since newer versions
of BeautifulSoup behave in new and unexpected ways.


USING THE OLD PARSERS
=====================

The old set of parsers was removed since IMDbPY 4.0.


FORCING LXML OR BEAUTIFULSOUP
=============================

By default, IMDbPY uses python-lxml, if it's installed.
You can force the use of one given parser passing the 'useModule'
parameter. Valid values are 'lxml' and 'BeautifulSoup'. E.g.:
from imdb import IMDb
ia = IMDb('http', useModule='BeautifulSoup')
...

useModule can also be a list/tuple of strings, to specify the
preferred order.