Skip to content

Commit

Permalink
file dated Nov 17, 2007 10:37 pm
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Folkinshteyn committed Jul 2, 2009
1 parent b873074 commit d037474
Showing 1 changed file with 54 additions and 41 deletions.
95 changes: 54 additions & 41 deletions ricemaker.py
Expand Up @@ -32,7 +32,7 @@
#
# Author: Daniel Folkinshteyn <dfolkins@temple.edu>
#
# Version: ricemaker.py 0.1.4 14-Nov-2007 dfolkins@temple.edu
# Version: ricemaker.py 0.2.1 18-Nov-2007 dfolkins@temple.edu
#
# Project home (where to get the freshest version):
# http://smokyflavor.wikispaces.com/RiceMaker
Expand All @@ -41,6 +41,7 @@

import urllib, urllib2
from BeautifulSoup import BeautifulSoup
import optparse
import subprocess
import re
import os.path
Expand All @@ -49,49 +50,34 @@
import traceback
import pickle

###############
# Some configuration variables
###############

# if you have wordnet installed, set this to full path of commandline wordnet executable
# on linux, this would usually be '/usr/bin/wn'
# on windows, it might be something like 'C:\Program Files\WordNet\wn.exe'
wordnetpath = '/usr/bin/wn'

# loop delay config: sleep a random number of seconds, between lowsec and highsec
# it is recommended that you not reduce the average sleep to less than 3 seconds
# so as not to stress the freerice server too much, and to not appear too much like a bot. :)
# (notice how the average of the default values is 3 secs)
sleeplowsec = 1
sleephighsec = 5

# filename for internally generated dictionary
# you may specify a full path here, otherwise it will just get written to the same
# directory where this script resides (default behavior)
# no need to change this unless you really feel like it.
freericedictfilename = 'freericewordlist.txt'

# number of iterations between dictionary dumps to file
# more often than 5 minutes is really unnecessary...
# consider: iterations * avgsleeptime = time between dumps
iterationsbetweendumps = 100

###############
# Start code
###############
class VersionInfo:
    '''Container for the program's static metadata.

    Every instance carries the same fixed set of string attributes:
    name, version, description, url, license, author, author_email
    and platform.
    '''

    def __init__(self):
        # Attribute name / value pairs, assigned in this order.
        metadata = (
            ("name", "RiceMaker"),
            ("version", "0.2.1"),
            ("description", "Script to automatically generate rice on freerice.com"),
            ("url", "http://smokyflavor.wikispaces.com/RiceMaker"),
            ("license", "GPL"),
            ("author", "Daniel Folkinshteyn"),
            ("author_email", "dfolkins@temple.edu"),
            ("platform", "Any"),
        )
        for attribute, value in metadata:
            setattr(self, attribute, value)

class RiceMaker:

def __init__(self, url):
self.version = VersionInfo()
self.options=None
self.ParseOptions()

self.url = url
response = urllib2.urlopen(self.url)
result = response.read()
self.soup = BeautifulSoup(result)

if os.path.lexists(freericedictfilename) and os.path.getsize(freericedictfilename) > 0:
if os.path.lexists(self.options.freericedictfilename) and os.path.getsize(self.options.freericedictfilename) > 0:
try:
f = open(freericedictfilename, 'rb')
f = open(self.options.freericedictfilename, 'rb')
self.ricewordlist = pickle.load(f)
f.close()
print "dict read successful"
Expand All @@ -101,16 +87,43 @@ def __init__(self, url):
self.ricewordlist = {}
else:
self.ricewordlist = {}

def ParseOptions(self):
    '''Read command line options and store them in self.options.

    Builds an optparse parser from the metadata in self.version, parses
    sys.argv and assigns the resulting values object to self.options.
    Positional arguments are parsed but ignored.

    Recognised options (with defaults): debug (False), wordnetpath
    ("/usr/bin/wn"), sleeplowsec (1), sleephighsec (5),
    freericedictfilename ("freericewordlist.txt") and
    iterationsbetweendumps (100).

    Raises SystemExit (via optparse) for -h/--help, --version, malformed
    options, a negative sleep bound or a zero dump interval. In all of
    those cases self.options is left unassigned.
    '''
    # Use the project name verbatim: str.capitalize() lower-cases everything
    # after the first character, which turned "RiceMaker" into "Ricemaker".
    version_banner = (self.version.name + " version " + self.version.version
                      + "\nProject homepage: " + self.version.url)
    parser = optparse.OptionParser(
        version=version_banner,
        description="RiceMaker will automatically play the vocabulary game on freerice.com to generate rice donations. For a more detailed usage manual, see the project homepage: " + self.version.url,
        formatter=optparse.TitledHelpFormatter(),
        usage="python %prog [options]")
    parser.add_option("-d", "--debug", action="store_true", dest="debug", help="Debug mode (print some extra debug output). [default: %default]")
    # Backslashes are escaped explicitly; the displayed text is unchanged.
    parser.add_option("-w", "--wordnetpath", action="store", dest="wordnetpath", help="Full path to the WordNet commandline executable, if installed. On Linux, something like '/usr/bin/wn'; on Windows, something like 'C:\\Program Files\\WordNet\\wn.exe'. [default: %default]")
    parser.add_option("-l", "--sleeplowsec", action="store", type="float", dest="sleeplowsec", help="Lower bound on the random number of seconds to sleep between iterations. [default: %default]")
    parser.add_option("-m", "--sleephighsec", action="store", type="float", dest="sleephighsec", help="Upper bound on the random number of seconds to sleep between iterations. [default: %default]")
    parser.add_option("-f", "--freericedictfilename", action="store", dest="freericedictfilename", help="Filename for internally generated dictionary. You may specify a full path here, otherwise it will just get written to the same directory where this script resides (default behavior). No need to change this unless you really feel like it. [default: %default]")
    parser.add_option("-i", "--iterationsbetweendumps", action="store", type="int", dest="iterationsbetweendumps", help="Number of iterations between dictionary dumps to file. More often than 5 minutes is really unnecessary (Time between dumps is iterationsbetweendumps * avgsleeptime = time between dumps.) [default: %default]")

    parser.set_defaults(debug=False,
                        wordnetpath="/usr/bin/wn",
                        sleeplowsec=1,
                        sleephighsec=5,
                        freericedictfilename="freericewordlist.txt",
                        iterationsbetweendumps=100)

    options, _unused_args = parser.parse_args()

    # Reject values that would otherwise only blow up later, in the middle
    # of the main loop: sleeping a negative time and "i % 0".
    if options.sleeplowsec < 0 or options.sleephighsec < 0:
        parser.error("sleeplowsec and sleephighsec must not be negative")
    if options.iterationsbetweendumps == 0:
        parser.error("iterationsbetweendumps must not be zero")

    # Assign only after validation so that an early exit leaves
    # self.options untouched.
    self.options = options
    if self.options.debug:
        # Single-argument print: same output under Python 2 and Python 3.
        print("Your commandline options:\n" + str(self.options))

def __del__(self):
    '''Write the learned word dictionary to disk when the object dies.

    Pickles self.ricewordlist (highest protocol) into the file named by
    self.options.freericedictfilename and prints a confirmation.

    Does nothing when the object was only partially constructed:
    optparse exits for -h/--version before self.options is assigned, and
    __init__ can fail before self.ricewordlist exists.
    '''
    options = getattr(self, 'options', None)
    wordlist = getattr(self, 'ricewordlist', None)
    if options is None or wordlist is None:
        return

    f = open(options.freericedictfilename, 'wb')
    try:
        pickle.dump(wordlist, f, -1)
    finally:
        # Release the handle even if pickling fails.
        f.close()
    print('dump successful')

def dbDump(self):
try:
f = open(freericedictfilename, 'wb')
f = open(self.options.freericedictfilename, 'wb')
pickle.dump(self.ricewordlist, f, -1)
f.close()
print 'dump successful'
Expand All @@ -124,10 +137,10 @@ def start(self):
while 1:
i = i+1
print "*************************"
if i % iterationsbetweendumps == 0:
if i % self.options.iterationsbetweendumps == 0:
self.dbDump()

time.sleep(random.randint(sleeplowsec,sleephighsec)) # let's wait - to not hammer the server, and to not appear too much like a bot
time.sleep(random.uniform(self.options.sleeplowsec,self.options.sleephighsec)) # let's wait - to not hammer the server, and to not appear too much like a bot

try: #to catch all exceptions and ignore them
mydiv = self.soup.findAll(attrs={'class':'wordSelection'})
Expand Down Expand Up @@ -211,7 +224,7 @@ def lookupInMyDict(self, targetword, wordlist):
return self.lookupInWordnet(targetword, wordlist)

def lookupInWordnet(self, targetword, wordlist):
if os.path.lexists(wordnetpath):
if os.path.lexists(self.options.wordnetpath):
executionstring = "wn '" + targetword + "' -synsn -synsv -synsa -synsr -hypen -hypev -hypon -hypov"
p = subprocess.Popen(executionstring, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True)
returncode = p.wait()
Expand Down

0 comments on commit d037474

Please sign in to comment.