Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: master
Fetching contributors…

Cannot retrieve contributors at this time

456 lines (399 sloc) 22.576 kB
#!/usr/bin/python
#################################################
##
## RiceMaker: Automate the gameplay on freerice.com
## Copyright (C) 2008 Daniel Folkinshteyn <dfolkins@temple.edu>
##
## http://smokyflavor.wikispaces.com/RiceMaker
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 3
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
#################################################
#################################################
# requires python (http://www.python.org)
# requires BeautifulSoup module (http://www.crummy.com/software/BeautifulSoup/)
# run with command:
# python ricemaker.py
#################################################
# ricemaker.py
# Script to automatically generate rice on freerice.com
#
# Author: Daniel Folkinshteyn <nanotube@users.sourceforge.net>
#
# Project home (where to get the freshest version):
# http://wiki.df.dreamhosters.com/wiki/RiceMaker
#
#################################################
import urllib, urllib2
from BeautifulSoup import BeautifulSoup
import optparse
import subprocess
import re
import os.path
import random
import time
import traceback
import pickle
import threading
import Queue
class VersionInfo:
    '''Holds the program's identifying metadata (name, version, project
    URL, license and author details) as plain instance attributes.
    '''

    def __init__(self):
        # (attribute name, value) pairs, applied in order.
        metadata = (
            ("name", "RiceMaker"),
            ("version", "0.5.3"),
            ("description", "Script to automatically generate rice on freerice.com"),
            ("url", "http://wiki.df.dreamhosters.com/wiki/RiceMaker"),
            ("license", "GPL"),
            ("author", "Daniel Folkinshteyn"),
            ("author_email", "nanotube@users.sourceforge.net"),
            ("platform", "Any"),
        )
        for field, value in metadata:
            setattr(self, field, value)
class RiceMakerController:
    '''Spawn RiceMaker worker threads and aggregate what they report.

    The controller parses the command line, loads the pickled word
    dictionary and the running rice total from disk, creates one RiceMaker
    thread per --threads, and then (in start()) consumes the result queue:
    it prints per-iteration statistics, merges newly learned word pairs
    into the dictionary and periodically dumps state back to disk.
    '''

    # Page that serves the game; used both for the grain-value probe and
    # as the URL handed to every worker thread.
    GAME_URL = 'http://www.freerice.com/index.php'

    def __init__(self):
        self.version = VersionInfo()
        # Stays None when optparse exits early (e.g. -h); __del__ checks it.
        self.options = None
        self.ParseOptions()
        self.session_filename = "ricemakersession.txt"
        # Created before any network access so cleanup always finds it.
        self.threadlist = []
        self.readDictFile()
        self.running_rice_total = 0
        self.read_session_info()
        self.ricecounter = 0
        self.answer_value = self.get_answer_value(url=self.GAME_URL)
        self.queue = Queue.Queue(0)
        self.queueitem = {}
        for i in range(self.options.threads):
            self.threadlist.append(RiceMaker(url=self.GAME_URL,
                                             options=self.options,
                                             wordlist=self.ricewordlist,
                                             queue=self.queue,
                                             threadnumber=i))

    def get_answer_value(self, url):
        '''Return the number of grains awarded per correct answer.

        Fetches `url` and looks for text of the form "1 right = N grains".
        Falls back to 10 when the page cannot be parsed. Errors while
        fetching the page are not caught here.
        '''
        request = urllib2.Request(url, headers={'User-Agent': self.options.useragent})  # spoof useragent
        response = urllib2.urlopen(request)
        try:
            result = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(result)
        try:
            text = soup.find(text=re.compile(r'1 right ='))
            answer_value = re.search(r'=\s*(\d+)\s*grains', text).group(1)
            return int(answer_value)
        except Exception:
            print('Failed to get answer value, using 10 as default...')
            return 10  # if we can't find the grain value, just assume it's 10.

    def start(self):
        '''Start the worker threads and process their result queue.

        Loops until any exception (including KeyboardInterrupt) escapes the
        processing code; the traceback is printed and all threads are then
        asked to stop.
        '''
        print("Starting threads...")
        for i, t in enumerate(self.threadlist):
            t.start()
            print("started thread %s" % (i,))
        print("Started all threads!")
        print("******************************************")
        self.starttime = time.time()  # our start time - used for the per-second stats
        self.iterator = 0
        try:
            while 1:
                try:
                    self.queueitem = self.queue.get(block=True, timeout=10)
                except Queue.Empty:
                    continue  # empty queue: not an iteration, just try again
                self.iterator += 1
                self._process_queueitem(self.queueitem)
        except:  # deliberately bare: Ctrl-C must also end up cancelling the threads
            traceback.print_exc()
        for t in self.threadlist:
            t.cancel()

    def _process_queueitem(self, item):
        '''Account for one worker result: print stats, merge words, dump.'''
        info = item['print']
        self.ricecounter += int(item['rice'])
        print("iteration: %s" % (self.iterator,))
        # .get(): a worker that failed to fill in one field should not take
        # the whole controller down with a KeyError.
        print("thread number: %s" % (info.get('threadnumber', 'unknown'),))
        print("targetword: %s" % (info.get('targetword', 'unknown'),))
        print("answer: %s" % (info.get('answer', 'unknown'),))
        print("correct? %s" % (info.get('correct', 'unknown'),))
        print("vocab level: %s" % (info.get('vocablevel', 'unknown'),))
        print("total rice this session: %s" % (self.ricecounter,))
        print("total rice all recorded sessions: %s" % (self.running_rice_total + self.ricecounter,))
        print("percent correct this session: %s%%" % (self._percent_correct(),))
        elapsed = time.time() - self.starttime
        print("iterations per second %s ; rice per second %s"
              % (self.iterator / elapsed, self.ricecounter / elapsed))
        print("******************************************")
        self.ricewordlist.update(item['dict'])
        interval = self.options.iterationsbetweendumps
        # interval <= 0 would make the modulo raise (or be meaningless).
        if interval > 0 and self.iterator % interval == 0:
            self.dbDump()
            self.write_session_info()

    def _percent_correct(self):
        '''Return the share of correct answers this session, in percent.

        Computed as rice earned / grains per answer / iterations, using
        float arithmetic (integer division would truncate the ratio to
        0 under Python 2). Returns 0.0 before the first iteration or when
        the grain value is zero.
        '''
        if not self.answer_value or not self.iterator:
            return 0.0
        return round(100.0 * self.ricecounter / self.answer_value / self.iterator, 2)

    def _write_pickle(self, filename, obj):
        '''Pickle `obj` into `filename`, always closing the file.'''
        f = open(filename, 'wb')
        try:
            pickle.dump(obj, f, -1)
        finally:
            f.close()

    def _read_pickle(self, filename):
        '''Unpickle and return the contents of `filename`.

        NOTE: pickle.load can execute arbitrary code; these files are
        expected to be ones this program wrote itself. Do not point the
        program at pickle files from an untrusted source.
        '''
        f = open(filename, 'rb')
        try:
            return pickle.load(f)
        finally:
            f.close()

    def __del__(self):
        '''Persist the dictionary and session total, then stop the threads.

        Does nothing when option parsing never completed (e.g. -h), and
        tolerates an instance whose __init__ was interrupted part-way.
        '''
        options = getattr(self, 'options', None)
        if options is None:  # when running with -h, optparse exits before initializing options
            return
        try:
            wordlist = getattr(self, 'ricewordlist', None)
            if wordlist is not None:
                self._write_pickle(options.freericedictfilename, wordlist)
                print('Successfully wrote internal dictionary to file. %s elements in dictionary.'
                      % (len(wordlist),))
            session_filename = getattr(self, 'session_filename', None)
            if session_filename is not None:
                ricecounter = getattr(self, 'ricecounter', 0)
                total = getattr(self, 'running_rice_total', 0) + ricecounter
                self._write_pickle(session_filename, total)
                print('Successfully wrote session info file.')
                print('Rice this session: %s' % (ricecounter,))
                print('Rice all stored sessions: %s' % (total,))
        finally:
            # A failed write must not prevent the workers from being stopped.
            for t in getattr(self, 'threadlist', []):
                t.cancel()

    def readDictFile(self):
        '''Load the word dictionary into self.ricewordlist.

        A missing, empty or unreadable file yields an empty dictionary.
        '''
        path = self.options.freericedictfilename
        self.ricewordlist = {}
        # exists() rather than lexists(): a dangling symlink would make
        # getsize() raise; this also matches read_session_info().
        if os.path.exists(path) and os.path.getsize(path) > 0:
            try:
                self.ricewordlist = self._read_pickle(path)
                print("dict read successful, %s elements in dictionary" % (len(self.ricewordlist),))
            except Exception:
                print("bad dict")
                traceback.print_exc()
                self.ricewordlist = {}

    def dbDump(self):
        '''Write the word dictionary to disk; failures are reported, not raised.'''
        try:
            self._write_pickle(self.options.freericedictfilename, self.ricewordlist)
            self.printDebug('dump successful')
        except Exception:
            print('bad dump')
            traceback.print_exc()
            # keep going, what else can we do?

    def read_session_info(self):
        '''Load the running rice total into self.running_rice_total.

        A missing, empty or unreadable session file yields 0.
        '''
        self.running_rice_total = 0
        if os.path.exists(self.session_filename) and os.path.getsize(self.session_filename) > 0:
            try:
                self.running_rice_total = self._read_pickle(self.session_filename)
                print("session read successful. current running rice total: %s"
                      % (self.running_rice_total,))
            except Exception:
                print("bad session info file")
                traceback.print_exc()
                self.running_rice_total = 0

    def write_session_info(self):
        '''Write the all-sessions rice total to disk; failures are reported, not raised.'''
        try:
            total = self.running_rice_total + self.ricecounter
            self._write_pickle(self.session_filename, total)
            print('Session dump successful, total rice: %s' % (total,))
        except Exception:
            print('bad session dump')
            traceback.print_exc()
            # keep going, what else can we do?

    def ParseOptions(self):
        '''Read command line options into self.options.

        optparse exits the process itself for -h/--version and on invalid
        input, in which case self.options is left untouched.
        '''
        parser = optparse.OptionParser(
            version=self.version.name.capitalize() + " version " + self.version.version + "\nProject homepage: " + self.version.url,
            description="RiceMaker will automatically play the vocabulary game on freerice.com to generate rice donations. For a more detailed usage manual, see the project homepage: " + self.version.url,
            formatter=optparse.TitledHelpFormatter(),
            usage="python %prog [options]")
        parser.add_option("-d", "--debug", action="store_true", dest="debug", help="Debug mode (print some extra debug output). [default: %default]")
        # NOTE(review): savesession is parsed but never consulted in this
        # class, and store_true with a True default cannot be switched off.
        parser.add_option("-s", "--savesession", action="store_true", dest="savesession", help="Save session in a file. This will store a running total of the rice generated by you, across multiple runs of ricemaker. [default: %default]")
        # Backslashes are escaped explicitly; the displayed text is unchanged.
        parser.add_option("-w", "--wordnetpath", action="store", dest="wordnetpath", help="Full path to the WordNet commandline executable, if installed. On Linux, something like '/usr/bin/wn'; on Windows, something like 'C:\\Program Files\\WordNet\\wn.exe'. [default: %default]")
        parser.add_option("-u", "--useragent", action="store", dest="useragent", help="The User-Agent string to use for the HTTP requests. The default is the User-Agent from IE7 on WinXP SP2. [default: %default]")
        parser.add_option("-l", "--sleeplowsec", action="store", type="float", dest="sleeplowsec", help="Lower bound on the random number of seconds to sleep between iterations. [default: %default]")
        parser.add_option("-m", "--sleephighsec", action="store", type="float", dest="sleephighsec", help="Upper bound on the random number of seconds to sleep between iterations. [default: %default]")
        parser.add_option("-f", "--freericedictfilename", action="store", dest="freericedictfilename", help="Filename for internally generated dictionary. You may specify a full path here, otherwise it will just get written to the same directory where this script resides (default behavior). No need to change this unless you really feel like it. [default: %default]")
        parser.add_option("-i", "--iterationsbetweendumps", action="store", type="int", dest="iterationsbetweendumps", help="Number of iterations between dictionary dumps to file. More often than 5 minutes is really unnecessary (Time between dumps is iterationsbetweendumps * avgsleeptime = time between dumps.) [default: %default]")
        parser.add_option("-t", "--threads", action="store", type="int", dest="threads", help="Number of simultaneous threads of RiceMaker to start. Stick with 1 to reduce probability of being filtered out as a bot. [default: %default]")
        parser.add_option("-b", "--benchmark", type="choice", action="append", dest="benchmark", choices=['dict.org', 'wordnet', 'idict'], help="For benchmarking or dictionary building purposes: do you want to skip dict.org lookups and/or wordnet and/or internal dictionary lookups ('dict.org' to skip dict.org, 'wordnet' to skip wordnet, 'idict' to skip internal dictionary). [default: %default]")
        parser.set_defaults(debug=False,
                            savesession=True,
                            wordnetpath="/usr/bin/wn",
                            useragent="Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
                            sleeplowsec=3,
                            sleephighsec=6,
                            freericedictfilename="freericewordlist.txt",
                            iterationsbetweendumps=1000,
                            threads=1,
                            benchmark=[])
        (self.options, args) = parser.parse_args()
        # Single pre-formatted argument so the options start right after
        # the newline, without a separating space.
        self.printDebug("Your commandline options:\n%s" % (self.options,))

    def printDebug(self, *args):
        '''Print `args` separated by single spaces, only in debug mode.'''
        if self.options.debug:
            print(" ".join(["%s" % (arg,) for arg in args]))
class RiceMaker(threading.Thread):
    '''Worker thread that plays the freerice.com vocabulary game in a loop.

    Each round parses the current question page, picks an answer (internal
    dictionary, then WordNet, then dict.org, then a random guess), posts
    it, and puts a result item on the shared queue with three keys:
    'print' (values for display), 'dict' (newly learned target -> answer
    pairs) and 'rice' (grains earned this round).
    '''

    def __init__(self, url, options, wordlist, queue, threadnumber):
        threading.Thread.__init__(self)
        self.options = options
        self.url = url
        self.finished = threading.Event()
        self.ricecounter = [0, 0, 0]  # [total, previous iteration value, current iteration value]
        self.ricewordlist = wordlist
        self.queue = queue
        self.queueitem = {'print': {}, 'dict': {}, 'rice': 0}
        self.threadnumber = threadnumber
        # Last complete form submitted; empty until the first answer is
        # posted, so error recovery never sees an undefined attribute.
        self.postdict = {}
        self.soup = BeautifulSoup(self._fetch())

    def cancel(self):
        """Stop the thread"""
        self.finished.set()

    def _fetch(self, data=None):
        '''Request self.url (a POST when `data` is given) and return the body.'''
        request = urllib2.Request(self.url, data=data, headers={'User-Agent': self.options.useragent})  # spoof useragent
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    def run(self):
        '''Play rounds until cancel() is called.

        Any error inside a round is printed together with the current page;
        the page is then reloaded and the loop continues.
        '''
        self.iterator = 0
        while not self.finished.isSet():
            self.iterator = self.iterator + 1
            # let's wait - to not hammer the server, and to not appear too much like a bot
            time.sleep(random.uniform(self.options.sleeplowsec, self.options.sleephighsec))
            self.queueitem = {'print': {'threadnumber': self.threadnumber}, 'dict': {}, 'rice': 0}
            try:
                self._play_round()
                self.queue.put(self.queueitem)
            except KeyboardInterrupt:
                raise
            except Exception:
                print("Exception in main loop!")
                traceback.print_exc()
                print("##########################")
                print(self.soup)
                print("##########################")
                self._reload_page()
                # just keep going, don't care...
        self.finished.set()

    def _reload_page(self):
        '''Fetch a fresh page after a failed round without ever raising.

        Re-posts the last complete form if there is one, otherwise does a
        plain request. A failure here is reported and the stale page is
        kept, so the worker thread survives and retries next iteration.
        '''
        try:
            data = None
            if self.postdict:
                data = urllib.urlencode(self.postdict)
            self.soup = BeautifulSoup(self._fetch(data))
        except Exception:
            print("Failed to reload page after error, will retry on next iteration.")
            traceback.print_exc()

    def _clean_choice(self, raw):
        '''Turn the raw text of an answer link into a plain word.

        The &#8217; entity becomes an apostrophe, &nbsp; entities are
        removed, and surrounding whitespace is stripped.
        '''
        word = re.sub("&#8217;", "'", raw)
        return re.sub("&nbsp;", "", word).strip()

    def _play_round(self):
        '''Answer the question on the current page and record the outcome.'''
        mydiv = self.soup.findAll(attrs={'class': 'questionDisplayChoices'})
        myol = mydiv[0].ol
        targetword = re.sub("&#8217;", "'", str(myol.li.strong.string)).strip()
        self.queueitem['print']['targetword'] = targetword

        # The first <li> holds the target word; the next four hold the
        # choices. Mapping is choice word -> form value to submit.
        self.wordlist = {}
        for li in myol.findAll('li')[1:5]:
            word = self._clean_choice(str(li.a.string))
            self.wordlist[word] = str(li.noscript.input['value']).strip()

        self.match = self.lookupWord(targetword, self.wordlist)

        # Build the form locally so self.postdict only ever holds a
        # complete submission.
        postdict = {}
        for key in ('PAST', 'INFO', 'INFO2', 'INFO3'):
            postdict[key] = self.soup.find("input", {'name': key})['value']
        try:
            postdict['SELECTED'] = self.wordlist[re.sub("&#8217;", "'", self.match)].strip()
        except KeyError:
            print("Match found but not present in possible answers. Using random.")
            answer = self._random_answer(self.wordlist)
            # Keep self.match in step with what is actually submitted, so
            # createDict does not learn a pair that was never played.
            self.match = answer
            postdict['SELECTED'] = self.wordlist[answer].strip()
        self.postdict = postdict

        self.soup = BeautifulSoup(self._fetch(urllib.urlencode(postdict)))
        self._update_rice_count()
        self.queueitem['print']['vocablevel'] = self._read_vocab_level()
        self.createDict(targetword, self.wordlist)

    def _update_rice_count(self):
        '''Read the donated amount from the page and record this round's gain.'''
        divstr = str(self.soup.findAll(id='donatedAmount')[0])
        if self.options.threads == 1:
            print(divstr)
        divmatch = re.search('([0-9]+)', divstr)
        if divmatch is not None:
            previous = self.ricecounter[2]
            current = int(divmatch.group(1))
            self.ricecounter[1] = previous
            self.ricecounter[2] = current
            if current - previous >= 0:
                self.queueitem['rice'] = current - previous
            else:
                # take care of possible loopback at 100k grains: the
                # displayed amount restarted, so count it from zero.
                self.queueitem['rice'] = current

    def _read_vocab_level(self):
        '''Return the vocabulary level shown on the page, or 0 if absent.'''
        try:
            leveldiv = self.soup.findAll(id='questionDisplayBottomMenuBar')[0]
            self.printDebug("level div found")
            levelpara = leveldiv.findAll(text=re.compile(r'Level[^0-9]*[0-9]+\s*of\s*[0-9]+'))[0]
            self.printDebug("level para found")
            leveltext = unicode(levelpara.string)
            self.printDebug('level text:', leveltext)
            levelmatch = re.search(r'([0-9]+)\s*of', leveltext)
            vocablevel = int(levelmatch.group(1))
            print('vocab level %s' % (vocablevel,))
        except IndexError:
            self.printDebug('no level info found, setting vocab level to 0')
            vocablevel = 0
        return vocablevel

    def createDict(self, targetword, wordlist):
        '''find if our new soup says our previous match was correct
        if so, add to dict, if not, parse their answer and add that to dict
        dict format: target: match'''
        if self.soup.findAll(id='correct'):
            target, match = targetword, self.match
            self.queueitem['print']['correct'] = "True"
        else:
            answer = self.soup.findAll(id='incorrect')[0].string
            # maxsplit=1: a second ' = ' in the text must not break unpacking.
            target, match = answer.split(' = ', 1)
            self.queueitem['print']['correct'] = "False"
            print("Correct answer is: %s" % (match,))
        self.queueitem['dict'][str(target).strip()] = str(match).strip()

    def _random_answer(self, wordlist):
        '''Pick a random choice, record it as the displayed answer, return it.

        Uses random.choice so it works for any non-empty number of choices
        (duplicate choice words collapse into fewer than four dict keys).
        '''
        answer = random.choice(list(wordlist))
        self.queueitem['print']['answer'] = answer + " (source: random)"
        return answer

    def _first_choice_in(self, text, wordlist):
        '''Return the first choice that occurs literally in `text`, else None.

        A plain substring test is used so that regex metacharacters in an
        answer choice are matched literally.
        '''
        for word in wordlist:
            if word in text:
                return word
        return None

    def lookupWord(self, targetword, wordlist):
        '''Return the chosen answer for `targetword` from `wordlist`.

        Runs the lookup chain; if it raises anything other than
        KeyboardInterrupt, the error is printed and a random choice is used.
        '''
        self.printDebug('answer choices:', wordlist)
        try:
            return self.lookupInMyDict(targetword, wordlist)
        except KeyboardInterrupt:
            raise
        except Exception:
            print("Exception in lookupWord")
            traceback.print_exc()
            return self._random_answer(wordlist)

    def lookupInMyDict(self, targetword, wordlist):
        '''Answer from the internal dictionary, else continue with WordNet.

        Skipped entirely when 'idict' is listed in options.benchmark.
        '''
        if 'idict' not in self.options.benchmark:
            try:
                word = self.ricewordlist[targetword]
            except KeyError:  # not in our dict
                self.printDebug("no internal dict match found, trying wordnet")
            else:
                self.printDebug("internal dict match found!!!")
                self.queueitem['print']['answer'] = word + " (source: internal dictionary)"
                return word
        return self.lookupInWordnet(targetword, wordlist)

    def lookupInWordnet(self, targetword, wordlist):
        '''Answer from the WordNet command line tool, else continue with dict.org.

        Runs only when options.wordnetpath exists and 'wordnet' is not
        listed in options.benchmark.
        '''
        if os.path.lexists(self.options.wordnetpath) and ('wordnet' not in self.options.benchmark):
            # Argument list, no shell: targetword comes from a remote page
            # and must never be interpreted by a shell. The configured
            # executable is the one that gets run.
            command = [self.options.wordnetpath, targetword,
                       '-synsn', '-synsv', '-synsa', '-synsr',
                       '-hypen', '-hypev', '-hypon', '-hypov']
            p = subprocess.Popen(command,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 # close_fds cannot be combined with
                                 # redirected handles on Windows.
                                 close_fds=(os.name == 'posix'))
            # communicate() drains the pipe while waiting; wait() followed
            # by read() can deadlock once the output fills the pipe buffer.
            result = p.communicate()[0]
            word = self._first_choice_in(result, wordlist)
            if word is not None:
                self.printDebug("wn match found!")
                self.queueitem['print']['answer'] = word + " (source: wordnet)"
                return word
            self.printDebug("no wn match found, looking in dict.org")
        return self.lookupInDictorg(targetword, wordlist)

    def lookupInDictorg(self, targetword, wordlist):
        '''Answer from a dict.org query, else fall back to a random choice.

        The query is skipped when 'dict.org' is listed in options.benchmark.
        '''
        if 'dict.org' not in self.options.benchmark:
            query = urllib.urlencode({'Query': targetword, 'Form': 'Dict1', 'Strategy': '*', 'Database': '*'})
            response = urllib2.urlopen('http://www.dict.org/bin/Dict', data=query)
            try:
                result = response.read()
            finally:
                response.close()
            word = self._first_choice_in(result, wordlist)
            if word is not None:
                self.printDebug("dict.org match found!")
                self.queueitem['print']['answer'] = word + " (source: dict.org)"
                return word
            self.printDebug("no dict.org match found, returning random.")
        return self._random_answer(wordlist)

    def printDebug(self, *args):
        '''Print `args` separated by single spaces, only in debug mode.'''
        if self.options.debug:
            print(" ".join(["%s" % (arg,) for arg in args]))
if __name__ == "__main__":
    # Script entry point. Constructing the controller parses the command
    # line, loads saved state and creates the worker threads; start() then
    # launches the threads and runs the queue-processing loop.
    rmc = RiceMakerController()
    rmc.start()
Jump to Line
Something went wrong with that request. Please try again.