Skip to content

Commit

Permalink
Add a local index mode for developers
Browse files Browse the repository at this point in the history
This makes it easier to test parser changes without beating up on OkCupid's servers.
  • Loading branch information
ckuethe committed Sep 18, 2015
1 parent 4f6833d commit 4d0d96b
Showing 1 changed file with 28 additions and 7 deletions.
35 changes: 28 additions & 7 deletions src/arrow_fetcher.py
Expand Up @@ -97,11 +97,14 @@ class ArrowFetcher:
# Perhaps get the time of the next message/reply (there should be at least one), and set the time based on it.
fallback_date = datetime(2000, 1, 1, 12, 0)

def __init__(self, username, thunderbird=False, debug=False):
def __init__(self, username, thunderbird=False, debug=False, indexfile=None):
# Set up a fetcher for `username`, starting with an empty list of thread URLs.
# thunderbird and debug are stored unchanged as instance flags.
# indexfile: optional path to a saved message-index HTML page, stored as-is;
# None (the default) leaves secure_base_url untouched.
self.username = username
self.thunderbird = thunderbird
self.debug = debug
self.thread_urls = []
self.indexfile = indexfile
if indexfile:
# Local index mode: override secure_base_url on this instance so that URLs
# built from it target localhost.
self.secure_base_url = 'https://localhost'

def _safely_soupify(self, f):
f = f.partition("function autocoreError")[0] + '</body></html>' # wtf okc with the weirdly encoded "</scr' + 'ipt>'"-type statements in your javascript
Expand All @@ -119,7 +122,10 @@ def queue_threads(self):
page = 0
while (page < 1 if self.debug else True):
logging.info("Queuing folder %s, page %s", folder, page)
f = self._request_read_sleep(self.secure_base_url + '/messages?folder=' + str(folder) + '&low=' + str((page * 30) + 1))
if self.indexfile:
f = urllib2.urlopen('file:'+self.indexfile).read()
else:
f = self._request_read_sleep(self.secure_base_url + '/messages?folder=' + str(folder) + '&low=' + str((page * 30) + 1))
soup = self._safely_soupify(f)
end_pattern = re.compile('&folder=\d\';')
threads = [
Expand Down Expand Up @@ -254,11 +260,12 @@ def _strip_tags(self, html, invalid_tags=['em', 'a', 'span', 'strong', 'div', 'p
return soup.encode_contents().decode('UTF-8')

class OkcupidState:
def __init__(self, username, filename, thunderbird, debug):
def __init__(self, username, filename, thunderbird, debug, indexfile):
self.username = username
self.filename = filename
self.thunderbird = thunderbird
self.debug = debug
self.indexfile = indexfile
self.cookie_jar = cookielib.CookieJar()
self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))
urllib2.install_opener(self.opener)
Expand All @@ -272,7 +279,8 @@ def fetch(self):
arrow_fetcher = ArrowFetcher(
self.username,
thunderbird=self.thunderbird,
debug=self.debug)
debug=self.debug,
indexfile=self.indexfile)
arrow_fetcher.queue_threads()
arrow_fetcher.dedupe_threads()
try:
Expand All @@ -292,6 +300,10 @@ def use_autologin(self, autologin):
logging.debug("Using autologin url: %s", autologin)
self._setOpenerUrl(autologin)

def use_indexfile(self, indexfile):
# Local index mode: log the path of the cached index file at debug level, then
# hand it to _setOpenerUrl as a 'file:' URL.
# The path is concatenated verbatim; no escaping or absolute-path conversion
# is applied here.
# NOTE(review): _setOpenerUrl is defined outside this view; presumably it
# opens the URL through the installed opener -- confirm.
logging.debug("Using cached index file: %s", indexfile)
self._setOpenerUrl('file:'+indexfile)

def main():
usage = "okcmd -u your_username -p your_password -f 'message_output_file.txt'"
description = "OkCupid-Message-Downloader (OKCMD): a tool for downloading your sent and received OkCupid messages to a text file."
Expand All @@ -313,9 +325,16 @@ def main():
parser.add_option("-d", "--debug", dest="debug",
help="limit the number of threads fetched for debugging, and output raw HTML",
action='store_const', const=True, default=False)
parser.add_option("-i", "--index", dest="indexfile", default=None,
help="read the message index from html file, for developers. Implies --debug")
(options, args) = parser.parse_args()
options_ok = True
logging_format = '%(levelname)s: %(message)s'
if options.indexfile:
options.debug = True
options.username = 'staff_robot'
options.password = 'he@rtl3ss!'

if options.debug:
logging.basicConfig(format=logging_format, level=logging.DEBUG)
logging.debug("Debug mode turned on.")
Expand All @@ -336,10 +355,12 @@ def main():
if not options_ok:
logging.error("See 'okcmd --help' for all options.")
else:
state = OkcupidState(options.username, options.filename, options.thunderbird, options.debug)
if options.username and options.password:
state = OkcupidState(options.username, options.filename, options.thunderbird, options.debug, options.indexfile)
if options.indexfile:
state.use_indexfile(options.indexfile)
elif options.username and options.password:
state.use_password(options.password)
if options.autologin:
elif options.autologin:
state.use_autologin(options.autologin)
state.fetch()
logging.info("Done.")
Expand Down

0 comments on commit 4d0d96b

Please sign in to comment.