Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

warctikad: Fix dirname problem

  • Loading branch information...
commit d8152ca57cb9c1fae45d79bf0c2ce8f9e99e3fa7 1 parent 2b9549d
@pmyteh authored
Showing with 7 additions and 5 deletions.
  1. +7 −5 warctikad.py
View
12 warctikad.py
@@ -34,7 +34,6 @@
# TODO: Check if necessary
#import requests
from StringIO import StringIO
-#from warc import WARCHeader, WARCRecord as WARCRecordBase, WARCFile
#####
@@ -262,7 +261,9 @@ def process_IN_MOVETO(self, event):
warcprocessor=WARCTikaProcessor()
oldsuffix = 'warc.gz'
newsuffix = '-ViaTika.warc.gz'
- handler = WARCNotifyHandler(warcprocessor)
+ handler = WARCNotifyHandler(warcprocessor=warcprocessor,
+ oldsuffix=oldsuffix,
+ newsuffix=newsuffix)
notifier = pyinotify.Notifier(wm, handler)
@@ -270,11 +271,12 @@ def process_IN_MOVETO(self, event):
# files, in case we restarted part-way through a crawl.
for fn in os.listdir(dirname):
if fn.endswith(oldsuffix) and not fn.endswith(newsuffix):
- print "Processing existing file:"+fn
+ print "Processing existing file:"+dirname+"/"+fn
# try:
+ print dirname
warcprocessor.process(
- infn=fn,
- outfn=re.sub(oldsuffix+'$', newsuffix, fn) )
+ infn=dirname+"/"+fn,
+ outfn=re.sub(oldsuffix+'$', newsuffix, dirname+"/"+fn) )
# except Exception as e:
# print ("Warning: Startup processor failed to process "+
# "file "+fn+": "+str(e)+str(e.args)+
Please sign in to comment.
Something went wrong with that request. Please try again.