-
Notifications
You must be signed in to change notification settings - Fork 0
/
filesorter.py
executable file
·441 lines (366 loc) · 16 KB
/
filesorter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
#!/opt/bin/python
"""Post-process script to be called by Transmission upon finishing
a download (script-torrent-done-filename setting).
Will unpack archives, and tries to sort TV shows within a directory
structure, the files properly renamed.
When using transmission <2.2, you need to wrap this in a shell script
which does "{ ./this.py } &", to prevent Transmission from blocking,
see: https://forum.transmissionbt.com/viewtopic.php?f=3&t=11397
Easiest way to test this when debugging is calling it like so:
TR_TORRENT_DIR=foo python torrent-postprocess.py
With foo being a directory containing a custom archive file that contains
a (maybe fake) video file.
"""
import sys
import os
import re
from os import path
from collections import namedtuple
import logging
from subprocess import Popen, PIPE
import urllib, urllib2
import base64
from parser import NameParser, InvalidNameException
# Module-wide logger; its level and handlers are set up in main_wrapper().
log = logging.getLogger('torrent-postprocess')

# Built-in defaults. The commented-out keys are optional settings that have
# no default here; they are only honoured when config.py provides them.
# Note that 'tv-dir' is read with config['tv-dir'] when looking for a target
# folder, so it has to be set for TV episode sorting to work.
config = {
    #'logfile': '/tmp/torrent.log',
    #'tv-dir': '',
    #'manual-dir': '',  # for videos not processible as a TV episode
    'dryrun': False,
    'log-level': logging.INFO,
    #'prowl-key': '',
    #'nma-key': '',
    #'unrar-bin': '',  # can be useful for local dev or non Synology systems
}

# Overlay the defaults with the ``config`` dict of an importable ``config``
# module. A missing module is not fatal: the defaults above stay in effect.
try:
    config.update(__import__('config').config)
except ImportError:
    print "Cannot import config.py"
def get_files(directory):
    """Lazily produce the full path of every file below ``directory``.

    Sub-directories are descended into recursively; only files are
    produced, never the directories themselves.
    """
    return (
        path.join(folder, name)
        for folder, _subfolders, names in os.walk(directory)
        for name in names
    )
def is_video(filename):
    """Tell whether ``filename`` carries one of the known video extensions.

    The extension is compared case-insensitively, so ``Show.MKV`` or
    ``clip.Avi`` are recognised just like their lower-case spellings.

    Returns True for a video file name, False otherwise.
    """
    extension = path.splitext(filename)[1].lower()
    return extension in ('.mkv', '.avi', '.mov', '.wmv', '.mp4')
def first_part_only(filename):
    """Filter predicate that keeps only the "first" volume of a
    multi volume archive set.

    Multipart RAR archives sometimes use "partXX.rar" names instead of
    ".rXX" extensions (according to Wikipedia, "partXX.rar" is the new
    approach with RAR3). Names without such a "partXX.rar" suffix are
    not part of that scheme and always pass the filter.
    """
    match = re.search(r'part(\d+)\.rar$', filename)
    # No match: not one of the multipart archives we need to filter.
    return match is None or int(match.group(1)) == 1
def unpack(torrent_path):
    """Unpack the torrent as necessary and yield the video files that
    need to be processed.

    ``torrent_path`` must be a directory; a single-file torrent is
    logged as an error and yields nothing.

    If the directory contains no ``.rar`` archives, the video files
    already present in it are yielded. Otherwise every archive (only
    the first volume of "partXX.rar" sets) is extracted into
    ``torrent_path`` with unrar, and the video files that newly appeared
    through each extraction are yielded. Archives that fail to unpack
    are logged and skipped.
    """
    if path.isfile(torrent_path):
        log.error('Torrent "%s" is a file, this is currently not supported',
                  torrent_path)
        return

    all_files = list(get_files(torrent_path))

    # Collect the archives to unpack. A real list (rather than filter())
    # keeps the emptiness check below meaningful on every Python version.
    rar_files = [f for f in all_files
                 if path.splitext(f)[1] == '.rar' and first_part_only(f)]

    # If there are no archives, look for videos in original set
    if not rar_files:
        log.info('No archives found, assume video files are contained directly')
        for candidate in all_files:
            if is_video(candidate):
                yield candidate
        return

    unrar_bin = config.get('unrar-bin', '/usr/syno/bin/unrar')
    for archive in rar_files:
        log.info('Unpacking archive: %s' % archive)
        p = Popen([unrar_bin, 'x', '-y', archive, torrent_path],
                  stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes while waiting. A plain wait()
        # can deadlock once the child has filled a pipe buffer.
        out, err = p.communicate()
        if p.returncode != 0:
            log.error('Failed to unpack "%s":\n%s\n%s' % (archive, err, out))
            continue

        # Find the files that are new in this archive
        updated_all_files = list(get_files(torrent_path))
        added_files = set(updated_all_files) - set(all_files)
        all_files = updated_all_files
        log.debug('Unpack yielded %d new files', len(added_files))

        # Process all files we unpacked
        for unpacked in added_files:
            log.debug('Looking at unpacked file "%s"', unpacked)
            if is_video(unpacked):
                yield unpacked
            else:
                log.info('Ignoring unpacked file "%s"', unpacked)
def parse_tv_filename(filename):
    """Run NameParser on ``filename`` and post-process its result.

    Returns False when the name cannot be parsed or yields no series
    name. Otherwise returns the parse result with two attributes set:
    ``series_name_with_year`` (the name as parsed) and ``series_name``
    (the same name with a trailing four digit year removed).
    """
    try:
        # Only the base name is handed to the parser: the directory part
        # sometimes contains torrent site tags, stuff that NameParser
        # isn't good at handling, and those then can get preference.
        parsed = NameParser().parse(path.basename(filename))
        if not parsed.series_name:
            # Sometimes the parser gives us name-less shows.
            raise InvalidNameException('No series name')
    except InvalidNameException as err:
        log.debug('Failed to parse "%s": %s', filename, err, exc_info=1)
        return False

    # NameParser may hand back a name such as "Castle 2009". The year is
    # unwanted for sorting but needed for TVdb, so both forms are kept.
    # This is deliberately simple and assumes no show needs the year for
    # unique identification.
    name_as_parsed = parsed.series_name
    parsed.series_name_with_year = name_as_parsed
    parsed.series_name = re.sub(
        r'[ (]\d\d\d\d\)?$', '', name_as_parsed).strip()
    return parsed
def _read_fixed_tvdb_id(series_folder):
    """Return the TVDB id stored in ``series_folder/.metadatad``, or None
    when the file is missing or does not contain an integer.
    """
    id_file = path.join(series_folder, '.metadatad')
    if not path.isfile(id_file):
        return None
    try:
        # ``with`` closes the handle even when int() rejects the content.
        with open(id_file, 'r') as fp:
            tvdb_id = int(fp.read())
    except ValueError:
        return None
    log.debug('Found fixed TVDB id: "%s"', tvdb_id)
    return tvdb_id


def _lookup_tvdb_episode(show, tvdb_id):
    """Return the TVDB episode record for ``show``, or None if it cannot
    be determined (multi-episode file, tvdb_api missing, lookup failed).
    """
    if len(show.episode_numbers) > 1:
        # We do not support titles for multi-episode files for now.
        return None
    try:
        import tvdb_api
    except ImportError:
        log.debug('tvdb_api module not available, not getting title')
        return None

    log.debug('Trying to find show info via thetvdb.com')
    db = tvdb_api.Tvdb()
    try:
        series = db[tvdb_id] if tvdb_id else db[show.series_name_with_year]
        if show.air_by_date:
            episode = series.airedOn(show.air_date)[0]
        else:
            episode = series[show.season_number][show.episode_numbers[0]]
    except (tvdb_api.tvdb_error, tvdb_api.tvdb_shownotfound,
            tvdb_api.tvdb_seasonnotfound,
            tvdb_api.tvdb_episodenotfound) as e:
        log.warning('Unable to find episode on tvdb: %s', e)
        return None
    log.info('Found episode on tvdb')
    return episode


def _format_episode_name(series_name, show, tvdb_episode):
    """Build the display name "<series> - <SxEE or air date>[ - <title>]"."""
    name = '%s' % series_name
    if show.air_by_date:
        name += ' - %s' % (
            tvdb_episode['firstaired'] if tvdb_episode else str(show.air_date))
    else:
        name += ' - %sx%s' % (
            show.season_number,
            "+".join("%.2d" % number for number in show.episode_numbers))
    if tvdb_episode:
        name = "%s - %s" % (name, tvdb_episode['episodename'])
    return name


def find_tv_episode_target_filename(show, file):
    """Finds target folder and builds filename for this episode.

    The configured 'tv-dir' is walked until a file is found whose parsed
    series name equals ``show.series_name`` (case-insensitively). The
    folder layout around that file decides where the new episode goes:
    if the file sits in a folder whose name starts with "Season", the
    episode is placed in a "Season <n>" folder next to it, otherwise in
    the same folder as the matching file.

    ``show`` is a result of parse_tv_filename(); ``file`` is the path of
    the video being sorted and only contributes its extension.

    Returns a 2-tuple ``(target_file, episode_full_name)``, or
    ``(None, None)`` when no existing file of the series was found.
    """
    # Find a fitting target folder.
    log.debug('Trying to find target folder within %s', config['tv-dir'])
    wanted_name = show.series_name.lower()
    for top, _, files in os.walk(config['tv-dir']):
        for candidate in files:
            candidate_path = path.join(top, candidate)
            log.debug('Considering %s', candidate)
            n = parse_tv_filename(candidate_path)
            if not n:
                log.debug('Failed to parse %s', candidate)
                continue
            if n.series_name.lower() != wanted_name:
                log.debug('Candidate series "%s" does not match',
                          n.series_name)
                continue

            log.debug('Candidate "%s" matches', candidate)
            # We have our match! Now determine the target folder.
            candidate_dir = path.dirname(candidate_path)
            single_folder = not path.basename(candidate_dir).startswith('Season')
            if single_folder:
                log.debug('No "Season" structure detected, assuming single folder')
                series_folder = candidate_dir
            else:
                series_folder = path.join(candidate_dir, '..')

            # If a .metadatad file exists, it contains the TVDB id to look
            # for. This is necessary (rarely) in case a show does not
            # yield the correct TVDB result through a name search.
            tvdb_id = _read_fixed_tvdb_id(series_folder)

            # Try to get the episode title via TVDB.
            tvdb_episode = _lookup_tvdb_episode(show, tvdb_id)

            # Put together the full episode name that we will also use in
            # the notification. Use the candidate's series_name, the casing
            # is more likely to be what we want it to!
            episode_full_name = _format_episode_name(
                n.series_name, show, tvdb_episode)

            # A path separator inside the name (e.g. in an episode title)
            # would otherwise be taken as a directory boundary by join().
            target_name = '%s%s' % (episode_full_name, path.splitext(file)[1])
            for separator in (os.sep, os.altsep):
                if separator:
                    target_name = target_name.replace(separator, '-')

            # Determine target path
            parts = [series_folder]
            if not single_folder:
                parts.append('Season %d' % show.season_number)
            parts.append(target_name)
            target_file = path.normpath(path.join(*parts))
            return target_file, episode_full_name

    log.error('No fitting target folder found for %s', file)
    return None, None
def process_video(file):
    """Rename video file and move to target folder.

    Returns a 2-tuple (episode title, final file location). The first
    value will only be set if an episode was parsed and handled correctly.
    If the file was only moved to a location intended for files requiring
    manual intervention, then the first tuple element will be None.
    If no target could be determined at all, ``(None, None)`` is returned
    and the file is left where it is.

    With the 'dryrun' setting enabled the move is only logged.
    """
    # Imported here so the function carries its own dependency.
    import shutil

    log.info('Trying to process video file "%s"', file)
    show = parse_tv_filename(file)

    # Determine where to move the video.
    target_file = episode_title = None
    if show:
        log.info('Determined to be TV show: %s, S=%s, E=%s',
                 show.series_name, show.season_number, show.episode_numbers)
        target_file, episode_title = find_tv_episode_target_filename(show, file)
    else:
        log.warning('Filename doesn\'t look like a TV show: %s', file)

    # If we do not find a proper place for the video file, move it
    # none the less to a target folder, where it might be easier to copy
    # from and manually deal with.
    if not target_file and "manual-dir" in config:
        filename = path.basename(file)
        folder = path.basename(path.dirname(file))
        target_file = path.join(config['manual-dir'], folder, filename)

    if not target_file:
        return None, None

    # Actually move the file
    log.info('Moving "%s" to "%s"', file, target_file)
    if not config['dryrun']:
        # Only non-``str`` (Python 2 ``unicode``) paths are encoded.
        # Calling .encode() on a byte string holding non-ASCII characters
        # would raise UnicodeDecodeError.
        fs_target = target_file
        if not isinstance(fs_target, str):
            fs_target = fs_target.encode('utf-8')

        directory = path.dirname(fs_target)
        if directory and not path.exists(directory):
            os.makedirs(directory)
        # shutil.move() falls back to copy-and-delete when source and
        # target are on different filesystems, where os.rename() fails.
        shutil.move(file, fs_target)
    return (episode_title, target_file)
def clear():
    """Delete torrents right away, except if they are from our
    private tracker, which requires us to seed. Only trash data
    if we were able to move all files, of course.

    NOTE: not implemented yet. The body consists of this docstring and
    the commented-out sketch below, so calling the function does nothing.
    """
    #if funfile and not force:
    #    return
    #
    #getridofit()
def clear_old():
    """Clear torrents from our private trackers after we seeded
    for a long enough time.

    NOTE: not implemented yet; calling the function does nothing.
    """
    # The string below is pseudo-code for the intended behaviour. It is a
    # bare expression statement and has no effect at runtime.
    """for all torrents in torrents:
if ended:
clear:
pass"""
def _post_notification(service, url, fields):
    """POST ``fields`` form-encoded to ``url``. An IOError is logged under
    the given ``service`` name instead of being raised.
    """
    payload = {}
    for key, value in fields.items():
        # urlencode() applies str() to every value, which fails for
        # non-ASCII unicode text (e.g. episode titles); hand it UTF-8
        # bytes instead. Byte strings and None are passed through as-is.
        if not isinstance(value, str) and hasattr(value, 'encode'):
            value = value.encode('utf-8')
        payload[key] = value

    try:
        r = urllib2.urlopen(url, urllib.urlencode(payload))
        try:
            r.read()
        finally:
            r.close()
    except IOError as e:
        log.error('Cannot send %s notification: %s', service, e)


def send_notification(subject, message):
    """Push a notification to every configured service.

    ``subject`` is sent as the event name and ``message`` as its
    description. Prowl is used when 'prowl-key' is configured and
    NotifyMyAndroid when 'nma-key' is configured; both may be active.
    Delivery failures are logged and never raised. With the 'dryrun'
    setting enabled the message is only logged.
    """
    log.info('Sending message: %s - %s', subject, message)
    if config['dryrun']:
        return

    app_name = 'Your Dollhouse Handler'
    services = (
        ('Prowl', 'prowl-key', 'https://api.prowlapp.com/publicapi/add'),
        ('NMA', 'nma-key', 'https://www.notifymyandroid.com/publicapi/notify'),
    )
    for service, key_name, url in services:
        if key_name not in config:
            continue
        _post_notification(service, url, {
            'apikey': config[key_name],
            'application': app_name,
            'event': subject,
            'description': message,
        })
# Successfully identified and processed file: ``title`` is the episode name
# returned by process_video(), ``filename`` the location it was moved to.
ProcessedFile = namedtuple('ProcessedFile', 'title, filename')
def process_path(fspath):
    """Unpack and sort every video found below ``fspath``.

    Returns a 3-tuple of lists: the recognised TV episodes (as
    ProcessedFile tuples), the base names of videos that were moved
    without being recognised as an episode, and the paths of videos
    that could not be moved at all.
    """
    # Process the torrent
    log.info('Asked to process path "%s"' % fspath)

    tvepisodes = []
    other_videos = []
    failed = []
    for video in unpack(fspath):
        title, moved_to = process_video(video)
        if title:
            tvepisodes.append(ProcessedFile(title, moved_to))
            continue
        if moved_to:
            other_videos.append(path.basename(moved_to))
            continue
        failed.append(video)

    # Delete empty directories
    if path.isdir(fspath) and not os.listdir(fspath):
        os.rmdir(fspath)

    # TODO
    #clear()
    #clear_old()

    return tvepisodes, other_videos, failed
def main():
    """Process the torrent that Transmission describes through the
    ``TR_TORRENT_*`` environment variables and send a notification
    summarising the result.

    Returns 1 when no torrent was given or its path does not exist (no
    notification is sent in those cases), and None otherwise.
    """
    # XXX Need to support a non-move mode for torrents that are not compressed.

    # Process the torrent
    torrent = {
        'id': os.environ.get('TR_TORRENT_ID'),
        'name': os.environ.get('TR_TORRENT_NAME'),
        'dir': os.environ.get('TR_TORRENT_DIR'),
        'hash': os.environ.get('TR_TORRENT_HASH'),
    }
    # Either variable may be unset (None). join() must only see strings,
    # otherwise it raises before the friendly error below can be logged.
    torrent_path = path.join(torrent['dir'] or '', torrent['name'] or '')

    # Handle errors that are outright failures, no notification at
    # all are sent in those cases.
    if not torrent_path:
        log.error("No torrent given via environment. This script "
                  "is to be called by Transmission.")
        return 1
    if not path.exists(torrent_path):
        log.error('Given torrent path "%s" does not exist' % torrent_path)
        return 1

    tvepisodes, other_videos, failed = process_path(torrent_path)

    if not tvepisodes and not other_videos:
        if not failed:
            # Extra message when we cannot even find a single video to process
            log.info('No video files found')
        send_notification('Download complete', torrent['name'])
        return

    if tvepisodes and other_videos:
        subject = 'New Episodes/Videos'
    elif tvepisodes:
        subject = 'New Episodes' if len(tvepisodes) + len(failed) > 1 else 'New Episode'
    else:
        subject = 'New Videos' if len(other_videos) + len(failed) > 1 else 'New Video'

    # tvepisodes holds ProcessedFile tuples; join() needs their titles,
    # while other_videos already is a list of plain file names.
    names = [episode.title for episode in tvepisodes] + other_videos
    message = ", ".join(names)
    if failed:
        message += " and %d unknown" % len(failed)
    send_notification(subject, message)
def main_wrapper(*a, **kw):
    """Because the Synology Cron doesn't send stdout/stderr messages
    per mail, make sure we log every failure.

    Configures logging from the 'log-level' and optional 'logfile'
    settings, then runs main() with the given arguments.

    Returns main()'s return value, or 1 when main() raised an exception
    (which is logged with its traceback), so that a crash is not reported
    as a successful exit.
    """
    # Configure logging
    log.setLevel(config['log-level'])
    logfile = config.get('logfile')
    if logfile:
        h = logging.FileHandler(logfile)
        h.setFormatter(logging.Formatter(
            "%(asctime)s - %(levelname)s - %(message)s"))
        log.addHandler(h)
    log.addHandler(logging.StreamHandler())

    try:
        return main(*a, **kw)
    except Exception as e:
        log.exception(e)
        return 1
# Script entry point: exit with main_wrapper()'s return value, falling back
# to 0 when it returns None or another falsy value.
if __name__ == '__main__':
    sys.exit(main_wrapper() or 0)