From 80237204766f8d24e9d45570c226af0980baf669 Mon Sep 17 00:00:00 2001 From: Andrew Baxter Date: Sat, 20 Dec 2014 15:20:13 +0900 Subject: [PATCH] Sanitize version names when creating egg paths. Version names taken from branches that contain invalid (or special) filesystem characters were previously used verbatim when building the egg path. This patch replaces every character other than ASCII letters, digits, '-' and '_' with an underscore. As an example, a spider deployed from git branch "feature/x_y" would attempt to store the egg in a nonexistent directory called "feature" and fail. --- scrapyd/eggstorage.py | 4 +++- scrapyd/tests/test_eggstorage.py | 30 +++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/scrapyd/eggstorage.py b/scrapyd/eggstorage.py index 552b1917..86612971 100644 --- a/scrapyd/eggstorage.py +++ b/scrapyd/eggstorage.py @@ -1,3 +1,4 @@ +import re from glob import glob from os import path, makedirs, remove from shutil import copyfileobj, rmtree @@ -45,5 +46,6 @@ def delete(self, project, version=None): self.delete(project) def _eggpath(self, project, version): - x = path.join(self.basedir, project, "%s.egg" % version) + sanitized_version = re.sub(r'[^a-zA-Z0-9_-]', '_', version) + x = path.join(self.basedir, project, "%s.egg" % sanitized_version) return x diff --git a/scrapyd/tests/test_eggstorage.py b/scrapyd/tests/test_eggstorage.py index 311f32fe..eb5fbd11 100644 --- a/scrapyd/tests/test_eggstorage.py +++ b/scrapyd/tests/test_eggstorage.py @@ -20,24 +20,36 @@ def test_interface(self): def test_put_get_list_delete(self): self.eggst.put(StringIO("egg01"), 'mybot', '01') - self.eggst.put(StringIO("egg03"), 'mybot', '03') - self.eggst.put(StringIO("egg02"), 'mybot', '02') - - self.assertEqual(self.eggst.list('mybot'), ['01', '02', '03']) + self.eggst.put(StringIO("egg03"), 'mybot', '03/ver') + self.eggst.put(StringIO("egg02"), 'mybot', '02_my branch') + + self.assertEqual(self.eggst.list('mybot'), [ + '01', + '02_my_branch', + '03_ver' + ]) 
self.assertEqual(self.eggst.list('mybot2'), []) v, f = self.eggst.get('mybot') - self.assertEqual(v, "03") + self.assertEqual(v, "03_ver") self.assertEqual(f.read(), "egg03") f.close() - v, f = self.eggst.get('mybot', '02') - self.assertEqual(v, "02") + v, f = self.eggst.get('mybot', '02_my branch') + self.assertEqual(v, "02_my branch") + self.assertEqual(f.read(), "egg02") + f.close() + + v, f = self.eggst.get('mybot', '02_my_branch') + self.assertEqual(v, "02_my_branch") self.assertEqual(f.read(), "egg02") f.close() - self.eggst.delete('mybot', '02') - self.assertEqual(self.eggst.list('mybot'), ['01', '03']) + self.eggst.delete('mybot', '02_my branch') + self.assertEqual(self.eggst.list('mybot'), ['01', '03_ver']) + + self.eggst.delete('mybot', '03_ver') + self.assertEqual(self.eggst.list('mybot'), ['01']) self.eggst.delete('mybot') self.assertEqual(self.eggst.list('mybot'), [])