diff --git a/data/processed/json/dfa.json b/issues/adandoned/json/dfa.json similarity index 100% rename from data/processed/json/dfa.json rename to issues/adandoned/json/dfa.json diff --git a/data/processed/json/dfm.json b/issues/adandoned/json/dfm.json similarity index 100% rename from data/processed/json/dfm.json rename to issues/adandoned/json/dfm.json diff --git a/data/processed/json/dfq.json b/issues/adandoned/json/dfq.json similarity index 100% rename from data/processed/json/dfq.json rename to issues/adandoned/json/dfq.json diff --git a/src/csv2df/helpers.py b/src/csv2df/helpers.py index aa88650..426d946 100644 --- a/src/csv2df/helpers.py +++ b/src/csv2df/helpers.py @@ -32,7 +32,7 @@ \\... """ -import shutil + from locations.folder import FolderBase, md __all__ = ['PathHelper', 'DateHelper'] @@ -87,6 +87,7 @@ def locate_csv(year: int=None, month: int=None): Returns: pathlib.Path() instance """ + year, month = DateHelper.filter_date(year, month) folder = Folder(year, month).get_interim_folder() csv_path = folder / "tab.csv" if csv_path.exists() and csv_path.stat().st_size > 0: diff --git a/src/csv2df/runner.py b/src/csv2df/runner.py index d9aab94..4b66196 100644 --- a/src/csv2df/runner.py +++ b/src/csv2df/runner.py @@ -73,11 +73,13 @@ def save(self): self.dfq.to_csv(folder_path / 'dfq.csv') self.dfm.to_csv(folder_path / 'dfm.csv') print("Saved dataframes to", folder_path) + return True def validate(self): checker = Validator(self.dfa, self.dfq, self.dfm) checker.run() print("Test values parsed OK for", self) + return True def __repr__(self): return "Vintage({}, {})".format(self.year, self.month) diff --git a/src/csv2df/tests/test_helpers.py b/src/csv2df/tests/test_helpers.py index 40c8ebe..71c7735 100644 --- a/src/csv2df/tests/test_helpers.py +++ b/src/csv2df/tests/test_helpers.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import pytest -from csv2df.helpers import PathHelper, DateHelper, Folder - +from csv2df.helpers import PathHelper, DateHelper year, month = DateHelper.get_latest_date() @@ -41,37 +40,10 @@ def test_get_latest_date(self): assert month <= 12 -# more testing, 'private' part - -class Test_Folder(): - def test_repr(self): - assert repr(Folder(2015, 5)) - - def test_get_folder_methods(self): - assert Folder(2015, 5).get_processed_folder().exists() - assert Folder(2015, 5).get_interim_folder().exists() - - def test_out_of_range_year_raises_error(self): - with pytest.raises(ValueError): - Folder(2030, 1) - -# skipping - - -@pytest.mark.skip(reason="not testing maintenance scripts yet") -def test_md(folder): - assert False - @pytest.mark.skip(reason="not testing maintenance scripts yet") def test_init_dirs(): assert False - -@pytest.mark.skip(reason="not testing maintenance scripts yet") -def test_copy_latest(): - assert False - - if __name__ == "__main__": pytest.main([__file__]) diff --git a/src/download/download.py b/src/download/download.py index c237308..cca8f20 100644 --- a/src/download/download.py +++ b/src/download/download.py @@ -67,17 +67,18 @@ def download(self, force = False): download(self.url, self.local_path) if os.path.exists(self.local_path): print('Downloaded', self.local_path) - return 0 + return True else: - return 1 + print('Already downloaded', self.local_path) + return False def unrar(self): res = unrar(self.local_path, self.folder) if res == 0: print('UnRARed', self.local_path) - return res + return True else: - return 1 + return False def clean(self): for file in os.listdir(self.folder): diff --git a/src/locations/folder.py b/src/locations/folder.py index 1ec92a1..e85b52c 100644 --- a/src/locations/folder.py +++ b/src/locations/folder.py @@ -47,19 +47,24 @@ def get_latest_date(cls): root = cls.interim def max_subfolder(folder): - subfolders = [f.name for f in folder.iterdir() if f.is_dir()] - return int(max(subfolders)) + + subfolders = [f.name for f in folder.iterdir() if f.is_dir()] + return max(map(int, subfolders)) year = max_subfolder(root) - _subfolder = root / str(year) - month = max_subfolder(_subfolder) + subfolder = root / str(year) + month = max_subfolder(subfolder) return year, month + def make_dirs(self): + for folder in [self.get_raw_folder(), + self.get_interim_folder(), + self.get_processed_folder()]: + md(folder) + def _local_folder(self, parent_folder): folder = parent_folder / str(self.year) / str(self.month).zfill(2) - if not folder.exists(): - md(folder) return folder def get_raw_folder(self): @@ -70,20 +75,21 @@ def get_interim_folder(self): def get_interim_csv(self): return self.get_interim_folder() / CSV_FILENAME + + def get_processed_folder(self): + return self._local_folder(self.processed) def copy_tab_csv(self): src = self.get_raw_folder() / CSV_FILENAME dst = self.get_interim_folder() / CSV_FILENAME from shutil import copyfile - copyfile(src, dst) + copyfile(src, dst) + return True @staticmethod def is_valid_filepath(path): return bool(path.exists() and path.stat().st_size > 0) - def get_processed_folder(self): - return self._local_folder(self.processed) - def __repr__(self): return "FolderBase({}, {})".format(self.year, self.month) @@ -105,5 +111,6 @@ def copy_latest(): dst = dst_folder / src.name shutil.copyfile(src, dst) copied.append(dst) + print("Latest date is", year, month) print("Updated folder", FolderBase.latest) return copied diff --git a/src/locations/test_folder.py b/src/locations/test_folder.py new file mode 100644 index 0000000..4cafcae --- /dev/null +++ b/src/locations/test_folder.py @@ -0,0 +1,32 @@ +import pytest + +from locations.folder import FolderBase as Folder + +class Test_Folder(): + def test_repr(self): + assert repr(Folder(2015, 5)) + + def test_get_folder_methods(self): + assert Folder(2015, 5).get_processed_folder().exists() + assert Folder(2015, 5).get_interim_folder().exists() + assert Folder(2015, 5).get_raw_folder().exists() + assert Folder(2015, 5).get_interim_csv().exists() + + def test_out_of_range_year_does_not_raises_error(self): + #with pytest.raises(ValueError): + Folder(2030, 1) + +# skipping + +@pytest.mark.skip(reason="not testing maintenance scripts yet") +def test_md(folder): + assert False + +@pytest.mark.skip(reason="not testing maintenance scripts yet") +def test_copy_latest(): + assert False + + +if __name__ == "__main__": + pytest.main([__file__]) + diff --git a/src/word2csv/word.py b/src/word2csv/word.py index 2512e67..eeebffa 100644 --- a/src/word2csv/word.py +++ b/src/word2csv/word.py @@ -199,6 +199,7 @@ def make_interim_csv(year, month): from locations.folder import FolderBase folder = FolderBase(year, month).get_raw_folder() folder_to_csv(folder) + return True if __name__ == "__main__": #from run_word import get_word_folder diff --git a/tasks.py b/tasks.py index 03b5cde..920307a 100644 --- a/tasks.py +++ b/tasks.py @@ -123,33 +123,38 @@ def add(ctx, year, month): _add(year, month) def _add(year, month): - year, month = int(year), int(month) src = str(Path(__file__).parent / 'src') sys.path.insert(0, src) - #download, unpack from locations.folder import FolderBase + + #download, unpack from download.download import RemoteFile + year, month = int(year), int(month) rf = RemoteFile(year, month) - rf.download() - rf.unrar() - interim_csv = FolderBase(year, month).get_interim_csv() + assert rf.download() + assert rf.unrar() + #make interim csv + interim_csv = FolderBase(year, month).get_interim_csv() if not os.path.exists(interim_csv): from word2csv.word import make_interim_csv - make_interim_csv(year, month) - FolderBase(year, month).copy_tab_csv() + assert make_interim_csv(year, month) + assert FolderBase(year, month).copy_tab_csv() + #parse, validate, save from csv2df.runner import Vintage vint = Vintage(year, month) - vint.validate() - vint.save() + assert vint.validate() + assert vint.save() + #copy to latest - from locations.folder import copy_latest - copy_latest() + if FolderBase.get_latest_date() == (year, month): + from locations.folder import copy_latest + copy_latest() + + # see for context manager https://stackoverflow.com/questions/17211078/how-to-temporarily-modify-sys-path-in-python sys.path.remove(src) - # see for context manager: - # https://stackoverflow.com/questions/17211078/how-to-temporarily-modify-sys-path-in-python - + ns = Collection() for t in [clean, pep8, ls, cov, test, doc, rst, github, lint, add]: @@ -163,7 +168,7 @@ def _add(year, month): if __name__ == '__main__': - _add(2017, 5) + _add(2017, 6) ########################################################################## # GLOBALS #