diff --git a/file_metadata/generic_file.py b/file_metadata/generic_file.py
new file mode 100644
index 0000000..7ce6414
--- /dev/null
+++ b/file_metadata/generic_file.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (division, absolute_import, unicode_literals,
+                        print_function)
+
+from file_metadata.utilities import PropertyCached
+
+
+class GenericFile(object):
+    """
+    Object corresponding to a single file. An abstract class that can be
+    used for any mimetype/media-type (depending on the file itself). Provides
+    helper functions to open files, and analyze basic data common to all
+    types of files.
+
+    Any class that inherits from this abstract class would probably want to
+    set the ``mimetypes`` and override the ``analyze()`` or write their
+    own ``analyze_*()`` methods depending on the file type and analysis
+    routines that should be run.
+
+    :ivar mimetypes: Set of mimetypes (strings) applicable to this class
+        based on the official standard by IANA.
+    """
+    mimetypes = ()
+
+    def __init__(self, fname):
+        """
+        :param fname: The filename of the file this object corresponds to.
+        """
+        self.filename = fname
+
+    def analyze(self, prefix='analyze_', suffix='', methods=None):
+        """
+        Analyze the given file and create metadata information appropriately.
+        Search and use all methods that have a name starting with
+        ``prefix`` and ending with ``suffix`` (by default, ``analyze_*``)
+        and merge the dictionaries they return using ``.update()`` to get
+        the cumulative set of metadata. Methods are called in the order
+        given, so on duplicate keys the value from the later method wins.
+
+        :param prefix: Use only methods that have this prefix.
+        :param suffix: Use only methods that have this suffix.
+        :param methods: A list of method names to choose from. If not given
+                        (``None``), a sorted list of all attribute names
+                        of the object is used. An empty list selects no
+                        method at all.
+        :return: A dict containing the cumulative metadata.
+        """
+        data = {}
+        if methods is None:
+            # Only fall back when nothing was passed; ``methods or ...``
+            # would also discard an explicitly empty list.
+            methods = sorted(dir(self))
+        for method in methods:
+            if method.startswith(prefix) and method.endswith(suffix):
+                data.update(getattr(self, method)())
+        return data
+
+    @PropertyCached
+    def metadata(self):
+        """
+        A python dictionary of all the metadata identified by analyzing
+        the given file. This property is read-only and cannot be modified.
+
+        :return: All the metadata found about the given file.
+        """
+        return self.analyze()
diff --git a/tests/generic_file_test.py b/tests/generic_file_test.py
new file mode 100644
index 0000000..a213290
--- /dev/null
+++ b/tests/generic_file_test.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (division, absolute_import, unicode_literals,
+                        print_function)
+
+from file_metadata.generic_file import GenericFile
+from tests import fetch_file, unittest
+
+
+class DerivedFile(GenericFile):
+
+    def analyze(self):
+        # Only use the `analyze_test*` methods for tests
+        return GenericFile.analyze(self, prefix='analyze_test')
+
+    def analyze_test1(self):
+        return {"test1": "test1"}
+
+    def analyze_test2(self):
+        return {"test2": "test2"}
+
+
+class DerivedFileTest(unittest.TestCase):
+
+    def setUp(self):
+        self.uut = DerivedFile(fetch_file('ascii.txt'))
+
+    def test_metadata(self):
+        self.assertEqual(self.uut.metadata, {'test1': 'test1',
+                                             'test2': 'test2'})
+
+    def test_analyze_explicit_methods(self):
+        # An explicitly given list restricts the methods that are run, and
+        # an empty list selects none instead of falling back to all methods.
+        self.assertEqual(
+            GenericFile.analyze(self.uut, prefix='analyze_test',
+                                methods=['analyze_test2']),
+            {'test2': 'test2'})
+        self.assertEqual(
+            GenericFile.analyze(self.uut, prefix='analyze_test', methods=[]),
+            {})