/
base.py
156 lines (121 loc) · 4.67 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""
:copyright: Copyright 2006-2015 by the Sumatra team, see doc/authors.txt
:license: BSD 2-clause, see LICENSE for details.
"""
from __future__ import unicode_literals
from builtins import object
import hashlib
import os.path
from ..core import component_type
# Placeholder digest: a string of 40 zeros (the same length as the SHA-1 hex
# digest produced by DataItem.digest). DataKey.__eq__ treats a key carrying
# this value as matching any digest, provided path and creation are equal.
IGNORE_DIGEST = "0"*40
@component_type
class DataStore(object):
    """
    Abstract base for data storage back-ends.

    Most methods here only raise :class:`NotImplementedError`; concrete
    stores are expected to override them. :meth:`generate_keys` additionally
    relies on a ``data_item_class`` attribute that is not defined in this
    base class.
    """

    # Method names listed for the ``component_type`` decorator
    # (presumably checked against subclasses -- confirm in ``..core``).
    required_attributes = ("find_new_data", "get_data_item", "delete")

    def __getstate__(self):
        """
        Return a dict of the attributes that define this store.

        Every subclass has its own set of attributes, so this is the common
        way to obtain them (for database storage, etc.).
        """
        raise NotImplementedError

    def copy(self):
        """Build a new store of the same class from ``__getstate__()``."""
        state = self.__getstate__()
        return self.__class__(**state)

    def find_new_data(self, timestamp):
        """Find data items that were newly created or changed."""
        raise NotImplementedError

    def get_data_item(self, key):
        """Return the file matching the given key."""
        raise NotImplementedError

    def get_content(self, key, max_length=None):
        """
        Return the contents of the file identified by `key`.

        When `max_length` is given, the returned value is truncated.
        """
        item = self.get_data_item(key)
        return item.get_content(max_length)

    def delete(self, *keys):
        """Delete the files that correspond to the given keys."""
        raise NotImplementedError

    def generate_keys(self, *paths):
        """
        Return a list with one key per entry of `paths`, so that the data at
        those paths can be retrieved from this store later.
        """
        keys = []
        for path in paths:
            item = self.data_item_class(path, self)
            keys.append(item.generate_key())
        return keys

    def contains_path(self, path):
        """Tell whether the store holds a data item with the given path."""
        raise NotImplementedError
class DataKey(object):
    """
    Identifies a :class:`DataItem`, and may be used to retrieve a
    :class:`DataItem` from a :class:`DataStore`.

    May also be used to store metadata (e.g. file size, mimetype) and be used as
    a proxy for the :class:`DataItem` on a system where the actual data is not
    available.

    Two keys compare equal when their ``path`` and ``creation`` match and
    their digests are either equal or at least one of them is
    ``IGNORE_DIGEST``. Because ``__eq__`` is defined without ``__hash__``,
    instances are not hashable on Python 3.
    """

    def __init__(self, path, digest, creation, **metadata):
        """
        Store the identifying fields of the key.

        :param path: path of the data item.
        :param digest: digest of the data item's content, or ``IGNORE_DIGEST``.
        :param creation: creation marker of the data item; only compared
            for equality here.
        :param metadata: any further keyword arguments, kept as a dict in
            ``self.metadata``.
        """
        self.path = path
        self.digest = digest
        self.creation = creation
        self.metadata = metadata

    def __repr__(self):
        return "%s(%s [%s])" % (self.path, self.digest, self.creation)

    def __eq__(self, other):
        """
        Compare with another key-like object (anything exposing ``path``,
        ``digest`` and ``creation``).

        Returns ``NotImplemented`` for objects lacking those attributes, so
        that e.g. ``key == None`` evaluates to ``False`` instead of raising
        ``AttributeError``.
        """
        try:
            # Attribute access on `other` stays lazy (short-circuit), as
            # some key-like objects may compute `digest` on demand.
            return (self.path == other.path and
                    (self.digest == other.digest or
                     IGNORE_DIGEST in (self.digest, other.digest)) and
                    self.creation == other.creation)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        """Inverse of :meth:`__eq__`, propagating ``NotImplemented``."""
        result = self.__eq__(other)
        if result is NotImplemented:
            # Never negate NotImplemented: let Python try the reflected
            # operation / default comparison instead.
            return NotImplemented
        return not result
class DataItem(object):
    """
    Base class for data item classes, that may represent files or database records.

    The methods below read the attributes ``path``, ``content``, ``size``,
    ``creation``, ``mimetype`` and ``encoding``; none of them is defined in
    this base class, so subclasses have to provide them.
    """

    def __str__(self):
        return self.path

    @property
    def digest(self):
        """Return the SHA-1 hex digest of ``self.content``."""
        return hashlib.sha1(self.content).hexdigest()

    def __eq__(self, other):
        """
        Compare two data items by size, then content, then sorted content.

        Returns ``NotImplemented`` when `other` has no ``size`` attribute,
        so comparing with an unrelated object (e.g. ``None``) yields
        ``False`` instead of raising ``AttributeError``.
        """
        try:
            other_size = other.size
        except AttributeError:
            return NotImplemented
        if self.size != other_size:
            return False
        if self.content == other.content:  # use digest here?
            return True
        # NOTE(review): `sorted_content` is read as an attribute here, while
        # this base class declares it as a method -- presumably subclasses
        # override it as a property; confirm.
        return self.sorted_content == other.sorted_content

    def __ne__(self, other):
        """Inverse of :meth:`__eq__`, propagating ``NotImplemented``."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return NotImplemented
        return not result

    def generate_key(self):
        """Generate a :class:`DataKey` uniquely identifying this data item."""
        return DataKey(self.path, self.digest, self.creation, mimetype=self.mimetype,
                       encoding=self.encoding, size=self.size)

    def get_content(self, max_length=None):
        """
        Return the contents of the data item as a string.

        If *max_length* is specified, return that number of bytes, otherwise
        return the entire content.
        """
        raise NotImplementedError

    def sorted_content(self):
        """Return the contents of the data item, sorted by line."""
        raise NotImplementedError

    def save_copy(self, path):
        """
        Save a copy of the data to a local file.

        If path is an existing directory, the data item path will be appended
        to it, otherwise path is treated as a full path including filename,
        either absolute or relative to the working directory.

        Missing parent directories are created. Byte content is written in
        binary mode; any other content is written in text mode.

        Return the full path of the final file.
        """
        if os.path.isdir(path):
            full_path = os.path.join(path, self.path)
        else:
            full_path = path
        parent_dir = os.path.dirname(full_path)
        # A bare filename has an empty dirname; os.makedirs("") would raise,
        # and the working directory already exists anyway.
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        content = self.content
        # Bytes must go through a binary handle: text mode rejects them on
        # Python 3 and translates newlines on Windows.
        mode = "wb" if isinstance(content, (bytes, bytearray)) else "w"
        with open(full_path, mode) as fp:
            fp.write(content)
        return full_path