Update container on fast-POST

This patch makes a number of changes to enable content-type metadata to be updated when using the fast-POST mode of operation, as proposed in the associated spec [1]. * the object server and diskfile are modified to allow content-type to be updated by a POST and the updated value to be stored in .meta files. * the object server accepts PUTs and DELETEs with older timestamps than existing .meta files. This is to be consistent with replication that will leave a later .meta file in place when replicating a .data file. * the diskfile interface is modified to provide accessor methods for the content-type and its timestamp. * the naming of .meta files is modified to encode two timestamps when the .meta file contains a content-type value that was set prior to the latest metadata update; this enables consistency to be achieved when rsync is used for replication. * ssync is modified to sync meta files when content-type differs between local and remote copies of objects. * the object server issues container updates when handling POST requests, notifying the container server of the current immutable metadata (etag, size, hash, swift_bytes), content-type with their respective timestamps, and the mutable metadata timestamp. * the container server maintains the most recently reported values for immutable metadata, content-type and mutable metadata, each with their respective timestamps, in a single db row. * new probe tests verify that replication achieves eventual consistency of containers and objects after discrete updates to content-type and mutable metadata, and that container-sync sync's objects after fast-post updates. [1] spec change-id: I60688efc3df692d3a39557114dca8c5490f7837e Change-Id: Ia597cd460bb5fd40aa92e886e3e18a7542603d01
openstack · Mar 3, 2016 · e91de49 · e91de49
1 parent 30624a8
commit e91de49
Show file tree

Hide file tree

Showing 26 changed files with 3,143 additions and 253 deletions.
diff --git a/swift/common/utils.py b/swift/common/utils.py
@@ -700,6 +700,7 @@ def drop_buffer_cache(fd, offset, length):
 
 NORMAL_FORMAT = "%016.05f"
 INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x'
+SHORT_FORMAT = NORMAL_FORMAT + '_%x'
 MAX_OFFSET = (16 ** 16) - 1
 PRECISION = 1e-5
 # Setting this to True will cause the internal format to always display
@@ -820,6 +821,13 @@ def internal(self):
         else:
             return self.normal
 
+    @property
+    def short(self):
+        if self.offset or FORCE_INTERNAL:
+            return SHORT_FORMAT % (self.timestamp, self.offset)
+        else:
+            return self.normal
+
     @property
     def isoformat(self):
         t = float(self.normal)
@@ -849,16 +857,22 @@ def isoformat(self):
         return isoformat
 
     def __eq__(self, other):
+        if other is None:
+            return False
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal == other.internal
 
     def __ne__(self, other):
+        if other is None:
+            return True
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal != other.internal
 
     def __lt__(self, other):
+        if other is None:
+            return False
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal < other.internal
@@ -867,6 +881,94 @@ def __hash__(self):
         return hash(self.internal)
 
 
+def encode_timestamps(t1, t2=None, t3=None, explicit=False):
+    """
+    Encode up to three timestamps into a string. Unlike a Timestamp object, the
+    encoded string does NOT used fixed width fields and consequently no
+    relative chronology of the timestamps can be inferred from lexicographic
+    sorting of encoded timestamp strings.
+
+    The format of the encoded string is:
+        <t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]]
+
+    i.e. if t1 = t2 = t3 then just the string representation of t1 is returned,
+    otherwise the time offsets for t2 and t3 are appended. If explicit is True
+    then the offsets for t2 and t3 are always appended even if zero.
+
+    Note: any offset value in t1 will be preserved, but offsets on t2 and t3
+    are not preserved. In the anticipated use cases for this method (and the
+    inverse decode_timestamps method) the timestamps passed as t2 and t3 are
+    not expected to have offsets as they will be timestamps associated with a
+    POST request. In the case where the encoding is used in a container objects
+    table row, t1 could be the PUT or DELETE time but t2 and t3 represent the
+    content type and metadata times (if different from the data file) i.e.
+    correspond to POST timestamps. In the case where the encoded form is used
+    in a .meta file name, t1 and t2 both correspond to POST timestamps.
+    """
+    form = '{0}'
+    values = [t1.short]
+    if t2 is not None:
+        t2_t1_delta = t2.raw - t1.raw
+        explicit = explicit or (t2_t1_delta != 0)
+        values.append(t2_t1_delta)
+        if t3 is not None:
+            t3_t2_delta = t3.raw - t2.raw
+            explicit = explicit or (t3_t2_delta != 0)
+            values.append(t3_t2_delta)
+        if explicit:
+            form += '{1:+x}'
+            if t3 is not None:
+                form += '{2:+x}'
+    return form.format(*values)
+
+
+def decode_timestamps(encoded, explicit=False):
+    """
+    Parses a string of the form generated by encode_timestamps and returns
+    a tuple of the three component timestamps. If explicit is False, component
+    timestamps that are not explicitly encoded will be assumed to have zero
+    delta from the previous component and therefore take the value of the
+    previous component. If explicit is True, component timestamps that are
+    not explicitly encoded will be returned with value None.
+    """
+    # TODO: some tests, e.g. in test_replicator, put float timestamps values
+    # into container db's, hence this defensive check, but in real world
+    # this may never happen.
+    if not isinstance(encoded, basestring):
+        ts = Timestamp(encoded)
+        return ts, ts, ts
+
+    parts = []
+    signs = []
+    pos_parts = encoded.split('+')
+    for part in pos_parts:
+        # parse time components and their signs
+        # e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1]
+        neg_parts = part.split('-')
+        parts = parts + neg_parts
+        signs = signs + [1] + [-1] * (len(neg_parts) - 1)
+    t1 = Timestamp(parts[0])
+    t2 = t3 = None
+    if len(parts) > 1:
+        t2 = t1
+        delta = signs[1] * int(parts[1], 16)
+        # if delta = 0 we want t2 = t3 = t1 in order to
+        # preserve any offset in t1 - only construct a distinct
+        # timestamp if there is a non-zero delta.
+        if delta:
+            t2 = Timestamp((t1.raw + delta) * PRECISION)
+    elif not explicit:
+        t2 = t1
+    if len(parts) > 2:
+        t3 = t2
+        delta = signs[2] * int(parts[2], 16)
+        if delta:
+            t3 = Timestamp((t2.raw + delta) * PRECISION)
+    elif not explicit:
+        t3 = t2
+    return t1, t2, t3
+
+
 def normalize_timestamp(timestamp):
     """
     Format a timestamp (string or numeric) into a standardized
@@ -3357,6 +3459,25 @@ def parse_content_type(content_type):
     return content_type, parm_list
 
 
+def extract_swift_bytes(content_type):
+    """
+    Parse a content-type and return a tuple containing:
+        - the content_type string minus any swift_bytes param,
+        -  the swift_bytes value or None if the param was not found
+
+    :param content_type: a content-type string
+    :return: a tuple of (content-type, swift_bytes or None)
+    """
+    content_type, params = parse_content_type(content_type)
+    swift_bytes = None
+    for k, v in params:
+        if k == 'swift_bytes':
+            swift_bytes = v
+        else:
+            content_type += ';%s=%s' % (k, v)
+    return content_type, swift_bytes
+
+
 def override_bytes_from_content_type(listing_dict, logger=None):
     """
     Takes a dict from a container listing and overrides the content_type,