Skip to content

Commit

Permalink
Update container on fast-POST
Browse files Browse the repository at this point in the history
This patch makes a number of changes to enable content-type
metadata to be updated when using the fast-POST mode of
operation, as proposed in the associated spec [1].

* the object server and diskfile are modified to allow
  content-type to be updated by a POST and the updated value
  to be stored in .meta files.

* the object server accepts PUTs and DELETEs with older
  timestamps than existing .meta files. This is to be
  consistent with replication that will leave a later .meta
  file in place when replicating a .data file.

* the diskfile interface is modified to provide accessor
  methods for the content-type and its timestamp.

* the naming of .meta files is modified to encode two
  timestamps when the .meta file contains a content-type value
  that was set prior to the latest metadata update; this
  enables consistency to be achieved when rsync is used for
  replication.

* ssync is modified to sync meta files when content-type
  differs between local and remote copies of objects.

* the object server issues container updates when handling
  POST requests, notifying the container server of the current
  immutable metadata (etag, size, hash, swift_bytes),
  content-type with their respective timestamps, and the
  mutable metadata timestamp.

* the container server maintains the most recently reported
  values for immutable metadata, content-type and mutable
  metadata, each with their respective timestamps, in a single
  db row.

* new probe tests verify that replication achieves eventual
  consistency of containers and objects after discrete updates
  to content-type and mutable metadata, and that container-sync
  sync's objects after fast-post updates.

[1] spec change-id: I60688efc3df692d3a39557114dca8c5490f7837e

Change-Id: Ia597cd460bb5fd40aa92e886e3e18a7542603d01
  • Loading branch information
Alistair Coles authored and Alistair Coles committed Mar 3, 2016
1 parent 30624a8 commit e91de49
Show file tree
Hide file tree
Showing 26 changed files with 3,143 additions and 253 deletions.
121 changes: 121 additions & 0 deletions swift/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ def drop_buffer_cache(fd, offset, length):

NORMAL_FORMAT = "%016.05f"
INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x'
SHORT_FORMAT = NORMAL_FORMAT + '_%x'
MAX_OFFSET = (16 ** 16) - 1
PRECISION = 1e-5
# Setting this to True will cause the internal format to always display
Expand Down Expand Up @@ -820,6 +821,13 @@ def internal(self):
else:
return self.normal

@property
def short(self):
if self.offset or FORCE_INTERNAL:
return SHORT_FORMAT % (self.timestamp, self.offset)
else:
return self.normal

@property
def isoformat(self):
t = float(self.normal)
Expand Down Expand Up @@ -849,16 +857,22 @@ def isoformat(self):
return isoformat

def __eq__(self, other):
if other is None:
return False
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal == other.internal

def __ne__(self, other):
if other is None:
return True
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal != other.internal

def __lt__(self, other):
if other is None:
return False
if not isinstance(other, Timestamp):
other = Timestamp(other)
return self.internal < other.internal
Expand All @@ -867,6 +881,94 @@ def __hash__(self):
return hash(self.internal)


def encode_timestamps(t1, t2=None, t3=None, explicit=False):
"""
Encode up to three timestamps into a string. Unlike a Timestamp object, the
encoded string does NOT used fixed width fields and consequently no
relative chronology of the timestamps can be inferred from lexicographic
sorting of encoded timestamp strings.
The format of the encoded string is:
<t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]]
i.e. if t1 = t2 = t3 then just the string representation of t1 is returned,
otherwise the time offsets for t2 and t3 are appended. If explicit is True
then the offsets for t2 and t3 are always appended even if zero.
Note: any offset value in t1 will be preserved, but offsets on t2 and t3
are not preserved. In the anticipated use cases for this method (and the
inverse decode_timestamps method) the timestamps passed as t2 and t3 are
not expected to have offsets as they will be timestamps associated with a
POST request. In the case where the encoding is used in a container objects
table row, t1 could be the PUT or DELETE time but t2 and t3 represent the
content type and metadata times (if different from the data file) i.e.
correspond to POST timestamps. In the case where the encoded form is used
in a .meta file name, t1 and t2 both correspond to POST timestamps.
"""
form = '{0}'
values = [t1.short]
if t2 is not None:
t2_t1_delta = t2.raw - t1.raw
explicit = explicit or (t2_t1_delta != 0)
values.append(t2_t1_delta)
if t3 is not None:
t3_t2_delta = t3.raw - t2.raw
explicit = explicit or (t3_t2_delta != 0)
values.append(t3_t2_delta)
if explicit:
form += '{1:+x}'
if t3 is not None:
form += '{2:+x}'
return form.format(*values)


def decode_timestamps(encoded, explicit=False):
"""
Parses a string of the form generated by encode_timestamps and returns
a tuple of the three component timestamps. If explicit is False, component
timestamps that are not explicitly encoded will be assumed to have zero
delta from the previous component and therefore take the value of the
previous component. If explicit is True, component timestamps that are
not explicitly encoded will be returned with value None.
"""
# TODO: some tests, e.g. in test_replicator, put float timestamps values
# into container db's, hence this defensive check, but in real world
# this may never happen.
if not isinstance(encoded, basestring):
ts = Timestamp(encoded)
return ts, ts, ts

parts = []
signs = []
pos_parts = encoded.split('+')
for part in pos_parts:
# parse time components and their signs
# e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1]
neg_parts = part.split('-')
parts = parts + neg_parts
signs = signs + [1] + [-1] * (len(neg_parts) - 1)
t1 = Timestamp(parts[0])
t2 = t3 = None
if len(parts) > 1:
t2 = t1
delta = signs[1] * int(parts[1], 16)
# if delta = 0 we want t2 = t3 = t1 in order to
# preserve any offset in t1 - only construct a distinct
# timestamp if there is a non-zero delta.
if delta:
t2 = Timestamp((t1.raw + delta) * PRECISION)
elif not explicit:
t2 = t1
if len(parts) > 2:
t3 = t2
delta = signs[2] * int(parts[2], 16)
if delta:
t3 = Timestamp((t2.raw + delta) * PRECISION)
elif not explicit:
t3 = t2
return t1, t2, t3


def normalize_timestamp(timestamp):
"""
Format a timestamp (string or numeric) into a standardized
Expand Down Expand Up @@ -3357,6 +3459,25 @@ def parse_content_type(content_type):
return content_type, parm_list


def extract_swift_bytes(content_type):
"""
Parse a content-type and return a tuple containing:
- the content_type string minus any swift_bytes param,
- the swift_bytes value or None if the param was not found
:param content_type: a content-type string
:return: a tuple of (content-type, swift_bytes or None)
"""
content_type, params = parse_content_type(content_type)
swift_bytes = None
for k, v in params:
if k == 'swift_bytes':
swift_bytes = v
else:
content_type += ';%s=%s' % (k, v)
return content_type, swift_bytes


def override_bytes_from_content_type(listing_dict, logger=None):
"""
Takes a dict from a container listing and overrides the content_type,
Expand Down

0 comments on commit e91de49

Please sign in to comment.