Skip to content
10 changes: 9 additions & 1 deletion octue/cloud/storage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,14 @@ def get_metadata(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT):
:return dict:
"""
bucket = self.client.get_bucket(bucket_or_name=bucket_name)
metadata = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout)._properties
blob = bucket.get_blob(blob_name=self._strip_leading_slash(path_in_bucket), timeout=timeout)
metadata = blob._properties

# Get timestamps from blob rather than properties so they are datetime.datetime objects rather than strings.
metadata["updated"] = blob.updated
metadata["timeCreated"] = blob.time_created
metadata["timeDeleted"] = blob.time_deleted
metadata["customTime"] = blob.custom_time
return metadata

def delete(self, bucket_name, path_in_bucket, timeout=_DEFAULT_TIMEOUT):
Expand Down Expand Up @@ -185,5 +192,6 @@ def _update_metadata(self, blob, metadata):
:return None:
"""
if metadata is not None:
blob.custom_time = metadata.pop("timestamp", None)
blob.metadata = metadata
blob.patch()
2 changes: 2 additions & 0 deletions octue/mixins/hashable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import collections.abc
import datetime
from google_crc32c import Checksum


Expand All @@ -9,6 +10,7 @@
float: str,
type(None): lambda attribute: "None",
dict: lambda attribute: str(sorted(attribute.items())),
datetime.datetime: str,
}


Expand Down
49 changes: 40 additions & 9 deletions octue/resources/datafile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import logging
import os
import tempfile
from datetime import datetime
from google_crc32c import Checksum

from octue.cloud import storage
Expand All @@ -10,6 +10,7 @@
from octue.exceptions import AttributeConflict, FileNotFoundException, InvalidInputException
from octue.mixins import Filterable, Hashable, Identifiable, Loggable, Pathable, Serialisable, Taggable
from octue.utils import isfile
from octue.utils.time import convert_from_posix_time, convert_to_posix_time


module_logger = logging.getLogger(__name__)
Expand All @@ -35,13 +36,13 @@ class Datafile(Taggable, Serialisable, Pathable, Loggable, Identifiable, Hashabl
"sequence": 0,
"extension": "csv",
"tags": "",
"timestamp": 0,
"timestamp": datetime.datetime(2021, 5, 3, 18, 15, 58, 298086),
"id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
"size_bytes": 59684813,
"sha-512/256": "somesha"
},

:parameter float|None timestamp: A posix timestamp associated with the file, in seconds since epoch, typically when
:parameter datetime.datetime|int|float|None timestamp: A timestamp associated with the file (either a datetime instance or a posix timestamp in seconds since epoch), typically when
it was created but could relate to a relevant time point for the data
:param str id: The Universally Unique ID of this file (checked to be valid if not None, generated if None)
:param logging.Logger logger: A logger instance to which operations with this datafile will be logged. Defaults to
Expand Down Expand Up @@ -182,7 +183,7 @@ def from_cloud(cls, project_name, bucket_name, datafile_path, allow_overwrite=Fa
sequence = int(sequence)

datafile = cls(
timestamp=kwargs.get("timestamp", custom_metadata.get("timestamp")),
timestamp=kwargs.get("timestamp", metadata.get("customTime")),
id=kwargs.get("id", custom_metadata.get("id", ID_DEFAULT)),
path=storage.path.generate_gs_path(bucket_name, datafile_path),
hash_value=kwargs.get("hash_value", custom_metadata.get("hash_value", metadata.get("crc32c", None))),
Expand Down Expand Up @@ -216,17 +217,47 @@ def to_cloud(self, project_name, bucket_name, path_in_bucket):
def name(self):
return self._name or str(os.path.split(self.path)[-1])

@property
def timestamp(self):
    """Get the timestamp associated with the datafile.

    :return datetime.datetime|None:
    """
    return self._timestamp

@timestamp.setter
def timestamp(self, value):
    """Set the datafile's timestamp. Datetime instances and None are stored as given; ints and floats are treated
    as posix timestamps and converted to datetime instances before being stored.

    :param datetime.datetime|int|float|None value:
    :raise TypeError: if value is of an incorrect type
    :return None:
    """
    # Values that need no conversion are stored directly.
    if value is None or isinstance(value, datetime.datetime):
        self._timestamp = value
        return

    if not isinstance(value, (int, float)):
        raise TypeError(
            f"timestamp should be a datetime.datetime instance, an int, a float, or None; received {value!r}"
        )

    self._timestamp = convert_from_posix_time(value)

@property
def posix_timestamp(self):
    """Get the datafile's timestamp converted to posix time.

    :return float|None: None if the datafile has no timestamp
    """
    timestamp = self.timestamp
    return None if timestamp is None else convert_to_posix_time(timestamp)

@property
def _last_modified(self):
"""Get the date/time the file was last modified in units of seconds since epoch (posix time)."""
"""Get the date/time the file was last modified in units of seconds since epoch (posix time).

:return float:
"""
if self._path_is_in_google_cloud_storage:
unparsed_datetime = self._cloud_metadata.get("updated")
last_modified = self._cloud_metadata.get("updated")

if unparsed_datetime is None:
if last_modified is None:
return None

parsed_datetime = datetime.strptime(unparsed_datetime, "%Y-%m-%dT%H:%M:%S.%fZ")
return (parsed_datetime - datetime(1970, 1, 1)).total_seconds()
return convert_to_posix_time(last_modified)

return os.path.getmtime(self.absolute_path)

Expand Down
19 changes: 19 additions & 0 deletions octue/utils/time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from datetime import datetime, timedelta


def convert_to_posix_time(timestamp):
    """Convert a datetime timestamp to posix time (i.e. seconds since epoch: 1st January 1970, 00:00 UTC).

    Naive datetimes (no UTC offset) are taken to already be expressed in UTC. Timezone-aware datetimes are shifted
    to UTC before the epoch is subtracted, so the same instant gives the same posix time whatever timezone it is
    expressed in.

    :param datetime.datetime timestamp:
    :return float:
    """
    utc_offset = timestamp.utcoffset()
    utc_timestamp = timestamp.replace(tzinfo=None)

    # Building the epoch with the timestamp's own tzinfo would place the epoch at local midnight rather than
    # midnight UTC, skewing the result by the UTC offset, so normalise the timestamp to UTC instead.
    if utc_offset is not None:
        utc_timestamp -= utc_offset

    return (utc_timestamp - datetime(1970, 1, 1)).total_seconds()


def convert_from_posix_time(posix_timestamp):
    """Convert a posix timestamp (seconds since 1st January 1970) to a datetime timestamp.

    :param int|float posix_timestamp:
    :return datetime.datetime:
    """
    epoch = datetime(1970, 1, 1)
    elapsed = timedelta(seconds=posix_timestamp)
    return epoch + elapsed
28 changes: 26 additions & 2 deletions tests/resources/test_datafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import tempfile
import uuid
from datetime import datetime, timezone
from unittest.mock import patch

from octue import exceptions
Expand Down Expand Up @@ -42,6 +43,16 @@ def test_path_argument_required(self):

self.assertIn("__init__() missing 1 required positional argument: 'path'", error.exception.args[0])

def test_setting_timestamp(self):
    """Test that both datetime and posix timestamps can be used for a Datafile, that the timestamp attribute is
    always converted to a datetime instance, and that invalid timestamps raise an error.
    """
    # assertIsInstance reports the offending value and its type on failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(Datafile(timestamp=datetime.now(), path="a_path").timestamp, datetime)
    self.assertIsInstance(Datafile(timestamp=50, path="a_path").timestamp, datetime)

    # A string is none of the accepted types, even if it looks numeric.
    with self.assertRaises(TypeError):
        Datafile(timestamp="50", path="a_path")

def test_gt(self):
"""Test that datafiles can be ordered using the greater-than operator."""
a = Datafile(timestamp=None, path="a_path")
Expand Down Expand Up @@ -147,7 +158,7 @@ def test_from_cloud_with_bare_file(self):
)

datafile = Datafile.from_cloud(
project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, datafile_path=path_in_bucket, timestamp=None
project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, datafile_path=path_in_bucket
)

self.assertEqual(datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}")
Expand All @@ -166,7 +177,11 @@ def test_from_cloud_with_datafile(self):
temporary_file.write("[1, 2, 3]")

datafile = Datafile(
timestamp=None, path=temporary_file.name, cluster=0, sequence=1, tags={"blah:shah:nah", "blib", "glib"}
timestamp=datetime.now(tz=timezone.utc),
path=temporary_file.name,
cluster=0,
sequence=1,
tags={"blah:shah:nah", "blib", "glib"},
)
datafile.to_cloud(project_name=TEST_PROJECT_NAME, bucket_name=TEST_BUCKET_NAME, path_in_bucket=path_in_bucket)

Expand All @@ -176,6 +191,7 @@ def test_from_cloud_with_datafile(self):

self.assertEqual(persisted_datafile.path, f"gs://{TEST_BUCKET_NAME}/{path_in_bucket}")
self.assertEqual(persisted_datafile.id, datafile.id)
self.assertEqual(persisted_datafile.timestamp, datafile.timestamp)
self.assertEqual(persisted_datafile.hash_value, datafile.hash_value)
self.assertEqual(persisted_datafile.cluster, datafile.cluster)
self.assertEqual(persisted_datafile.sequence, datafile.sequence)
Expand Down Expand Up @@ -426,3 +442,11 @@ def test_deserialise_ignores_path_from_if_path_is_absolute(self):
self.assertEqual(datafile.id, deserialised_datafile.id)
self.assertFalse(pathable.path in deserialised_datafile.path)
self.assertEqual(deserialised_datafile.path, temporary_file.name)

def test_posix_timestamp(self):
    """Test that the posix timestamp is None when no timestamp is set and zero when the timestamp is the epoch."""
    datafile_without_timestamp = Datafile(path="hello.txt", timestamp=None)
    self.assertIsNone(datafile_without_timestamp.posix_timestamp)

    # Reuse the same datafile so the property is checked after the timestamp is reassigned.
    epoch = datetime(1970, 1, 1)
    datafile_without_timestamp.timestamp = epoch
    self.assertEqual(datafile_without_timestamp.posix_timestamp, 0)
18 changes: 18 additions & 0 deletions tests/utils/test_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from datetime import datetime
from unittest import TestCase

from octue.utils.time import convert_from_posix_time, convert_to_posix_time


class TestTime(TestCase):
    """Tests for the conversions between posix timestamps and datetime instances."""

    # Pairs of (posix time, equivalent naive datetime) shared by both conversion tests.
    POSIX_TIMES_AND_DATETIMES = (
        (0, datetime(1970, 1, 1)),
        (946684800, datetime(2000, 1, 1)),
        (-946771200, datetime(1940, 1, 1)),
    )

    def test_convert_to_posix_time(self):
        """Test that datetime instances can be converted to posix time."""
        for posix_time, timestamp in self.POSIX_TIMES_AND_DATETIMES:
            with self.subTest(timestamp=timestamp):
                self.assertEqual(convert_to_posix_time(timestamp), posix_time)

    def test_convert_from_posix_time(self):
        """Test that posix timestamps can be converted to datetime instances."""
        for posix_time, timestamp in self.POSIX_TIMES_AND_DATETIMES:
            with self.subTest(posix_time=posix_time):
                self.assertEqual(convert_from_posix_time(posix_time), timestamp)