Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
fix: set mtime for cached source files [WIP] (#1443)
* fix: set mtime for cached source files

* Implement mtime for local files and github.

* logging

* fix: remove abstract flag

* fix import

* keep mtime if we cannot determine it

* get latest commit

* fix datetime handling for github case
  • Loading branch information
johanneskoester committed Mar 1, 2022
1 parent 90bc88b commit dd27209
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions snakemake/sourcecache.py
Expand Up @@ -11,6 +11,7 @@
import tempfile
import io
from abc import ABC, abstractmethod
from datetime import datetime


from snakemake.common import (
Expand Down Expand Up @@ -60,6 +61,10 @@ def join(self, path):
path = path.get_path_or_uri()
return self.__class__(smart_join(self.get_path_or_uri(), path))

def mtime(self):
    """Return the modification time of the file, or None if it is unknown.

    This default implementation cannot determine a modification time and
    therefore always returns None.
    """
    return None

def __hash__(self):
    """Return the hash of the value produced by get_path_or_uri()."""
    location = self.get_path_or_uri()
    return location.__hash__()

Expand Down Expand Up @@ -111,6 +116,9 @@ def isabs(self):
def simplify_path(self):
    """Return self.path after passing it through utils.simplify_path."""
    raw_path = self.path
    return utils.simplify_path(raw_path)

def mtime(self):
    """Return the modification time of the file at self.path.

    The value is the st_mtime field reported by os.stat.
    """
    stat_result = os.stat(self.path)
    return stat_result.st_mtime

def __fspath__(self):
    """Return self.path so the object can be used where a path is expected."""
    return self.path

Expand Down Expand Up @@ -225,6 +233,18 @@ class GithubFile(HostingProviderFile):
def get_path_or_uri(self):
    """Build the github.com raw-file URL from self.repo, self.ref and self.path."""
    return f"https://github.com/{self.repo}/raw/{self.ref}/{self.path}"

def mtime(self):
    """Return the time of the latest commit touching this file, if available.

    Queries the GitHub commits API for the most recent commit that modified
    self.path in self.repo and converts its committer date to a POSIX
    timestamp.

    Returns:
        The timestamp as a float, or None if the modification time cannot
        be determined (request failure, API error response such as rate
        limiting, no commits listed for the path, or an unparsable date).
    """
    import requests
    from datetime import timezone

    # NOTE(review): the query does not restrict the listing to self.ref, so
    # the commit is presumably looked up on the default branch -- confirm
    # whether a "sha" parameter should be passed as well.
    try:
        response = requests.get(
            f"https://api.github.com/repos/{self.repo}/commits",
            # let requests URL-encode the path instead of interpolating it
            params={"path": self.path, "page": 1, "per_page": 1},
            timeout=30,
        )
        commits = response.json()
        # Error responses are JSON objects rather than lists, and a path
        # without commits yields an empty list; both raise a LookupError.
        stamp = commits[0]["commit"]["committer"]["date"]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        return None

    if not isinstance(stamp, str):
        return None
    if stamp.endswith("Z"):
        # "Z" denotes UTC; fromisoformat only accepts it from Python 3.11 on
        stamp = stamp[:-1] + "+00:00"
    try:
        committed = datetime.fromisoformat(stamp)
    except ValueError:
        return None
    if committed.tzinfo is None:
        # assume UTC if the time stamp carries no offset
        committed = committed.replace(tzinfo=timezone.utc)
    return committed.timestamp()


class GitlabFile(HostingProviderFile):
def __init__(
Expand Down Expand Up @@ -345,6 +365,14 @@ def _do_cache(self, source_file, cache_entry):
) as cache_source:
cache_source.write(source.read())

mtime = source_file.mtime()
if mtime is not None:
# Set to mtime of original file
# In case we don't have that mtime, it is fine
# to just keep the time at the time of caching
# as mtime.
os.utime(cache_entry, times=(mtime, mtime))

def _open(self, path_or_uri, mode):
from smart_open import open

Expand Down

0 comments on commit dd27209

Please sign in to comment.