Skip to content

Commit

Permalink
Allow to stream the downloads when appropriate
Browse files Browse the repository at this point in the history
Some API calls will download possibly large data, resulting in a high
memory usage and out-of-memory errors. For these API calls use the
requests streaming capabilities and download chunked data. The caller is
responsible for providing a callable to actually store the data.

The default callable just prints the data to stdout.
  • Loading branch information
Gauvain Pocentek committed Jul 17, 2016
1 parent 8e6a944 commit 94aea52
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 23 deletions.
13 changes: 13 additions & 0 deletions docs/gl_objects/builds.py
Expand Up @@ -77,6 +77,19 @@
build.artifacts()
# end artifacts

# stream artifacts
class Foo(object):
    """Callable that appends every chunk it receives to ``artifacts.zip``."""

    def __init__(self):
        # Binary mode: the streamed chunks are raw bytes of a zip archive, so
        # text mode would fail on Python 3 (and mangle the data on Windows).
        self._fd = open('artifacts.zip', 'wb')

    def __call__(self, chunk):
        """Write one chunk of downloaded data to the output file."""
        self._fd.write(chunk)

# Each downloaded chunk is handed to `target` instead of being kept in memory.
target = Foo()
build.artifacts(streamed=True, action=target)
# Dropping the last reference lets the file object be finalized, which
# flushes the buffered data to disk.
del(target)
# end stream artifacts

# keep artifacts
build.keep_artifacts()
# end keep artifacts
Expand Down
14 changes: 12 additions & 2 deletions docs/gl_objects/builds.rst
Expand Up @@ -116,7 +116,16 @@ Get a build artifacts:

.. warning::

Artifacts are entirely stored in memory.
Artifacts are entirely stored in memory in this example.

.. _streaming_example:

You can download artifacts as a stream. Provide a callable to handle the
stream:

.. literalinclude:: builds.py
:start-after: # stream artifacts
:end-before: # end stream artifacts

Mark a build artifact as kept when expiration is set:

Expand All @@ -132,7 +141,8 @@ Get a build trace:

.. warning::

Traces are entirely stored in memory.
Traces are entirely stored in memory unless you use the streaming feature.
See :ref:`the artifacts example <streaming_example>`.

Cancel/retry a build:

Expand Down
3 changes: 2 additions & 1 deletion gitlab/__init__.py
Expand Up @@ -286,7 +286,7 @@ def set_credentials(self, email, password):
self.email = email
self.password = password

def _raw_get(self, path, content_type=None, **kwargs):
def _raw_get(self, path, content_type=None, streamed=False, **kwargs):
url = '%s%s' % (self._url, path)
headers = self._create_headers(content_type)
try:
Expand All @@ -295,6 +295,7 @@ def _raw_get(self, path, content_type=None, **kwargs):
headers=headers,
verify=self.ssl_verify,
timeout=self.timeout,
stream=streamed,
auth=requests.auth.HTTPBasicAuth(
self.http_username,
self.http_password))
Expand Down
53 changes: 33 additions & 20 deletions gitlab/objects.py
Expand Up @@ -29,6 +29,7 @@

import gitlab
from gitlab.exceptions import * # noqa
from gitlab import utils


class jsonEncoder(json.JSONEncoder):
Expand Down Expand Up @@ -889,22 +890,31 @@ def keep_artifacts(self, **kwargs):
r = self.gitlab._raw_post(url)
raise_error_from_response(r, GitlabGetError, 200)

def artifacts(self, **kwargs):
def artifacts(self, streamed=False, action=None, chunk_size=1024,
**kwargs):
"""Get the build artifacts.
Args:
streamed (bool): If True the data will be processed by chunks of
`chunk_size` and each chunk is passed to `action` for
treatment.
action (callable): Callable responsible of dealing with chunk of
data.
chunk_size (int): Size of each chunk.
Returns:
str: The artifacts.
str: The artifacts if `streamed` is False, None otherwise.
Raises:
GitlabConnectionError: If the server cannot be reached.
GitlabGetError: If the artifacts are not available.
"""
url = '/projects/%s/builds/%s/artifacts' % (self.project_id, self.id)
r = self.gitlab._raw_get(url)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError, 200)
return r.content
return utils.response_content(r, streamed, action, chunk_size)

def trace(self, **kwargs):
def trace(self, streamed=False, action=None, chunk_size=1024, **kwargs):
"""Get the build trace.
Returns:
Expand All @@ -915,9 +925,9 @@ def trace(self, **kwargs):
GitlabGetError: If the trace is not available.
"""
url = '/projects/%s/builds/%s/trace' % (self.project_id, self.id)
r = self.gitlab._raw_get(url)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError, 200)
return r.content
return utils.response_content(r, streamed, action, chunk_size)


class ProjectBuildManager(BaseManager):
Expand Down Expand Up @@ -972,7 +982,8 @@ def diff(self, **kwargs):

return r.json()

def blob(self, filepath, **kwargs):
def blob(self, filepath, streamed=False, action=None, chunk_size=1024,
**kwargs):
"""Generate the content of a file for this commit.
Args:
Expand All @@ -988,10 +999,9 @@ def blob(self, filepath, **kwargs):
url = ('/projects/%(project_id)s/repository/blobs/%(commit_id)s' %
{'project_id': self.project_id, 'commit_id': self.id})
url += '?filepath=%s' % filepath
r = self.gitlab._raw_get(url, **kwargs)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError)

return r.content
return utils.response_content(r, streamed, action, chunk_size)

def builds(self, **kwargs):
"""List the build for this commit.
Expand Down Expand Up @@ -1734,7 +1744,8 @@ def blob(self, sha, filepath, **kwargs):
DeprecationWarning)
return self.repository_blob(sha, filepath, **kwargs)

def repository_blob(self, sha, filepath, **kwargs):
def repository_blob(self, sha, filepath, streamed=False, action=None,
chunk_size=1024, **kwargs):
"""Return the content of a file for a commit.
Args:
Expand All @@ -1750,11 +1761,12 @@ def repository_blob(self, sha, filepath, **kwargs):
"""
url = "/projects/%s/repository/blobs/%s" % (self.id, sha)
url += '?filepath=%s' % (filepath)
r = self.gitlab._raw_get(url, **kwargs)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError)
return r.content
return utils.response_content(r, streamed, action, chunk_size)

def repository_raw_blob(self, sha, **kwargs):
def repository_raw_blob(self, sha, streamed=False, action=None,
chunk_size=1024, **kwargs):
"""Returns the raw file contents for a blob by blob SHA.
Args:
Expand All @@ -1768,9 +1780,9 @@ def repository_raw_blob(self, sha, **kwargs):
GitlabGetError: If the server fails to perform the request.
"""
url = "/projects/%s/repository/raw_blobs/%s" % (self.id, sha)
r = self.gitlab._raw_get(url, **kwargs)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError)
return r.content
return utils.response_content(r, streamed, action, chunk_size)

def repository_compare(self, from_, to, **kwargs):
"""Returns a diff between two branches/commits.
Expand Down Expand Up @@ -1813,7 +1825,8 @@ def archive(self, sha=None, **kwargs):
DeprecationWarning)
return self.repository_archive(sha, **kwargs)

def repository_archive(self, sha=None, **kwargs):
def repository_archive(self, sha=None, streamed=False, action=None,
chunk_size=1024, **kwargs):
"""Return a tarball of the repository.
Args:
Expand All @@ -1829,9 +1842,9 @@ def repository_archive(self, sha=None, **kwargs):
url = '/projects/%s/repository/archive' % self.id
if sha:
url += '?sha=%s' % sha
r = self.gitlab._raw_get(url, **kwargs)
r = self.gitlab._raw_get(url, streamed=streamed, **kwargs)
raise_error_from_response(r, GitlabGetError)
return r.content
return utils.response_content(r, streamed, action, chunk_size)

def create_file(self, path, branch, content, message, **kwargs):
"""Creates file in project repository
Expand Down
15 changes: 15 additions & 0 deletions gitlab/utils.py
@@ -0,0 +1,15 @@
class _StdoutStream(object):
    """Default chunk handler: prints every chunk it is called with."""

    def __call__(self, chunk):
        """Print `chunk` on standard output."""
        print(chunk)


def response_content(response, streamed=False, action=None, chunk_size=1024):
    """Return the content of `response`, or stream it to a callable.

    Args:
        response: Response object exposing a `content` attribute and an
            `iter_content(chunk_size=...)` method.
        streamed (bool): If false the whole content is returned. If true
            the content is read by chunks of `chunk_size` and each chunk
            is passed to `action`.
        action (callable): Callable invoked with each non-empty chunk.
            Defaults to a callable that prints the chunk.
        chunk_size (int): Size of each chunk.

    Returns:
        The response content if `streamed` is false, None otherwise.
    """
    # Truthiness test rather than `is False`, so that None or 0 are treated
    # as "not streamed" instead of silently streaming and returning None.
    if not streamed:
        return response.content

    if action is None:
        action = _StdoutStream()

    for chunk in response.iter_content(chunk_size=chunk_size):
        # Empty chunks carry no data; do not bother the callable with them.
        if chunk:
            action(chunk)

0 comments on commit 94aea52

Please sign in to comment.