Skip to content
This repository has been archived by the owner on Dec 7, 2022. It is now read-only.

Commit

Permalink
Implement option to serve local content
Browse files Browse the repository at this point in the history
Implement option to send Docker files directly from local paths
instead of creating HTTP redirects. The local paths can be created by
a Pulp Docker web distributor or by an rsync distributor.

Example crane.conf:

```
[general]
debug: true
data_dir: /var/www/pub/docker/
endpoint: localhost:5000

[serve_content]
enable: true
```

closes #3857
https://pulp.plan.io/issues/3857
  • Loading branch information
tri authored and simon-baatz committed Aug 15, 2018
1 parent 3034c4a commit 03fc5ad
Show file tree
Hide file tree
Showing 25 changed files with 522 additions and 65 deletions.
11 changes: 8 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@ What is Crane?
--------------

Crane is a small read-only web application that provides enough of the docker
registry API to support "docker pull". Crane does not serve the actual image
files, but instead serves 302 redirects to some other location where files are
being served. A base file location URL can be specified per-repository.
registry API to support "docker pull". Crane supports two modes of operation:

1. Serve 302 redirects to some other location where files are
being served. A base file location URL can be specified per-repository.
This is the default mode.
2. Local content delivery. In this mode, Crane provides "X-Sendfile" headers
to the Apache web server. Apache then delivers the static files itself,
with all of its optimizations.

Crane loads its data from json files stored on disk. It does not have a
database or use any other services. The json files can be generated with pulp
Expand Down
36 changes: 35 additions & 1 deletion crane/api/images.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import httplib
import urlparse
import os

from crane import exceptions
from flask import current_app
from crane import exceptions, config
from crane.app_util import authorize_image_id

VALID_IMAGE_FILES = frozenset(['ancestry', 'json', 'layer'])
Expand Down Expand Up @@ -32,3 +34,35 @@ def get_image_file_url(image_id, repo_info, filename):
base_url += '/'

return urlparse.urljoin(base_url, '/'.join((image_id, filename)))


@authorize_image_id
def get_image_file_path(image_id, repo_info, filename):
    """
    Build the local file path and media type for one file of an image.

    The path is composed of the configured v1 content directory, the
    repository name, the image id and the file name.

    :param image_id:  The identifier for the image
    :type  image_id:  basestring
    :param repo_info: Information about the repository; its ``repository``
                      attribute is used as a path component
    :type  repo_info: crane.data.Repo
    :param filename:  The identifier for the file belonging to the image;
                      must be a member of VALID_IMAGE_FILES
    :type  filename:  basestring

    :returns: tuple of (file path, media type)
    :rtype:   tuple

    :raises exceptions.HTTPError: with NOT_FOUND if the file specified is
                                  not known
    """
    if filename not in VALID_IMAGE_FILES:
        raise exceptions.HTTPError(httplib.NOT_FOUND)

    # only the layer is sent as binary data; every other valid file is JSON
    mediatype = 'application/octet-stream' if filename == 'layer' else 'application/json'

    content_dir = current_app.config.get(config.KEY_SC_CONTENT_DIR_V1)
    return os.path.join(content_dir, repo_info.repository, image_id, filename), mediatype
12 changes: 11 additions & 1 deletion crane/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import sys
import os.path

from flask import Flask

Expand All @@ -11,6 +12,14 @@
from crane import search


class CraneFlask(Flask):
    """
    Flask application that chooses the cache max-age of sent files based on
    the file name.
    """

    def get_send_file_max_age(self, filepath):
        """
        Return the cache max-age to use when sending the given file.

        :param filepath: path of the file that is about to be sent
        :type  filepath: basestring

        :return: the value computed by the parent Flask class if the base name
                 of the file starts with 'sha256:', otherwise the configured
                 data polling interval
        """
        content_addressable = os.path.basename(filepath).startswith('sha256:')
        if not content_addressable:
            # Shorten the cache timeout if the file is not content addressable
            return self.config[config.KEY_DATA_POLLING_INTERVAL]
        return super(CraneFlask, self).get_send_file_max_age(filepath)


def create_app():
"""
Creates the flask app, loading blueprints and the configuration.
Expand All @@ -20,13 +29,14 @@ def create_app():
"""
init_logging()

app = Flask(__name__)
app = CraneFlask(__name__)
app.register_blueprint(v1.section)
app.register_blueprint(v2.section)
app.register_blueprint(crane.section)
app.register_error_handler(exceptions.HTTPError, app_util.http_error_handler)

config.load(app)

# in case the config says that debug mode is on, we need to adjust the
# log level
set_log_level(app)
Expand Down
29 changes: 27 additions & 2 deletions crane/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@
KEY_URL_AUTH_ALGO = 'url_auth_algo'
VALID_AUTH_ALGO = ["sha256", "sha1", "md5"]

# serve content settings
SECTION_SERVE_CONTENT = 'serve_content'
KEY_SC_ENABLE = 'enable'
KEY_SC_CONTENT_DIR_V1 = 'content_dir_v1'
KEY_SC_CONTENT_DIR_V2 = 'content_dir_v2'
KEY_SC_USE_X_SENDFILE = 'use_x_sendfile'

# google search appliance settings
SECTION_GSA = 'gsa'
SECTION_SOLR = 'solr'
Expand Down Expand Up @@ -90,12 +97,10 @@ def read_config(app, parser):
# "general" section settings
with supress(NoSectionError):
app.config['DEBUG'] = parser.getboolean(SECTION_GENERAL, KEY_DEBUG)

# parse other "general" section values
for key in (KEY_DATA_DIR, KEY_ENDPOINT):
with supress(NoOptionError):
app.config[key] = parser.get(SECTION_GENERAL, key)

# parse "general" section values as integers
for key in (KEY_DATA_POLLING_INTERVAL, ):
with supress(NoOptionError):
Expand Down Expand Up @@ -139,6 +144,26 @@ def read_config(app, parser):
_logger.error('value for config option %s is not a valid choice. falling back '
'to default' % KEY_URL_AUTH_ALGO)

# "serve_content" section settings
with supress(NoSectionError):
with supress(NoOptionError):
app.config[KEY_SC_ENABLE] = parser.getboolean(SECTION_SERVE_CONTENT, KEY_SC_ENABLE)
with supress(NoOptionError):
app.config['USE_X_SENDFILE'] = parser.getboolean(SECTION_SERVE_CONTENT,
KEY_SC_USE_X_SENDFILE)
# local content dir only required if crane should serve content
for key_local_content_dir in (KEY_SC_CONTENT_DIR_V1, KEY_SC_CONTENT_DIR_V2):
with supress(NoOptionError):
app.config[key_local_content_dir] = parser.get(SECTION_SERVE_CONTENT,
key_local_content_dir)
if app.config[KEY_SC_ENABLE]:
if not app.config[key_local_content_dir]:
_logger.error('"serve_content" enabled in config, but no "%s" given, disabling the serve content feature!' % key_local_content_dir) # noqa
app.config[KEY_SC_ENABLE] = False
elif not os.path.exists(app.config[key_local_content_dir]):
_logger.error('The directory specified by "%s" does not exist: "%s". Disabling the serve content feature!' % (key_local_content_dir, app.config[key_local_content_dir])) # noqa
app.config[KEY_SC_ENABLE] = False

# "gsa" (Google Search Appliance) section settings
with supress(NoSectionError):
section = app.config.setdefault(SECTION_GSA, {})
Expand Down
6 changes: 6 additions & 0 deletions crane/data/default_config.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ url_auth_ttl: 300
url_auth_param: _auth_
url_auth_algo: sha256

[serve_content]
enable: false
content_dir_v1: /var/www/pub/docker/v1/web/
content_dir_v2: /var/www/pub/docker/v2/web/
use_x_sendfile: true

[gsa]
url:

Expand Down
30 changes: 21 additions & 9 deletions crane/views/v1.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import httplib

from flask import Blueprint, json, current_app, redirect, request
from flask import Blueprint, json, current_app, redirect, request, send_file

from .. import app_util
from .. import config
from .. import exceptions
from ..api import repository, images
from .. import search as search_package


section = Blueprint('v1', __name__, url_prefix='/v1')


Expand All @@ -17,8 +16,8 @@ def add_common_headers(response):
"""
Add headers to a response.
All 200 responses get a content type of 'application/json', and all others
retain their default.
All 200 responses get a content type of 'application/json' if no other content type was set,
and all others retain their default.
Headers are added to make this app look like the actual docker-registry.
Expand All @@ -28,8 +27,10 @@ def add_common_headers(response):
:return: a response object that has the correct headers
:rtype: flask.Response
"""
# if response code is 200, assume it is JSON
if response.status_code == 200:
# Set default content type to 'application/json' if response code is 200 and
# content type isn't already set explicitly
content_type = response.headers.get('Content-Type', '')
if response.status_code == 200 and not content_type.startswith('application/'):
response.headers['Content-Type'] = 'application/json'
# current stable release of docker-registry
response.headers['X-Docker-Registry-Version'] = '0.6.6'
Expand Down Expand Up @@ -166,9 +167,11 @@ def search():


@section.route('/images/<image_id>/<filename>')
def images_redirect(image_id, filename):
def images_serve_or_redirect(image_id, filename):
"""
Redirects (302) the client to a path where it can access the requested file.
If 'serve_content' is set to true use send_file to provide the requested file directly,
taking into account the 'content_dir_v1' parameter.
:param image_id: the full unique ID of a docker image
:type image_id: basestring
Expand All @@ -178,5 +181,14 @@ def images_redirect(image_id, filename):
:return: 302 redirect response
:rtype: flask.Response
"""
image_url = images.get_image_file_url(image_id, filename)
return redirect(image_url)
serve_content = current_app.config.get(config.KEY_SC_ENABLE)

if serve_content:
image, mimetype = images.get_image_file_path(image_id, filename)
try:
return send_file(image, mimetype=mimetype)
except OSError:
raise exceptions.HTTPError(httplib.NOT_FOUND)
else:
image_url = images.get_image_file_url(image_id, filename)
return redirect(image_url)
50 changes: 36 additions & 14 deletions crane/views/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import urlparse
import time
from flask import Blueprint, json, current_app, redirect, request
from flask import Blueprint, json, current_app, redirect, request, send_file

from crane import app_util, exceptions, config
from crane.api import repository
Expand All @@ -18,8 +18,8 @@ def add_common_headers(response):
"""
Add headers to a response.
All 200 responses get a content type of 'application/json', and all others
retain their default.
All 200 responses get a content type of 'application/json' if no other content type was set,
and all others retain their default.
Headers are added to make this app look like the actual docker-registry.
Expand All @@ -29,9 +29,12 @@ def add_common_headers(response):
:return: a response object that has the correct headers
:rtype: flask.Response
"""
# if response code is 200, assume it is JSON
if response.status_code == 200:
response.headers['Content-Type'] = 'application/json'
# Set default content type to 'application/json' if response code is 200 and
# content type isn't already set explicitly
content_type = response.headers.get('Content-Type', '')
if not content_type.startswith('application/'):
response.headers['Content-Type'] = 'application/json'
response.headers['Docker-Distribution-API-Version'] = 'registry/2.0'
return response

Expand All @@ -50,9 +53,11 @@ def v2():


@section.route('/<path:relative_path>')
def name_redirect(relative_path):
def name_serve_or_redirect(relative_path):
"""
Redirects the client to the path from where the file can be accessed.
If 'serve_content' is set to true use send_file to provide the requested file directly,
taking into account the 'content_dir_v2' parameter.
:param relative_path: the relative path after /v2/.
:type relative_path: basestring
Expand All @@ -65,8 +70,8 @@ def name_redirect(relative_path):
base_url = repository.get_path_for_repo(name_component)
if not base_url.endswith('/'):
base_url += '/'

schema2_data = repository.get_schema2_data_for_repo(name_component)
used_mediatype = 'application/json' if component_type != 'blobs' else 'application/octet-stream'

if component_type == 'manifests' and schema2_data is not None:
manifest_list_data = repository.get_manifest_list_data_for_repo(name_component)
Expand All @@ -87,11 +92,15 @@ def name_redirect(relative_path):
# check first manifest list type
if manifest_list_mediatype in accept_headers and identifier in manifest_list_data:
path_component = os.path.join(manifest, 'list', identifier)
used_mediatype = manifest_list_mediatype
# this is needed for older clients which do not understand manifest list
elif identifier in manifest_list_amd64_tags.keys():
if schema2_mediatype in accept_headers:
schema_version = manifest_list_amd64_tags[identifier][1]
if schema_version == 2:
used_mediatype = schema2_mediatype
path_component = os.path.join(
manifest, str(manifest_list_amd64_tags[identifier][1]),
manifest, str(schema_version),
manifest_list_amd64_tags[identifier][0])
elif manifest_list_amd64_tags[identifier][1] == 1:
path_component = os.path.join(
Expand All @@ -102,18 +111,31 @@ def name_redirect(relative_path):
path_component = os.path.join(manifest, '1', identifier)
elif schema2_mediatype in accept_headers and identifier in schema2_data:
path_component = os.path.join(manifest, '2', identifier)
used_mediatype = schema2_mediatype
else:
path_component = os.path.join(manifest, '1', identifier)
# this is needed for V3Repo which do not have schema2 manifests
else:
path_component = os.path.join(manifest, '1', identifier)
url = base_url + path_component

# perform CDN rewrites and auth
url = cdn_rewrite_redirect_url(url)
url = cdn_auth_token_url(url)

return redirect(url)
serve_content = current_app.config.get(config.KEY_SC_ENABLE)
if serve_content:
base_path = current_app.config.get(config.KEY_SC_CONTENT_DIR_V2)
repo_name = repository.get_pulp_repository_name(name_component)
result = os.path.join(base_path, repo_name, path_component)

try:
return send_file(result, mimetype=used_mediatype,
add_etags=False)
except OSError:
raise exceptions.HTTPError(httplib.NOT_FOUND)
else:
url = base_url + path_component

# perform CDN rewrites and auth
url = cdn_rewrite_redirect_url(url)
url = cdn_auth_token_url(url)
return redirect(url)


@section.errorhandler(exceptions.HTTPError)
Expand Down
26 changes: 26 additions & 0 deletions deployment/apache22.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,19 @@
Deny from all
Allow from localhost
</Location>
# Uncomment this when using 'serve_content'
# <Location /v2/>
# Order Allow,Deny
# Allow from all
# XSendFile on
# XSendFilePath /var/lib/crane/repos/
# </Location>
# <Location /v1/>
# Order Allow,Deny
# Allow from all
# XSendFile on
# XSendFilePath /var/lib/crane/repos/
# </Location>
<Directory /usr/share/crane/>
Order allow,deny
Allow from all
Expand All @@ -22,6 +35,19 @@
# Deny from all
# Allow from localhost
# </Location>
# Uncomment this when using 'serve_content'
# <Location /v2/>
# Order Allow,Deny
# Allow from all
# XSendFile on
# XSendFilePath /var/lib/crane/repos/
# </Location>
# <Location /v1/>
# Order Allow,Deny
# Allow from all
# XSendFile on
# XSendFilePath /var/lib/crane/repos/
# </Location>
# <Directory /usr/share/crane/>
# SSLVerifyClient optional_no_ca
# SSLVerifyDepth 2
Expand Down
Loading

0 comments on commit 03fc5ad

Please sign in to comment.