Merge pull request #8 from scrapinghub/sc631
Add list-spiders cmd to shub-image
chekunkov committed Jun 17, 2016
2 parents a83b177 + 1bf7974 commit 77e013e
Showing 13 changed files with 337 additions and 91 deletions.
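For orientation before the per-file diffs: the new command is registered as "list" in shub_image/tool.py (see module_deps below) and can be exercised in-process via click's test runner. A minimal sketch, assuming this commit's package is importable; the target name "default" is illustrative:

from click.testing import CliRunner

from shub_image import list as list_mod

# Equivalent to invoking the new list command against the "default"
# target with debug output enabled; on success it prints one spider
# name per line (see list_cmd_full in shub_image/list.py below).
runner = CliRunner()
result = runner.invoke(list_mod.cli, ['default', '--debug'])
print(result.output)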
7 changes: 3 additions & 4 deletions shub_image/build.py
@@ -31,10 +31,10 @@
 @click.option("-d", "--debug", help="debug mode", is_flag=True)
 @click.option("--version", help="release version")
 def cli(target, debug, version):
-    build_cmd(target, debug, version)
+    build_cmd(target, version)
 
 
-def build_cmd(target, debug, version):
+def build_cmd(target, version):
     client = utils.get_docker_client()
     project_dir = utils.get_project_dir()
     config = utils.load_release_config()
@@ -47,8 +47,7 @@ def build_cmd(target, debug, version):
     for line in client.build(path=project_dir, tag=image_name):
         data = json.loads(line)
         if 'stream' in data:
-            if debug:
-                click.echo("{}".format(data['stream'][:-1]))
+            utils.debug_log("{}".format(data['stream'][:-1]))
             is_built = re.search(
                 r'Successfully built ([0-9a-f]+)', data['stream'])
         elif 'error' in data:
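build.py is the first caller of the new utils.debug_log helper (defined near the bottom of this diff in shub_image/utils.py): it picks the --debug flag off the active click context, so the flag no longer has to be threaded through every function signature. A minimal sketch of the pattern with a hypothetical command:

import click

from shub_image import utils

@click.command()
@click.option("-d", "--debug", help="debug mode", is_flag=True)
def mycmd(debug):
    # `debug` is never passed down: debug_log() calls
    # click.get_current_context(True) and checks ctx.params.get('debug'),
    # so this line prints only when -d/--debug was given.
    utils.debug_log("diagnostic message")
    click.echo("done")

if __name__ == '__main__':
    mycmd()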
30 changes: 10 additions & 20 deletions shub_image/deploy.py
@@ -12,6 +12,7 @@
 
 from shub.deploy import list_targets
 from shub_image import utils
+from shub_image import list as list_mod
 
 
 VALIDSPIDERNAME = re.compile('^[a-z0-9][-._a-z0-9]+$', re.I)
@@ -49,21 +50,21 @@
 @click.option("--email", help="docker registry email")
 @click.option("--async", is_flag=True, help="enable asynchronous mode")
 def cli(target, debug, version, username, password, email, async):
-    deploy_cmd(target, debug, version, username, password, email, async)
+    deploy_cmd(target, version, username, password, email, async)
 
 
-def deploy_cmd(target, debug, version, username, password, email, async):
+def deploy_cmd(target, version, username, password, email, async):
     config = utils.load_release_config()
     project, endpoint, apikey = config.get_target(target)
     image = config.get_image(target)
     version = version or config.get_version()
     image_name = utils.format_image_name(image, version)
 
     params = _prepare_deploy_params(
-        project, version, image_name,
+        project, version, image_name, endpoint, apikey,
         username, password, email)
-    if debug:
-        click.echo('Deploy with params: {}'.format(params))
-
+    utils.debug_log('Deploy with params: {}'.format(params))
     req = requests.post(
         urljoin(endpoint, '/api/releases/deploy.json'),
         data=params,
@@ -109,15 +110,16 @@ def _check_status_url(status_url):
     return status_req.json()
 
 
-def _prepare_deploy_params(project, version, image_name,
+def _prepare_deploy_params(project, version, image_name, endpoint, apikey,
                            username, password, email):
-    spiders = _extract_spiders_from_project()
+    # Reusing shub_image.list logic to get spiders list
+    spiders = list_mod.list_cmd(image_name, project, endpoint, apikey)
     scripts = _extract_scripts_from_project()
     params = {'project': project,
               'version': version,
               'image_url': image_name}
     if spiders:
-        params['spiders'] = spiders
+        params['spiders'] = ','.join(spiders)
     if scripts:
         params['scripts'] = scripts
     if not username:
@@ -130,18 +132,6 @@ def _prepare_deploy_params(project, version, image_name,
     return params
 
 
-def _extract_spiders_from_project():
-    spiders = []
-    try:
-        raw_output = subprocess.check_output(["scrapy", "list"])
-        spiders = sorted(filter(
-            VALIDSPIDERNAME.match, raw_output.splitlines()))
-    except subprocess.CalledProcessError as exc:
-        click.echo(
-            "Can't extract spiders from project:\n{}".format(exc.output))
-    return ','.join(spiders)
-
-
 def _extract_scripts_from_project(setup_filename='setup.py'):
     """Parse setup.py and return scripts"""
     if not os.path.isfile(setup_filename):
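Net effect on the deploy request: the spider list now comes from running the image itself (via shub_image.list) instead of a local `scrapy list` subprocess, and is sent as a comma-joined string. An illustrative sketch of the resulting payload; keys are from the diff above, all values are made up:

# Shape of the dict returned by _prepare_deploy_params() (illustrative).
params = {
    'project': 12345,
    'version': '1.0',
    'image_url': 'registry.example.com/myproject:1.0',
    'spiders': 'spider-a,spider-b',  # ','.join() of list_cmd() output
    'scripts': 'hello.py',  # only present when setup.py declares scripts
}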
121 changes: 121 additions & 0 deletions shub_image/list.py
@@ -0,0 +1,121 @@
+import os
+import re
+import json
+import click
+import requests
+from urlparse import urljoin
+
+from shub import exceptions as shub_exceptions
+from shub.deploy import list_targets
+from shub_image import utils
+
+
+SETTING_TYPES = ['project_settings',
+                 'organization_settings',
+                 'enabled_addons']
+
+SHORT_HELP = 'List spiders.'
+
+HELP = """
+List command tries to run your image locally and get a spiders list.
+Internally, this command is a simple wrapper to `docker run` and uses
+docker daemon on your system to run a new container using your image.
+Before creating the container, there's a Dash call to get your project
+settings to get your spiders list properly (respecting SPIDERS_MODULE
+setting, etc).
+Image should be set via scrapinghub.yml, section "images". If version is not
+provided, the tool uses VCS-based stamp over project directory (the same as
+shub utils itself).
+"""
+
+
+@click.command(help=HELP, short_help=SHORT_HELP)
+@click.argument("target", required=False, default="default")
+@click.option("-l", "--list-targets", help="list available targets",
+              is_flag=True, is_eager=True, expose_value=False,
+              callback=list_targets)
+@click.option("-d", "--debug", help="debug mode", is_flag=True)
+@click.option("-s", "--silent", help="silent mode", is_flag=True)
+@click.option("--version", help="release version")
+def cli(target, debug, silent, version):
+    list_cmd_full(target, silent, version)
+
+
+def list_cmd_full(target, silent, version):
+    config = utils.load_release_config()
+    image = config.get_image(target)
+    version = version or config.get_version()
+    image_name = utils.format_image_name(image, version)
+    project, endpoint, apikey = None, None, None
+    try:
+        project, endpoint, apikey = config.get_target(target)
+    except shub_exceptions.BadParameterException as exc:
+        if 'Could not find target' not in exc.message:
+            raise
+        if not silent:
+            click.echo(
+                "Not found project for target {}, "
+                "not getting project settings from Dash.".format(target))
+    spiders = list_cmd(image_name, project, endpoint, apikey)
+    for spider in spiders:
+        click.echo(spider)
+
+
+def list_cmd(image_name, project, endpoint, apikey):
+    """Short version of list cmd to use with deploy cmd."""
+
+    settings = {}
+    if project:
+        settings = _get_project_settings(project, endpoint, apikey)
+
+    # Run a local docker container to run list-spiders cmd
+    status_code, logs = _run_list_cmd(project, image_name, settings)
+    if status_code != 0:
+        click.echo(logs)
+        raise shub_exceptions.ShubException(
+            'Container with list cmd exited with code %s' % status_code)
+
+    spiders = utils.valid_spiders(logs)
+    return spiders
+
+
+def _get_project_settings(project, endpoint, apikey):
+    utils.debug_log('Getting settings for {} project:'.format(project))
+    req = requests.get(
+        urljoin(endpoint, '/api/settings/get.json'),
+        params={'project': project},
+        auth=(apikey, ''),
+        timeout=300,
+        allow_redirects=False
+    )
+    req.raise_for_status()
+    utils.debug_log("Response: {}".format(req.json()))
+    return {k: v for k, v in req.json().items() if k in SETTING_TYPES}
+
+
+def _run_list_cmd(project, image_name, project_settings):
+    """Run `scrapy list` command inside the image container."""
+
+    client = utils.get_docker_client()
+    # FIXME we should pass some value for SCRAPY_PROJECT_ID anyway
+    # to handle `scrapy list` cmd properly via sh_scrapy entrypoint
+    project = str(project) if project else ''
+    job_settings = json.dumps(project_settings)
+    container = client.create_container(
+        image=image_name,
+        command=['list-spiders'],
+        environment={'SCRAPY_PROJECT_ID': project,
+                     'JOB_SETTINGS': job_settings})
+    if 'Id' not in container:
+        raise shub_exceptions.ShubException(
+            "Create container error:\n %s" % container)
+
+    client.start(container)
+    statuscode = client.wait(container=container['Id'])
+
+    return statuscode, client.logs(
+        container=container['Id'],
+        stdout=True, stderr=True if statuscode else False,
+        stream=False, timestamps=False)
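The `list-spiders` executable that _run_list_cmd invokes ships with scrapinghub-entrypoint-scrapy and is not part of this diff. The sketch below only illustrates the environment contract established above (SCRAPY_PROJECT_ID and JOB_SETTINGS); it is an assumption, not the actual entrypoint implementation:

import json
import os
import subprocess
import sys

# Environment set by _run_list_cmd() when creating the container
# (hypothetical consumer; the real entrypoint may differ).
project_id = os.environ.get('SCRAPY_PROJECT_ID', '')  # may be empty
job_settings = json.loads(os.environ.get('JOB_SETTINGS', '{}'))

# A real entrypoint would apply job_settings to the Scrapy project before
# listing; here we only emit spider names, one per line, which is the
# format that utils.valid_spiders() later parses out of the container logs.
sys.stdout.write(subprocess.check_output(['scrapy', 'list']))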
10 changes: 5 additions & 5 deletions shub_image/push.py
@@ -31,10 +31,10 @@
 @click.option("--password", help="docker registry password")
 @click.option("--email", help="docker registry email")
 def cli(target, debug, version, username, password, email):
-    push_cmd(target, debug, version, username, password, email)
+    push_cmd(target, version, username, password, email)
 
 
-def push_cmd(target, debug, version, username, password, email):
+def push_cmd(target, version, username, password, email):
     client = utils.get_docker_client()
     config = utils.load_release_config()
     image = config.get_image(target)
@@ -45,9 +45,9 @@ def push_cmd(target, debug, version, username, password, email):
     for line in client.push(image_name, stream=True,
                             insecure_registry=not bool(username)):
         data = json.loads(line)
-        if 'status' in data and debug:
-            click.echo("Logs:{} {}".format(data['status'],
-                                           data.get('progress')))
+        if 'status' in data:
+            utils.debug_log("Logs:{} {}".format(data['status'],
+                                                data.get('progress')))
         if 'error' in data:
            click.echo("Error {}: {}".format(data['error'],
                                             data['errorDetail']))
37 changes: 23 additions & 14 deletions shub_image/test.py
@@ -8,9 +8,9 @@
 HELP = """ TODO """
 
 SH_EP_SCRAPY_WARNING = \
-    'You should add scrapinghub-entrypoint-scrapy dependency to your' \
-    ' requirements.txt or to Dockerfile to run the image with Scrapy Cloud\n' \
-    ' (git+https://github.com/scrapinghub/scrapinghub-entrypoint-scrapy.git)'
+    'You should add scrapinghub-entrypoint-scrapy(>=0.7.0) dependency' \
+    ' to your requirements.txt or to Dockerfile to run the image with' \
+    ' Scrapy Cloud.'
 
 
 @click.command(help=HELP, short_help=SHORT_HELP)
@@ -28,51 +28,60 @@ def cli(target, debug, version):
     docker_client = utils.get_docker_client()
     for check in [_check_image_exists,
                   _check_start_crawl_entry,
+                  _check_list_spiders_entry,
                   _check_sh_entrypoint]:
-        check(image_name, docker_client, debug)
+        check(image_name, docker_client)
 
 
-def _check_image_exists(image_name, docker_client, debug):
+def _check_image_exists(image_name, docker_client):
     """Check that the image exists on local machine."""
     # if there's no docker lib, the command will fail earlier
     # with an exception when getting a client in get_docker_client()
     from docker.errors import NotFound
     try:
         docker_client.inspect_image(image_name)
     except NotFound as exc:
-        if debug:
-            click.echo("{}".format(exc))
+        utils.debug_log("{}".format(exc))
         raise shub_exceptions.NotFoundException(
             "The image doesn't exist yet, please use build command at first.")
 
 
-def _check_sh_entrypoint(image_name, docker_client, debug):
+def _check_sh_entrypoint(image_name, docker_client):
     """Check that the image has scrapinghub-entrypoint-scrapy pkg"""
     status, logs = _run_docker_command(
         docker_client, image_name,
-        ['pip', 'show', 'scrapinghub-entrypoint-scrapy'], debug)
+        ['pip', 'show', 'scrapinghub-entrypoint-scrapy'])
     if status != 0 or not logs:
         raise shub_exceptions.NotFoundException(SH_EP_SCRAPY_WARNING)
 
 
-def _check_start_crawl_entry(image_name, docker_client, debug):
+def _check_list_spiders_entry(image_name, docker_client):
+    """Check that the image has list-spiders entrypoint"""
+    status, logs = _run_docker_command(
+        docker_client, image_name, ['which', 'list-spiders'])
+    if status != 0 or not logs:
+        raise shub_exceptions.NotFoundException(
+            "list-spiders command is not found in the image.\n"
+            "Please upgrade your scrapinghub-entrypoint-scrapy(>=0.7.0)")
+
+
+def _check_start_crawl_entry(image_name, docker_client):
     """Check that the image has start-crawl entrypoint"""
     status, logs = _run_docker_command(
-        docker_client, image_name, ['which', 'start-crawl'], debug)
+        docker_client, image_name, ['which', 'start-crawl'])
     if status != 0 or not logs:
         raise shub_exceptions.NotFoundException(
             "start-crawl command is not found in the image.\n"
             + SH_EP_SCRAPY_WARNING)
 
 
-def _run_docker_command(client, image_name, command, debug):
+def _run_docker_command(client, image_name, command):
     """A helper to execute an arbitrary cmd with given docker image"""
     container = client.create_container(image=image_name, command=command)
     client.start(container)
     statuscode = client.wait(container=container['Id'])
     logs = client.logs(container=container['Id'], stdout=True,
                        stderr=True if statuscode else False,
                        stream=False, timestamps=False)
-    if debug:
-        click.echo("{} results:\n{}".format(command, logs))
+    utils.debug_log("{} results:\n{}".format(command, logs))
     return statuscode, logs
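Both new checks funnel through the slimmed-down _run_docker_command helper, which makes ad-hoc checks against a built image cheap. A hedged usage sketch; the image tag is illustrative:

from shub_image import utils
from shub_image import test as test_mod

client = utils.get_docker_client()
# Run an arbitrary command inside the image and inspect the outcome,
# the same way _check_list_spiders_entry runs `which list-spiders`.
status, logs = test_mod._run_docker_command(
    client, 'myproject:1.0', ['scrapy', 'version'])
print(status, logs)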
5 changes: 3 additions & 2 deletions shub_image/tool.py
@@ -25,10 +25,11 @@ def cli():
 module_deps = {
     "init": [],
     "build": ["docker"],
+    "list": ["docker"],
     "test": ["docker"],
     "push": ["docker"],
-    "deploy": ["scrapy"],
-    "upload": ["scrapy", "docker"],
+    "deploy": ["docker"],
+    "upload": ["docker"],
     "check": [],
 }
 
6 changes: 3 additions & 3 deletions shub_image/upload.py
@@ -27,6 +27,6 @@
 @click.option("--email", help="docker registry email")
 @click.option("--async", is_flag=True, help="enable asynchronous mode")
 def cli(target, debug, version, username, password, email, async):
-    build.build_cmd(target, debug, version)
-    push.push_cmd(target, debug, version, username, password, email)
-    deploy.deploy_cmd(target, debug, version, username, password, email, async)
+    build.build_cmd(target, version)
+    push.push_cmd(target, version, username, password, email)
+    deploy.deploy_cmd(target, version, username, password, email, async)
19 changes: 19 additions & 0 deletions shub_image/utils.py
@@ -1,4 +1,6 @@
 import os
+import re
+import json
 import click
 import importlib
 
@@ -11,6 +13,13 @@
 
 DEFAULT_DOCKER_VERSION = '1.17'
 STATUS_FILE_LOCATION = '.releases'
+_VALIDSPIDERNAME = re.compile('^[a-z0-9][-._a-z0-9]+$', re.I)
+
+
+def debug_log(msg):
+    ctx = click.get_current_context(True)
+    if ctx and ctx.params.get('debug'):
+        click.echo(msg)
 
 
 class ReleaseConfig(shub_config.ShubConfig):
@@ -170,3 +179,13 @@ def _update_status_file(data, path):
     """ Save status file with updated data """
     with open(path, 'w') as status_file:
         yaml.dump(data, status_file, default_flow_style=False)
+
+
+def valid_spiders(buf):
+    """Filter out garbage and only let valid spider names in
+    >>> valid_spiders('Update rootfs\\nsony.com\\n\\nsoa-uk\\n182-blink.com')
+    ['182-blink.com', 'soa-uk', 'sony.com']
+    >>> valid_spiders('-spiders\\nA77aque')
+    ['A77aque']
+    """
+    return sorted(filter(_VALIDSPIDERNAME.match, buf.splitlines()))