Add DATABASE_URL option to support MySQL and PostgreSQL backend (#76)
my8100 committed Aug 2, 2019
1 parent 79ee83f commit de230b7
Showing 18 changed files with 432 additions and 43 deletions.
171 changes: 160 additions & 11 deletions .circleci/config.yml
@@ -1,13 +1,17 @@
# Python CircleCI 2.1 configuration file
version: 2.1


orbs:
codecov: codecov/codecov@1.0.2


jobs:
py37: &test-template
docker:
- image: circleci/python:3.7
environment:
SCRAPYDWEB_TESTMODE: True

working_directory: ~/repo

@@ -18,7 +22,15 @@ jobs:
use-git:
type: boolean
default: false

use-mysql:
type: boolean
default: false
use-postgresql:
type: boolean
default: false
use-sqlite:
type: boolean
default: false
steps:
- run:
name: Install telnet
@@ -32,14 +44,14 @@ jobs:
ls -l ~
- checkout

- when:
condition: <<parameters.is-py27>>
steps:
- run:
name: Create virtual env in PY2
command: |
virtualenv ./venv
- unless:
condition: <<parameters.is-py27>>
steps:
@@ -48,6 +60,61 @@
command: |
python3 -m venv venv
- when:
condition: <<parameters.use-mysql>>
steps:
- run:
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_apscheduler"
name: Waiting for MySQL to be ready
command: |
for i in `seq 1 10`;
do
nc -z 127.0.0.1 3306 && echo Success && exit 0
echo -n .
sleep 1
done
echo Failed waiting for MySQL && exit 1
- run:
name: Install MySQL CLI; Import dummy data; run an example query
command: |
# sudo apt-get install default-mysql-client
# mysql -h 127.0.0.1 -u user -ppassw0rd test_db < sql-data/dummy.sql
# mysql -h 127.0.0.1 -u user -ppassw0rd --execute="SELECT * FROM test_db.Persons"
# https://discuss.circleci.com/t/how-can-i-create-multiple-mysql-databases-in-the-same-docker-image/24762
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_apscheduler"
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_timertasks"
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_metadata"
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_jobs"
- run:
name: Set DATABASE_URL to mysql
command: |
# mysql://user:passw0rd@127.0.0.1:3306
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-postgresql>>
steps:
- run:
name: Setup PSQL Databases
command: |
# https://discuss.circleci.com/t/multiple-postgres-databases-in-circleci-2-0/23089
# createdb: could not connect to database template1: FATAL: role "circleci" does not exist
# sudo apt install -y postgresql-client
# createdb -h localhost scrapydweb_apscheduler -O circleci
- run:
name: Set DATABASE_URL to postgresql
command: |
# postgres://circleci@127.0.0.1:5432
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- when:
condition: <<parameters.use-sqlite>>
steps:
- run:
name: Set DATABASE_URL to sqlite
command: |
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v8.py
cat scrapydweb_settings_v8.py
- run:
name: Install dependencies
command: |
@@ -75,6 +142,7 @@ jobs:
command: |
cd ~
printf "[scrapyd]\nusername = admin\npassword = 12345\n" > scrapyd.conf
cat scrapyd.conf
nohup ~/repo/venv/bin/scrapyd > ~/scrapyd.log 2>&1 &
sleep 5
cat ~/scrapyd.log
@@ -83,6 +151,7 @@
- run:
name: Run tests
command: |
ls -l
. venv/bin/activate
flake8 . --count --exclude=./venv* --select=E9,F63,F7,F82 --show-source --statistics
coverage erase
@@ -91,6 +160,7 @@
- run:
name: Generate report
command: |
echo $DATABASE_URL
. venv/bin/activate
coverage report
coverage html
@@ -108,29 +178,108 @@ jobs:
<<: *test-template
docker:
- image: circleci/python:2.7

py35:
py27-sqlite:
<<: *test-template
docker:
- image: circleci/python:2.7
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'sqlite:////home/circleci/repo/scrapydweb_database'
py27-postgresql:
<<: *test-template
docker:
- image: circleci/python:2.7
# CircleCI PostgreSQL images available at: https://hub.docker.com/r/circleci/postgres/
- image: circleci/postgres:9.6-alpine
environment:
POSTGRES_USER: circleci
# psycopg2.OperationalError: FATAL: database "circleci" does not exist
# https://discuss.circleci.com/t/django-postgresql-and-circleci/15032
POSTGRES_DB: circleci
POSTGRES_PASSWORD: passw0rd
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'postgres://circleci:fakepassword@localhost:5432'
py27-mysql:
<<: *test-template
docker:
- image: circleci/python:3.5
- image: circleci/python:2.7
# https://circleci.com/docs/2.0/postgres-config/#example-mysql-project
# Plugin caching_sha2_password could not be loaded
# - image: circleci/mysql:8.0.4
# https://circleci.com/docs/2.0/circleci-images/#mysql
- image: circleci/mysql:5.7.25
environment:
MYSQL_ROOT_PASSWORD: rootpw
MYSQL_DATABASE: test_db
MYSQL_USER: user
MYSQL_PASSWORD: passw0rd
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'mysql://root:rootpw@127.0.0.1:3306'

py36:
py36-sqlite:
<<: *test-template
docker:
- image: circleci/python:3.6

py37-git:
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'sqlite:////home/circleci/repo/scrapydweb_database'
py37-git-postgresql:
<<: *test-template
docker:
- image: circleci/python:3.7
# CircleCI PostgreSQL images available at: https://hub.docker.com/r/circleci/postgres/
- image: circleci/postgres:9.6-alpine
environment:
POSTGRES_USER: circleci
# psycopg2.OperationalError: FATAL: database "circleci" does not exist
# https://discuss.circleci.com/t/django-postgresql-and-circleci/15032
POSTGRES_DB: circleci
POSTGRES_PASSWORD: passw0rd
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'postgres://circleci:fakepassword@localhost:5432'
py37-git-mysql:
<<: *test-template
docker:
- image: circleci/python:3.7
# https://circleci.com/docs/2.0/postgres-config/#example-mysql-project
# Plugin caching_sha2_password could not be loaded
# - image: circleci/mysql:8.0.4
# https://circleci.com/docs/2.0/circleci-images/#mysql
- image: circleci/mysql:5.7.25
environment:
MYSQL_ROOT_PASSWORD: rootpw
MYSQL_DATABASE: test_db
MYSQL_USER: user
MYSQL_PASSWORD: passw0rd
environment:
SCRAPYDWEB_TESTMODE: True
DATABASE_URL: 'mysql://root:rootpw@127.0.0.1:3306'


workflows:
test:
jobs:
- py27:
is-py27: true
# - py35
- py36
- py27-sqlite:
is-py27: true
use-sqlite: true
- py27-postgresql:
is-py27: true
use-postgresql: true
- py27-mysql:
is-py27: true
use-mysql: true

- py36-sqlite:
use-postgresql: true
- py37
- py37-git:
- py37-git-postgresql:
use-git: true
use-postgresql: true
- py37-git-mysql:
use-git: true
use-mysql: true
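For reference, each job above exports DATABASE_URL in its environment, and the matching "Set DATABASE_URL to ..." step appends that value to the settings file, so after the step scrapydweb_settings_v8.py ends with a single line such as the following (shown for the py27-mysql job; the other jobs differ only in the URL):

    DATABASE_URL = 'mysql://root:rootpw@127.0.0.1:3306'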
3 changes: 3 additions & 0 deletions requirements-tests.txt
@@ -7,3 +7,6 @@ coveralls

scrapy
scrapyd

pymysql>=0.9.3
psycopg2>=2.7.7
3 changes: 2 additions & 1 deletion scrapydweb/__init__.py
@@ -106,8 +106,9 @@ def regex_replace(s, find, replace):


def handle_db(app):
app.config['SQLALCHEMY_BINDS'] = SQLALCHEMY_BINDS
# https://flask-sqlalchemy.palletsprojects.com/en/master/config/
app.config['SQLALCHEMY_DATABASE_URI'] = SQLALCHEMY_DATABASE_URI
app.config['SQLALCHEMY_BINDS'] = SQLALCHEMY_BINDS
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False # https://stackoverflow.com/a/33790196/10517783
app.config['SQLALCHEMY_ECHO'] = True # http://flask-sqlalchemy.pocoo.org/2.3/config/

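The hunk above only wires SQLALCHEMY_DATABASE_URI and SQLALCHEMY_BINDS into Flask-SQLAlchemy; how DATABASE_URL is expanded into those values is not shown in this file. Below is a minimal sketch of such an expansion, assuming one database (or SQLite file) per purpose as named in the CircleCI comments (scrapydweb_metadata, scrapydweb_jobs, scrapydweb_timertasks, scrapydweb_apscheduler); the helper name and bind keys are illustrative assumptions, not the code in this commit.

    DB_APSCHEDULER = 'scrapydweb_apscheduler'
    DB_TIMERTASKS = 'scrapydweb_timertasks'
    DB_METADATA = 'scrapydweb_metadata'
    DB_JOBS = 'scrapydweb_jobs'


    def build_sqlalchemy_urls(database_url):
        """Expand 'mysql://root:rootpw@127.0.0.1:3306' or 'sqlite:////home/username'
        into one URL per purpose (sketch only)."""
        base = database_url.rstrip('/')

        def make(name):
            # SQLite URLs point at a directory, so keep one file per purpose there;
            # for MySQL/PostgreSQL use one database per purpose on the same server.
            suffix = '.db' if base.startswith('sqlite') else ''
            return '%s/%s%s' % (base, name, suffix)

        uri = make(DB_METADATA)
        binds = {'jobs': make(DB_JOBS), 'timertasks': make(DB_TIMERTASKS)}
        return uri, binds, make(DB_APSCHEDULER)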
3 changes: 2 additions & 1 deletion scrapydweb/common.py
@@ -34,7 +34,8 @@ def find_scrapydweb_settings_py(filename, path, prevpath=None):
cfgfile = os.path.join(path, filename)
if os.path.exists(cfgfile):
return cfgfile
return find_scrapydweb_settings_py(filename, os.path.dirname(path), path)
# In vars.py, try to import module scrapydweb_settings_vN in cwd only
# return find_scrapydweb_settings_py(filename, os.path.dirname(path), path)


def get_now_string(allow_space=False):
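With the recursion removed, the settings module is expected to be importable from the current working directory only (see the new comment pointing at vars.py). Here is a hedged sketch of what such a cwd-only import could look like; the function name is illustrative, and only SCRAPYDWEB_SETTINGS_PY itself appears elsewhere in this commit (see the run.py hunk below).

    import importlib
    import os
    import sys

    SCRAPYDWEB_SETTINGS_PY = 'scrapydweb_settings_v8.py'


    def import_settings_from_cwd():
        # Look for scrapydweb_settings_v8 in the current working directory only,
        # instead of walking up parent directories as the removed recursion did.
        module_name = os.path.splitext(SCRAPYDWEB_SETTINGS_PY)[0]
        cwd = os.getcwd()
        sys.path.insert(0, cwd)
        try:
            return importlib.import_module(module_name)
        except ImportError:
            return None
        finally:
            sys.path.remove(cwd)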
11 changes: 11 additions & 0 deletions scrapydweb/default_settings.py
@@ -308,3 +308,14 @@
# The default is False, set it to True to change the logging level from WARNING to DEBUG
# for getting more information about how ScrapydWeb works, especially while debugging.
VERBOSE = False

# The default is '', which means the data of Jobs and Timer Tasks is saved in SQLite files in the Python directory.
# The data can also be saved in a MySQL or PostgreSQL backend to improve concurrency.
# To use MySQL backend, run command: pip install --upgrade pymysql
# To use PostgreSQL backend, run command: pip install --upgrade psycopg2
# e.g.
# 'mysql://username:password@127.0.0.1:3306'
# 'postgres://username:password@127.0.0.1:5432'
# 'sqlite:///c:/Users/username'
# 'sqlite:////home/username'
DATABASE_URL = ''
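A quick way to sanity-check a DATABASE_URL value before starting ScrapydWeb is to parse it with SQLAlchemy, which both the pymysql and psycopg2 backends go through. This is a hedged example rather than part of this commit, and the explicit mysql+pymysql driver in the connection test is an assumption:

    from sqlalchemy import create_engine
    from sqlalchemy.engine.url import make_url

    url = make_url('mysql://username:password@127.0.0.1:3306')  # or a postgres:// / sqlite:// URL
    print(url.drivername, url.host, url.port)

    # Optional connectivity check against a MySQL server (requires pymysql).
    engine = create_engine('mysql+pymysql://username:password@127.0.0.1:3306')
    engine.connect().close()  # raises OperationalError if the server is unreachable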
15 changes: 9 additions & 6 deletions scrapydweb/models.py
@@ -24,7 +24,7 @@ class Metadata(db.Model):
logparser_pid = db.Column(db.Integer, unique=False, nullable=True)
poll_pid = db.Column(db.Integer, unique=False, nullable=True)
pageview = db.Column(db.Integer, unique=False, nullable=False, default=0)
url_scrapydweb = db.Column(db.String(1000), unique=False, nullable=False, default='http://127.0.0.1:5000')
url_scrapydweb = db.Column(db.Text(), unique=False, nullable=False, default='http://127.0.0.1:5000')
url_jobs = db.Column(db.String(255), unique=False, nullable=False, default='/1/jobs/')
url_schedule_task = db.Column(db.String(255), unique=False, nullable=False, default='/1/schedule/task/')
url_delete_task_result = db.Column(db.String(255), unique=False, nullable=False, default='/1/tasks/xhr/delete/1/1/')
@@ -63,8 +63,8 @@ class Job(db.Model):
start = db.Column(db.DateTime, unique=False, nullable=True, index=True)
runtime = db.Column(db.String(20), unique=False, nullable=True)
finish = db.Column(db.DateTime, unique=False, nullable=True, index=True) # Finished
href_log = db.Column(db.String(1000), unique=False, nullable=True)
href_items = db.Column(db.String(1000), unique=False, nullable=True)
href_log = db.Column(db.Text(), unique=False, nullable=True)
href_items = db.Column(db.Text(), unique=False, nullable=True)

def __repr__(self):
return "<Job #%s in table %s, %s/%s/%s start: %s>" % (
@@ -99,8 +99,8 @@ class Task(db.Model):
version = db.Column(db.String(255), unique=False, nullable=False)
spider = db.Column(db.String(255), unique=False, nullable=False)
jobid = db.Column(db.String(255), unique=False, nullable=False)
settings_arguments = db.Column(db.String(2000), unique=False, nullable=False)
selected_nodes = db.Column(db.String(1000), unique=False, nullable=False)
settings_arguments = db.Column(db.Text(), unique=False, nullable=False)
selected_nodes = db.Column(db.Text(), unique=False, nullable=False)

year = db.Column(db.String(255), unique=False, nullable=False)
month = db.Column(db.String(255), unique=False, nullable=False)
@@ -154,7 +154,10 @@ class TaskJobResult(db.Model):
server = db.Column(db.String(255), unique=False, nullable=False) # '127.0.0.1:6800'
status_code = db.Column(db.Integer, unique=False, nullable=False) # -1, 200
status = db.Column(db.String(9), unique=False, nullable=False) # ok|error|exception
result = db.Column(db.String(1000), unique=False, nullable=False) # jobid|message|exception
# psycopg2.DataError) value too long for type character varying(1000)
# https://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.Text
# In general, TEXT objects do not have a length
result = db.Column(db.Text(), unique=False, nullable=False) # jobid|message|exception

def __repr__(self):
kwargs = dict(
3 changes: 1 addition & 2 deletions scrapydweb/run.py
@@ -11,7 +11,7 @@
from scrapydweb import create_app
from scrapydweb.__version__ import __description__, __version__
from scrapydweb.common import authenticate, find_scrapydweb_settings_py, handle_metadata, handle_slash
from scrapydweb.vars import ROOT_DIR, SCHEDULER_STATE_DICT, STATE_PAUSED, STATE_RUNNING
from scrapydweb.vars import ROOT_DIR, SCRAPYDWEB_SETTINGS_PY, SCHEDULER_STATE_DICT, STATE_PAUSED, STATE_RUNNING
from scrapydweb.utils.check_app_config import check_app_config


@@ -20,7 +20,6 @@

STAR = '\n%s\n' % ('*' * 100)
DEFAULT_SETTINGS_PY_PATH = os.path.join(ROOT_DIR, 'default_settings.py')
SCRAPYDWEB_SETTINGS_PY = 'scrapydweb_settings_v8.py'


def main():
4 changes: 4 additions & 0 deletions scrapydweb/templates/scrapydweb/settings.html
@@ -152,6 +152,10 @@ <h3>System</h3>
<ul class="collapse">
<li><div class="title"><h4>DEBUG = {{ DEBUG }}</h4></div></li>
<li><div class="title"><h4>VERBOSE = {{ VERBOSE }}</h4></div></li>
<li>
<div class="title"><h4>DATABASE</h4><i class="iconfont icon-right"></i></div>
<pre>{{ database_details }}</pre>
</li>
</ul>
</div>

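The template only renders a database_details variable; the view code that supplies it is not part of this diff. An illustrative sketch, assuming a Flask view that summarizes the configured DATABASE_URL:

    from flask import Flask, render_template

    app = Flask(__name__)
    DATABASE_URL = 'mysql://username:password@127.0.0.1:3306'  # example value

    @app.route('/settings/')
    def settings():
        # Summarize the database configuration for the System section of settings.html.
        database_details = "DATABASE_URL = '%s'" % (DATABASE_URL or 'sqlite (default)')
        return render_template('scrapydweb/settings.html', database_details=database_details)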
