Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 5.0.0-rc3
current_version = 5.0.0-rc4
tag_name = {new_version}
commit = True
tag = True
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
with:
context: .
file: ./Dockerfile.base
platforms: linux/amd64,linux/arm64
platforms: linux/amd64 #,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repos:
- id: mixed-line-ending
args: ["--fix=lf"]
- id: trailing-whitespace
exclude: ".bumpversion.cfg" # wtf
exclude: ".bumpversion.cfg" # wtf

# - repo: https://github.com/asottile/pyupgrade
# rev: v3.10.1
Expand Down Expand Up @@ -79,7 +79,7 @@ repos:
rev: 1.9.0
hooks:
- id: poetry-export
args: ["--without-hashes", "-o", "requirements.txt"]
args: ["--without-hashes", "--with", "main", "-o", "requirements.txt"]
- id: poetry-export
args:
["--without-hashes", "--only", "dev", "-o", "requirements-dev.txt"]
9 changes: 4 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ FROM ghcr.io/openaleph/ingest-file-base:latest

# uncomment when running on Apple Silicon
# ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libgomp.so.1

COPY . /ingestors
RUN rm -rf /ingestors/tests
WORKDIR /ingestors
RUN pip3 install --no-cache-dir -r /ingestors/requirements.txt
RUN pip3 install --no-cache-dir /ingestors

RUN pip3 install --no-cache-dir --no-deps -r /ingestors/requirements.txt
RUN pip3 install --no-deps --no-cache-dir /ingestors

ENV ARCHIVE_TYPE=file \
ARCHIVE_PATH=/data \
Expand All @@ -17,7 +19,4 @@ ENV ARCHIVE_TYPE=file \

ENV PROCRASTINATE_APP="ingestors.tasks.app"

RUN chmod +x /ingestors/docker-entrypoint.sh

ENTRYPOINT [ "/ingestors/docker-entrypoint.sh" ]
CMD ["procrastinate", "worker", "-q", "ingest"]
12 changes: 9 additions & 3 deletions Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN apt-get -qq -y update \
# python deps (mostly to install their dependencies)
python3-pip python3-dev python3-pil \
# tesseract
tesseract-ocr libtesseract-dev libleptonica-dev pkg-config\
tesseract-ocr libtesseract-dev libleptonica-dev pkg-config \
# libraries
libxslt1-dev libpq-dev libldap2-dev libsasl2-dev \
zlib1g-dev libicu-dev libxml2-dev \
Expand Down Expand Up @@ -116,6 +116,14 @@ ENV LANG='en_US.UTF-8' \
OMP_THREAD_LIMIT='1' \
OPENBLAS_NUM_THREADS='1'

# force compile tesserocr 2.6.2 with C++ 14
# to make it compatible with Tesseract 5
RUN pip download --no-binary=:all: "tesserocr==2.6.2" \
&& tar -xzf tesserocr-2.6.2.tar.gz \
&& sed -i "s/-std=c++11/-std=c++14/" tesserocr-2.6.2/setup.py \
&& cd tesserocr-2.6.2 \
&& CXXFLAGS="-std=c++14" pip install --no-cache-dir .

# tesseract 5
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

Expand All @@ -127,5 +135,3 @@ RUN pip3 install --no-cache-dir --prefer-binary --upgrade setuptools wheel

# Install PyICU
RUN pip3 install --no-binary=:pyicu: pyicu
# Install TesserOCR
RUN pip3 install --no-binary=:tesserocr: tesserocr
15 changes: 8 additions & 7 deletions Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,25 @@ FROM ghcr.io/openaleph/ingest-file-base:latest

# uncomment when running on Apple Silicon
# ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libgomp.so.1

COPY . /ingestors
WORKDIR /ingestors
RUN pip3 install --no-cache-dir -r /ingestors/requirements.txt
RUN pip3 install --no-cache-dir /ingestors

RUN pip3 install --no-cache-dir -r /ingestors/requirements-dev.txt
RUN pip3 install --no-cache-dir procrastinate==3.2.2 # FIXME
RUN pip3 install --no-cache-dir --no-deps -r /ingestors/requirements.txt
RUN pip3 install --no-deps --no-cache-dir /ingestors

RUN pip3 install --no-deps -r /ingestors/requirements-dev.txt
RUN pip3 install --no-cache-dir procrastinate==3.2.2
RUN chown -R app:app /ingestors

ENV ARCHIVE_TYPE=file \
ARCHIVE_PATH=/data \
FTM_STORE_URI=postgresql://aleph:aleph@postgres/aleph \
REDIS_URL=redis://redis:6379/0 \
TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
DEBUG=1

ENV PROCRASTINATE_APP="ingestors.tasks.app"

USER app
ENTRYPOINT [ "/ingestors/docker-entrypoint.sh" ]
CMD ["pytest"]
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ build-cache:
docker build . --cache-from ghcr.io/openaleph/ingest-file:cache -t ghcr.io/openaleph/ingest-file:cache

build-test:
docker build . -f Dockerfile.test -t ghcr.io/openaleph/ingest-file:test
$(COMPOSE) build test-ingest-file

build-macos:
DOCKER_BUILDKIT=0 COMPOSE_DOCKER_CLI_BUILD=0 $(COMPOSE) build --no-rm --parallel
Expand All @@ -39,7 +39,7 @@ format-check:
black --check .

test: build-test services
PYTHONDEVMODE=1 PYTHONTRACEMALLOC=1 $(COMPOSE) run -e DEBUG=1 --rm ingest-file pytest --cov=ingestors --cov-report html --cov-report term
PYTHONDEVMODE=1 PYTHONTRACEMALLOC=1 $(COMPOSE) run --rm test-ingest-file pytest

test-e2e: build services
$(COMPOSE_E2E) run --rm ingest-file
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.0.0-rc3
5.0.0-rc4
29 changes: 24 additions & 5 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ services:

ingest-file:
build:
dockerfile: Dockerfile.test
dockerfile: Dockerfile
hostname: ingest
tmpfs:
- /tmp:mode=777
Expand All @@ -21,15 +21,34 @@ services:
OPENALEPH_DB_URI: postgresql://ingest:ingest@postgres/ingest
FTM_STORE_URI: postgresql://ingest:ingest@postgres/ingest
LOG_FORMAT: TEXT # TEXT or JSON
# LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libgomp.so.1
LD_PRELOAD: /usr/lib/aarch64-linux-gnu/libgomp.so.1
entrypoint: ["/bin/sh", "-c", "echo 'opal-procrastinate init-db' && exec \"$@\"", "--"]
volumes:
- "./ingestors:/ingestors/ingestors"
- "./tests:/ingestors/tests"
- "./data:/ingestors/data"
- "./requirements.txt:/ingestors/requirements.txt"
- "./setup.py:/ingestors/setup.py"
- "~:/host"
depends_on:
- postgres
- redis

test-ingest-file:
build:
context: .
dockerfile: Dockerfile.test
image: test-ingest-file
hostname: ingest
tmpfs:
- /tmp:mode=777
- /data:mode=777
environment:
OPENALEPH_DB_URI: postgresql://ingest:ingest@postgres/ingest
FTM_STORE_URI: postgresql://ingest:ingest@postgres/ingest
LOG_FORMAT: TEXT # TEXT or JSON
DEBUG: 1
entrypoint: ["/bin/sh", "-c", "echo 'opal-procrastinate init-db' && exec \"$@\"", "--"]
volumes:
- "./tests:/ingestors/tests"
- "./data:/ingestors/data"
depends_on:
- postgres
- redis
2 changes: 1 addition & 1 deletion ingestors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from anystore.logging import configure_logging, get_logger
from procrastinate import cli as procrastinate_cli

__version__ = "5.0.0-rc3"
__version__ = "5.0.0-rc4"

configure_logging()

Expand Down
Loading
Loading