Skip to content

Commit

Permalink
Merge pull request #12 from sul-dlss/t9-add-flake8
Browse files (browse the repository at this point in the history)
Adds flake8 and black
  • Loading branch information
aaron-collier committed Aug 4, 2021
2 parents 7c15cbf + 280ee1a commit db5592a
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 48 deletions.
35 changes: 15 additions & 20 deletions dlme_airflow/dags/aims_harvester.py
@@ -1,15 +1,13 @@
from datetime import timedelta
from textwrap import dedent

# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG

# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

from harvester.aims import oai
from harvester.aims import oai
from harvester.copydir import copydir

# def print_world():
@@ -19,13 +17,13 @@
# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'email': ['airflow@example.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
"owner": "airflow",
"depends_on_past": False,
"email": ["airflow@example.com"],
"email_on_failure": False,
"email_on_retry": False,
"retries": 1,
"retry_delay": timedelta(minutes=5),
# 'queue': 'bash_queue',
# 'pool': 'backfill',
# 'priority_weight': 10,
@@ -41,24 +39,21 @@
# 'trigger_rule': 'all_success'
}
with DAG(
'aims_harvester',
"aims_harvester",
default_args=default_args,
description='AIMS Harvester DAG',
description="AIMS Harvester DAG",
schedule_interval=timedelta(days=1),
start_date=days_ago(2),
tags=['example'],
tags=["example"],
) as dag:

# t1 and t2 are tasks created by instantiating operators
t1 = PythonOperator(
task_id='aims_harvest',
python_callable=oai
)
t1 = PythonOperator(task_id="aims_harvest", python_callable=oai)

t2 = PythonOperator(
task_id='aims_copy',
task_id="aims_copy",
python_callable=copydir,
op_kwargs={'provider': 'aims'}
op_kwargs={"provider": "aims"}
)

t1 >> t2
t1 >> t2
26 changes: 11 additions & 15 deletions dlme_airflow/dags/aub_harvester.py
@@ -1,5 +1,4 @@
from datetime import timedelta
from textwrap import dedent

# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG
@@ -13,25 +12,22 @@
# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization
default_args = {
'owner': 'airflow',
'depends_on_past': False,
'email': ['airflow@example.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
"owner": "airflow",
"depends_on_past": False,
"email": ["airflow@example.com"],
"email_on_failure": False,
"email_on_retry": False,
"retries": 1,
"retry_delay": timedelta(minutes=5),
}
with DAG(
'aub_harvester',
"aub_harvester",
default_args=default_args,
description='AUB Harvester DAG',
description="AUB Harvester DAG",
schedule_interval=timedelta(days=1),
start_date=days_ago(2),
tags=['aub', 'oai'],
tags=["aub", "oai"],
) as dag:

# t1 is a task created by instantiating an operator
t1 = PythonOperator(
task_id='aub_harvest',
python_callable=harvest
)
t1 = PythonOperator(task_id="aub_harvest", python_callable=harvest)
13 changes: 8 additions & 5 deletions dlme_airflow/harvester/aims.py
@@ -1,20 +1,23 @@
#!/usr/bin/python
import os, requests
import logging
import os
import requests
from lxml import etree
from io import BytesIO

# from sickle import Sickle
# from sickle.iterator import OAIResponseIterator


def oai():
parser = etree.XMLParser(ns_clean=True, load_dtd=False)
xml = requests.get('https://feed.podbean.com/themaghribpodcast/feed.xml').content
xml = requests.get(
"https://feed.podbean.com/themaghribpodcast/feed.xml").content
tree = etree.parse(BytesIO(xml), parser)
directory = "/opt/airflow/output/aims/data/"
os.makedirs(os.path.dirname(directory), exist_ok=True)

for counter, element in enumerate(tree.findall("//item"), start=1):
with open("{}aims-{}.xml".format(directory, counter), "w") as out_file:
out_file.write(etree.tostring(element, encoding='unicode', pretty_print=True))

out_file.write(
etree.tostring(element, encoding="unicode", pretty_print=True)
)
14 changes: 8 additions & 6 deletions dlme_airflow/harvester/aub.py
@@ -2,21 +2,23 @@
import os
import logging
from sickle import Sickle
from sickle.iterator import OAIResponseIterator


def harvest():
sickle = Sickle("https://libraries.aub.edu.lb/xtf/oai")
sets = ['aco', 'aladab', 'postcards', 'posters', 'travelbooks']
logging.info('**** BEGIN AUB HARVEST ****')
sets = ["aco", "aladab", "postcards", "posters", "travelbooks"]
logging.info("**** BEGIN AUB HARVEST ****")
for s in sets:
directory = "/tmp/output/aub/{}/data/".format(s)
os.makedirs(os.path.dirname(directory), exist_ok=True)

logging.info("Start {} collection harvest.".format(s))
records = sickle.ListRecords(metadataPrefix='oai_dc', set=s, ignore_deleted=True)
records = sickle.ListRecords(
metadataPrefix="oai_dc", set=s, ignore_deleted=True
)
for counter, record in enumerate(records, start=1):
with open('{}{}-{}.xml'.format(directory, s, counter), 'w') as f:
with open("{}{}-{}.xml".format(directory, s, counter), "w") as f:
f.write(record.raw)
logging.info("End {} collection harvest.".format(s))

logging.info('**** END AUB HARVEST ****')
logging.info("**** END AUB HARVEST ****")
4 changes: 2 additions & 2 deletions dlme_airflow/harvester/copydir.py
@@ -1,9 +1,9 @@
#!/usr/bin/python
import os
import time
import shutil


def copydir(**kwargs):
src = "/opt/airflow/output/{}".format(kwargs.get('provider'))
src = "/opt/airflow/output/{}".format(kwargs.get("provider"))
dst = "/opt/airflow/output/{}".format(time.time())
shutil.copytree(src, dst)
4 changes: 4 additions & 0 deletions requirements.txt
@@ -0,0 +1,4 @@
lxml >= 4.6.3
Sickle >= 0.7.0
flake8 >= 3.9.2
black >= 21.7b0
3 changes: 3 additions & 0 deletions setup.cfg
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 88
extend-ignore = E203

0 comments on commit db5592a

Please sign in to comment.