-
Notifications
You must be signed in to change notification settings - Fork 0
133 lines (127 loc) · 4.65 KB
/
generate_cat_entry.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
---
# This workflow runs on a merge/commit to the main branch.
# The goal is to extract metadata for any *new* datasets/files contained in the diff,
# and add this to the catalog.
# The first steps are:
# - set up git config
# - install git annex, python, datalad, metalad
# - TODO: install whichever extensions are needed for metadata extraction (this would be
#   specific to the type of data contained in this overall repository+catalog)
# Then we find the diff and:
# - check if new datasets were added
# - check for updates to existing datasets
# Then extract metadata for new/updated datasets
# Then translate extracted metadata to catalog schema
# Then checkout catalog from gh-pages branch, and add translated metadata to catalog
# Then push updates back to gh-pages branch
name: generate_cat_entry

on:
  push:
    branches:
      - main

jobs:
  # Inspect the push's datalad diff and decide whether the build job needs to run.
  check:
    runs-on: ubuntu-latest
    outputs:
      # "true" when the diff added a new subdataset; gates the build job below
      run_job: ${{ steps.check_files.outputs.run_job }}
      # path of the newly added subdataset (only set when run_job == "true")
      subds: ${{ steps.check_files.outputs.subds }}
    steps:
      - name: Set up environment
        run: |
          git config --global user.email "test@github.land"
          git config --global user.name "GitHub Almighty"
      # - name: Install git annex
      #   shell: bash
      #   run: |
      #     bash <(wget -q -O- http://neuro.debian.net/_files/neurodebian-travis.sh)
      #     sudo apt-get update -qq
      #     sudo apt-get install eatmydata
      #     sudo eatmydata apt-get install git-annex-standalone
      - name: Set up Python 3.9
        # setup-python@v1 is deprecated; v4 is the maintained release line.
        uses: actions/setup-python@v4
        with:
          # quoted: an unquoted version is parsed as a float (e.g. 3.10 -> 3.1)
          python-version: "3.9"
      - name: Install datalad
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade datalad
      - name: Checkout main
        uses: actions/checkout@v3
        with:
          ref: main
          # full history is needed so datalad can diff event.before..event.after
          fetch-depth: 0
      - name: check datalad diff
        id: check_files
        run: |
          echo "========== check datalad diff =========="
          datalad diff --from ${{ github.event.before }} --to ${{ github.event.after }}
          datalad_diffs=$(datalad diff --from ${{ github.event.before }} --to ${{ github.event.after }})
          # Default to not running the build job; the "::set-output" workflow
          # command is deprecated and disabled, so write to $GITHUB_OUTPUT instead
          # (later writes of the same key override earlier ones).
          echo "run_job=false" >> "$GITHUB_OUTPUT"
          prefix="added: "
          suffix="(dataset)"
          while read -r line; do
            echo "changed item: $line"
            if [[ "$line" == *"$prefix"* && "$line" == *"$suffix"* ]]; then
              # strip "added: " prefix and "(dataset)" suffix to get the subdataset path
              subds=${line#"$prefix"}
              subds=${subds%"$suffix"}
              echo "subds=$subds" >> "$GITHUB_OUTPUT"
              echo "run_job=true" >> "$GITHUB_OUTPUT"
              break
              # This currently assumes that there was a single update in the form of
              # an added subdataset. TODO: run multiple jobs or loop in case of multiple updates
            fi
          done <<< "$datalad_diffs"

  # Extract + translate metadata for the new subdataset and push it to the catalog branch.
  build:
    needs: check
    if: needs.check.outputs.run_job == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Report target subdataset
        shell: bash
        run: |
          echo "Processing new subdataset: ${{ needs.check.outputs.subds }}"
      - name: Set up environment
        run: |
          git config --global user.email "test@github.land"
          git config --global user.name "GitHub Almighty"
      - name: Install git annex
        shell: bash
        run: |
          bash <(wget -q -O- http://neuro.debian.net/_files/neurodebian-travis.sh)
          sudo apt-get update -qq
          sudo apt-get install eatmydata
          sudo eatmydata apt-get install git-annex-standalone
      - name: Set up Python 3.9
        # setup-python@v1 is deprecated; v4 is the maintained release line.
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: Checkout main
        uses: actions/checkout@v3
        with:
          ref: main
          fetch-depth: 0
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade -r requirements.txt
      - name: Checkout datalad-catalog
        uses: actions/checkout@v3
        with:
          repository: datalad/datalad-catalog
          path: datalad-catalog
      - name: Install datalad-catalog
        run: |
          pip install -e datalad-catalog/
      - name: Checkout catalog branch
        uses: actions/checkout@v3
        with:
          ref: catalog
          path: catalogbranch
      - name: Run workflow for extraction+translation+catalog-update
        run: |
          # Get subdataset without data
          datalad get ${{ needs.check.outputs.subds }} --no-data
          # Run workflow-update on new subdataset
          datalad catalog workflow-update -d . -s ${{ needs.check.outputs.subds }} -c catalogbranch
          # Commit and push to catalog branch
          cd catalogbranch
          git add --all
          git commit -m "adds new subdataset to catalog"
          git push origin catalog