-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_data
executable file
·133 lines (119 loc) · 3.56 KB
/
update_data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env bash
# Refreshes the local data under ./data (git checkouts, linear history) and
# then runs the follow-up update commands.
# Usage: update_data [-l]    (-l repeats the update in an endless loop)

# Resolve the directory containing this script and work from there, so the
# relative paths used throughout (./.env, ./data, ./scripts) resolve correctly.
THIS_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
cd "${THIS_DIR}" || exit $?
# A pipeline fails if any of its stages fails, not just the last one.
set -o pipefail
# Load settings from the .env file next to this script. Variables read later
# (e.g. BEARER_SECRET, ON_OS) are presumably defined there -- TODO confirm.
source ./.env
#######################################
# Check that a command can be found on $PATH.
# Arguments:
#   $1 - name of the command to look up (required)
#   $2 - optional hint appended to the error message (e.g. how to install)
# Outputs:
#   On failure, writes a one-line error message to stderr.
# Returns:
#   0 if the command exists, 1 otherwise.
#######################################
havecmd() {
  local cmd_name message
  cmd_name="${1:?Must provide command to check}"
  if command -v "${cmd_name}" >/dev/null 2>&1; then
    return 0
  fi
  message="requires '${cmd_name}', could not find that on your \$PATH"
  if [[ -n "$2" ]]; then
    message="${message}. $2"
  fi
  printf '%s\n' "${message}" 1>&2
  return 1
}
# From here on, abort the script when a command fails outside of a condition.
set -e

# Verify the required external tools up front; havecmd returns non-zero for a
# missing tool, which aborts the script because of 'set -e'.
havecmd pipenv
havecmd evry "Install with 'cargo install evry'"
havecmd wait-for-internet "Install with 'cargo install wait-for-internet'"

# Remote repositories and the local paths (relative to the script's working
# directory) that the functions below read and write.
readonly REPO='https://github.com/seanbreckenridge/mal-id-cache'
readonly REPO_TARGET='./data/mal-id-cache'
readonly HASH_FILE='./data/hash'
readonly ARM_REPO_URL='https://github.com/kawaiioverflow/arm'
# Declared readonly like the other path constants above.
readonly ARM_REPO='./data/arm'

# if running local, sync from remote server
[[ -n "$ON_OS" ]] && wait-for-internet --text 'syncing from server...' && ./scripts/sync_from_remote
#######################################
# Report whether the HEAD commit of the checkout in REPO_TARGET differs from
# the hash stored in HASH_FILE, and store the new hash when it does.
# Globals:
#   REPO_TARGET (read) - git checkout to inspect
#   THIS_DIR    (read) - directory to return to afterwards
#   HASH_FILE   (read) - file holding the last seen short hash; (re)written
#                        when the hash is new or the file is missing
# Outputs:
#   Progress messages on stderr.
# Returns:
#   0 if the hash changed or no hash was stored yet,
#   1 if it is unchanged or the current hash could not be determined.
# Note: exits the whole script if either directory cannot be entered.
#######################################
git_hash_changed() {
  local ret git_hash rev_status
  ret=1
  rev_status=0
  cd "${REPO_TARGET}" || exit $?
  git_hash="$(git rev-parse --short HEAD)" || rev_status=$?
  cd "${THIS_DIR}" || exit $?
  # Never record an empty/bogus hash: a failed rev-parse would otherwise
  # overwrite HASH_FILE with an empty line and be reported as a change.
  if ((rev_status != 0)) || [[ -z "${git_hash}" ]]; then
    echo "Could not read git hash from ${REPO_TARGET}, not updating linear history..." >&2
    return 1
  fi
  echo "Git hash: ${git_hash}" 1>&2
  if [[ -f "${HASH_FILE}" ]]; then
    if [[ "${git_hash}" != "$(<"${HASH_FILE}")" ]]; then
      # file existed, but hash has changed
      echo 'Git hash changed, updating linear history...' >&2
      ret=0
      echo "${git_hash}" >"${HASH_FILE}"
    else
      echo 'Hash unchanged, not updating linear history...' >&2
    fi
  else
    # file didn't exist, create it
    echo 'Hash not found, creating linear history...' >&2
    echo "${git_hash}" >"${HASH_FILE}"
    ret=0
  fi
  return "${ret}"
}
#######################################
# Run curl with an Authorization header taken from BEARER_SECRET, without
# leaking the secret into 'set -x' trace output.
# Globals:
#   BEARER_SECRET (read) - value sent in the Authorization header; the shell
#                          aborts with "No secret set" if unset or empty
# Arguments:
#   "$@" - passed through to curl unchanged
# Outputs:
#   Echoes the (secret-free) curl command line to stdout, then curl's output.
# Returns:
#   curl's exit status.
#######################################
authd_curl() {
  local was_tracing=0 status=0
  if [[ $- == *x* ]]; then
    was_tracing=1
  fi
  echo curl "$@"
  # Tracing is off while the secret is on the command line.
  set +x
  curl --header "Authorization: ${BEARER_SECRET:?No secret set}" "$@" || status=$?
  # Only turn tracing back on if the caller had it enabled.
  if ((was_tracing)); then
    set -x
  fi
  # Report curl's result, not that of the 'set' builtin.
  return "${status}"
}
#######################################
# Make sure a checkout exists at ARM_REPO and pull the latest changes.
# Globals:
#   ARM_REPO     (read) - local checkout path
#   ARM_REPO_URL (read) - remote to clone from when the checkout is missing
# Returns:
#   Status of 'git pull', or of 'cd' if the checkout cannot be entered.
#######################################
update_arm() {
  if [[ ! -d "${ARM_REPO}" ]]; then
    git clone "${ARM_REPO_URL}" "${ARM_REPO}"
  fi
  # The subshell keeps the caller's working directory untouched.
  (
    cd "${ARM_REPO}" || exit $?
    git pull
  )
}
#######################################
# Traced part of one iteration: check that MAL is reachable, then run the
# metadata/estimate updates and trigger the full database update task.
# Kept separate from iter so that iter can restore the shell options no
# matter which 'return' below is taken.
# Globals:
#   SKIP_UPDATE_METADATA (read) - non-empty skips the metadata update
#   VULTR_UPDATE         (read) - non-empty enables the ssh stopgap when the
#                                 service on localhost:4001 is not answering
# Returns:
#   0 when all attempted steps ran, 1 if MAL is down, otherwise the status of
#   the first failing update command.
#######################################
_iter_remote_updates() {
  local query url
  if ! pipenv run python3 main.py mal check-mal; then
    echo 'mal is down, skipping updates...'
    return 1
  fi
  if [[ -z "$SKIP_UPDATE_METADATA" ]]; then
    if wait-for-internet --text 'checking for internet....' --timeout 1; then
      pipenv run python3 main.py mal update-metadata || return $?
    fi
  fi
  if curl -s localhost:4001 >/dev/null; then
    # evry succeeds only when the given interval has elapsed for that tag.
    evry 30 minutes -dbsentinel-user-estimate && {
      pipenv run python3 main.py mal estimate-user-recent ./usernames.txt --request --timid || return $?
    }
    evry 6 hours -dbsentinel-delete-estimate && {
      pipenv run python3 main.py mal estimate-deleted-animelist-xml --request --timid || return $?
    }
  elif [[ -n "$VULTR_UPDATE" ]]; then
    # manual update sshing as a stopgap
    query="$(python3 main.py mal estimate-user-recent ./usernames.txt --print-url)" || return $?
    if [[ -n "$query" ]]; then
      url="localhost:4001/api/pages?${query}"
      # shellcheck disable=SC2029
      ssh vultr "curl -s \"${url}\""
    fi
  fi
  if authd_curl -s localhost:5200/ping >/dev/null; then
    evry 1h -dbsentinel-full-db-update && authd_curl -s localhost:5200/tasks/full_database_update
  fi
  # TODO: do a force update every 3 months?
  # TODO: refresh images every 3 months?
  return 0
}

#######################################
# Run one full update pass: refresh the git checkouts, rebuild the linear
# history when the cache repo's hash changed, then run the remote updates.
# Globals:
#   REPO, REPO_TARGET (read) - cache repository URL and local checkout path
# Outputs:
#   Timestamped progress line on stdout; command trace on stderr while the
#   remote updates run.
# Returns:
#   0 on success, otherwise the status of the failing step. Exits the whole
#   script if rebuilding the linear history fails.
# Side effects:
#   On every path past the history rebuild, leaves errexit enabled and xtrace
#   disabled (the state the original code established on normal completion).
#######################################
iter() {
  local status=0
  printf '[%s] Updating... \n' "$(date)"
  [[ -e './data' ]] || mkdir ./data
  update_arm || return $?
  [[ -e "$REPO_TARGET" ]] || git clone "$REPO" "$REPO_TARGET"
  (cd "$REPO_TARGET" && git pull)
  if git_hash_changed; then
    pipenv run python3 main.py mal linear-history >'./data/data.jsonl' || exit $?
    pipenv run python3 main.py mal clean-linear-history || exit $?
  fi
  set -x
  set +e
  _iter_remote_updates || status=$?
  # Always undo the option changes, including after an early return above.
  set -e
  set +x
  return "${status}"
}
#######################################
# Entry point: run one update pass, and with '-l' keep repeating it.
# Arguments:
#   $1 - '-l' to loop forever, sleeping 10 minutes before each further pass
# Returns:
#   0 after a single pass (the status of that pass is not propagated);
#   never returns in loop mode.
#######################################
main() {
  iter
  # Anything other than -l means a single pass only.
  [[ "$1" == '-l' ]] || return 0
  # Loop mode: wait ten minutes, run another pass, repeat.
  while :; do
    sleep 10m
    iter
  done
}
# Run the entry point with the script's arguments; a non-zero status from
# main becomes the script's exit status.
main "$@" || exit $?