-
Notifications
You must be signed in to change notification settings - Fork 1
/
html2json.sh
58 lines (43 loc) · 1.68 KB
/
html2json.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env bash
#
# html2json.sh - turn the HTML pages under data/download into JSON files.
#
# For every entry of `runtimes` (expected to be provided by _variables.sh,
# which is sourced below) each data/download/*.html file whose base name
# matches "<runtime>-<digits>" is parsed into an object
#   { version, platform, os, link, hash, bytes }
# and all objects of one runtime are written, as a JSON array, to
#   data/data/dotnet-<runtime>.json
#
# version/platform/os are taken from the file name; link and hash are taken
# from the page's `id="directLink"` / `id="checksum"` elements; bytes is the
# Content-Length reported by a HEAD request against the link (kept as a
# string, exactly as before).
#
# Requires: jq, curl.

BASEDIR=$(dirname -- "$0")

# Fail early instead of silently producing empty or incomplete JSON files.
for tool in jq curl; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'error: required tool "%s" not found in PATH\n' "$tool" >&2
    exit 1
  fi
done

if [[ ! -r "$BASEDIR/_variables.sh" ]]; then
  printf 'error: cannot read %s\n' "$BASEDIR/_variables.sh" >&2
  exit 1
fi
# The return status of the sourced file is intentionally not checked: it is
# the status of its last command, which says nothing about success here.
. "$BASEDIR/_variables.sh"

#######################################
# Prints the value of an HTML attribute.
# Arguments: $1 - HTML file
#            $2 - fixed string selecting the line(s) to look at
#            $3 - attribute name whose (whitespace-free) value is printed
# Outputs:   the attribute value(s) on stdout, nothing if not found
#######################################
extract_attr() {
  local file=$1 marker=$2 attr=$3
  grep -F -- "$marker" "$file" \
    | grep -Eo "${attr}=\"(\S*)\"" \
    | sed -e "s#${attr}=\"##g" -e 's#"##g'
}

#######################################
# Prints the Content-Length reported by a HEAD request.
# The header name is compared case-insensitively and must be the whole
# header name, so headers that merely mention "content-length" in their
# value are ignored. Carriage returns are stripped.
# Arguments: $1 - URL
# Outputs:   the size on stdout (empty if the header is absent)
#######################################
content_length() {
  curl -sI "$1" \
    | tr -d '\r' \
    | awk -F': *' 'tolower($1) == "content-length" { size = $2 } END { print size }'
}

targetDir="$BASEDIR/data/data"
mkdir -p -- "$targetDir" || exit 1

# `runtimes` may be a bash array or a whitespace-separated string; the
# unquoted expansion deliberately word-splits so that both forms work.
# shellcheck disable=SC2048
for runtime in ${runtimes[*]}; do
  outfilename="$targetDir/dotnet-$runtime.json"
  pattern="$runtime-[0-9]+"
  json="[]"

  # Glob instead of parsing `ls`, so paths with spaces stay intact.
  for filename in "$BASEDIR"/data/download/*.html; do
    [[ -e "$filename" ]] || continue # glob matched nothing

    # Match on the base name only; directory names must not influence the
    # runtime filter or the version/platform/os extraction.
    name=${filename##*/}
    [[ "$name" =~ $pattern ]] || continue

    echo "Parse $filename ..."

    title=$(grep -- 'property="og:title"' "$filename" | grep -Eo 'content="(.*)"')
    if [[ "$title" == *"404"* ]]; then
      echo "404 in the title, file will be skiped"
      continue
    fi

    # Dots are escaped so that only a dotted x.y.z version matches.
    version=$(grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' <<<"$name")
    platform=$(grep -Eo '(arm64|x86|x64)' <<<"$name")
    os=$(grep -Eo '(windows|macos)' <<<"$name")
    link=$(extract_attr "$filename" 'id="directLink"' 'href')
    hash=$(extract_attr "$filename" 'id="checksum"' 'value')

    bytes=""
    if [[ -n "$link" ]]; then
      bytes=$(content_length "$link")
    else
      printf 'warning: no direct link found in %s\n' "$filename" >&2
    fi

    # Build the object and append it in one jq call; all values are passed
    # with --arg so they are never interpreted as jq program text.
    if ! json=$(jq \
      --arg v "$version" \
      --arg p "$platform" \
      --arg o "$os" \
      --arg l "$link" \
      --arg h "$hash" \
      --arg s "$bytes" \
      '. + [{ version: $v, platform: $p, os: $o, link: $l, hash: $h, bytes: $s }]' \
      <<<"$json"); then
      printf 'error: jq failed while processing %s\n' "$filename" >&2
      exit 1
    fi
  done

  if ! jq '.' <<<"$json" >"$outfilename"; then
    printf 'error: could not write %s\n' "$outfilename" >&2
    exit 1
  fi
  echo "$outfilename written sucessfully."
done