Permalink
Browse files

visionlabart.com: update

  • Loading branch information...
lexszero committed Jan 21, 2016
1 parent 23686b4 commit b02baab281655c18e86480bb1758cf7c1b68a47a
Showing with 19 additions and 16 deletions.
  1. +19 −16 visionlabart.com/visionlabart_scrape
@@ -1,21 +1,24 @@
#!/bin/bash
# Image scraper for visionlabart.com, shown here as a commit diff.
# NOTE(review): the +/- diff markers appear to have been lost, so lines of the
# previous and the updated revision are interleaved below (e.g. the two
# consecutive `baseurl=` assignments). The text is not runnable as-is; the
# old/new labels in the comments are inferred from variable usage - confirm
# against the real commit before relying on them.

# Site root. First form ends with a slash, second form does not; the request
# URLs below differ accordingly ("${baseurl}collections/..." vs
# "${baseurl}/pages/...").
baseurl="http://visionlabart.com/"
baseurl="http://visionlabart.com"
# (apparently previous revision) Walk the paginated listing starting at page 1.
page=1
while :; do
# Assume the page has no products until the inner loop sees one.
empty=1
# (apparently previous revision) Fetch one listing page and print only the
# captured "products..." path from each line containing href="/products...".
pgurls=$(
curl -s "${baseurl}collections/types?q=Fine%20Art&page=$page" |
sed -rn 's#.*href="/(products[^"]*)".*#\1#p'
# (apparently updated revision) Fetch the all-artists page and keep the
# collection names from href="/collections/<name>" links ending in "-art".
collections=$(
curl -s "${baseurl}/pages/all-artists" |
sed -rn 's#.*href="/collections/([^"]*-art)".*#\1#p'
)
# One output directory per collection. $coll is expanded unquoted, and mkdir
# is called without -p, so it reports an error if the directory exists.
for coll in $collections; do
mkdir $coll
# Fetch the collection page with ?view=all, extract the product identifier
# that follows ".../products/" in each matching link, and de-duplicate the
# list with sort -u.
products=$(
curl -s "${baseurl}/collections/${coll}?view=all" |
sed -rn 's#.*href="/collections/.*-art/products/([^"]*)".*#\1#p' |
sort -u
)
# (apparently previous revision) For every product path found on the page:
# mark the page as non-empty, print the path, fetch the product page, take the
# line with the <a ... id="mainImage"> anchor, cut its href at the first '?'
# or '"', prefix it with "http:" (presumably the href is protocol-relative -
# TODO confirm) and download it with wget.
for pgurl in $pgurls; do
empty=0
echo $pgurl
url=$(curl -s "${baseurl}/${pgurl}" |
grep '<a.*id="mainImage"' |
sed -rne 's#.*href="([^?"]*)[?"].*#http:\1#p')
wget "$url"
# (apparently updated revision) For every product in the collection: fetch the
# product page, take the value of the data-zoom-image attribute, prefix it
# with "https:", print "<collection>/<product>" as progress output and save
# the download quietly as <collection>/<product>.jpg.
for prod in $products; do
url=$(
curl -s "${baseurl}/collections/${coll}/products/${prod}" |
sed -rn 's#.*data-zoom-image="([^"]*)".*#https:\1#p'
)
echo "$coll/$prod"
wget -q "$url" -O "$coll/${prod}.jpg"
done
# (apparently previous revision) Advance to the next listing page and leave
# the endless loop once a page yielded no product links.
((page++))
[[ "$empty" == "1" ]] && break
done

0 comments on commit b02baab

Please sign in to comment.