/
rt-dl
executable file
·89 lines (79 loc) · 3.47 KB
/
rt-dl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
# Name of this script without its directory part; used in the usage message.
# "$0" is quoted and preceded by "--" so that a script path containing
# whitespace or starting with a dash is passed to basename as one operand.
CMD=$(basename -- "$0")
# Print a one-line usage summary to stdout.
# Globals: CMD (read) - substituted into the message as the command name.
show_help() {
  printf '%s\n' "Usage: $CMD <FILM_TITLE>"
}
#######################################
# Extract a rating from previously downloaded HTML.
#
# The rating is the number following `style="width:` on the first
# "progress-bar" element found in the HTML.
#
# Arguments:
#   $1 - NAME of the variable that receives the rating
#   $2 - NAME of the variable that holds the HTML text
# Outputs:
#   Writes a "Fail! -- ..." message to stdout on error.
# Returns:
#   0 after storing the rating (an empty string when no progress bar is
#     present in the HTML);
#   1 when not called with exactly two arguments, or when the HTML is a
#     search results page. In the latter case the output variable is
#     set to the empty string so callers never see a stale value.
#######################################
get_film_rating_from_cached_html() {
  if [ $# -ne 2 ]; then
    echo "Fail! -- Expecting 2 argument! ==> $*"
    return 1 # non-zero as false
  fi

  # Locals carry a prefix so they cannot shadow the caller's variables,
  # which are accessed by name below.
  local __rt_rating_var=$1
  local __rt_html_var=$2
  local __rt_html __rt_rating

  # Clear the output variable first: a caller that invokes this function in
  # a loop must not keep the previous result when this call fails.
  printf -v "$__rt_rating_var" '%s' ''

  # Read the HTML through indirect expansion (no eval) and collapse every
  # run of spaces, tabs and newlines into one space. The patterns below
  # expect single spaces between attributes; quoting the expansion keeps
  # characters such as "*" in the HTML from being glob-expanded.
  __rt_html=$(printf '%s' "${!__rt_html_var}" | tr -s ' \t\n' '   ')

  if grep -q 'Search Results for' <<< "$__rt_html"; then
    echo "Fail! -- Cannot get film rating from search results page"
    return 1 # non-zero as false
  fi

  # NOTE: "\n" in the sed replacement and "\+" in the patterns are GNU
  # extensions, as in the original implementation.
  __rt_rating=$(printf '%s\n' "$__rt_html" |
    sed 's#</#\n</#g' |                               # one closing tag per line
    grep 'progress-bar [^"]*" style="width:[0-9]\+' | # lines with a progress bar width
    sed 's#^.*progress-bar [^"]*" style="width:\([0-9]\+\).*$#\1#g' | # keep the number only
    head -n 1)                                        # first occurrence wins

  printf -v "$__rt_rating_var" '%s' "$__rt_rating"
}
# ---------------------------------------------------------------------------
# Main script.
#
# 1. Build a search URL from the command-line words and download it.
# 2. If the response is not a search results page, read the rating straight
#    from it and exit.
# 3. Otherwise extract every "title|/m/..." link matching the query from the
#    results page and print the rating of each film.
# ---------------------------------------------------------------------------
if [ $# -lt 1 ]; then
  echo "Fail! -- Expecting at least 1 argument! ==> $*" >&2
  show_help >&2
  exit 1
fi

# "command -v" is the portable way to test for a program ("which" is not
# guaranteed to exist or to report failure consistently).
if ! command -v curl >/dev/null 2>&1; then
  echo "Fail! -- Requires \"curl\"" >&2
  echo "Hint: sudo aptitude install curl" >&2
  exit 1
fi

# Split the arguments on whitespace and re-join them with single spaces.
# This mirrors plain word splitting but, unlike an unquoted expansion,
# never glob-expands characters such as "*" or "?" typed by the user.
read -r -a QUERY_WORDS <<< "$*"
QUERY="${QUERY_WORDS[*]}"

# Regex fragment for film links: lower-cased query with "_" between words.
QUERY_URL="/m/.*$(printf '%s' "${QUERY// /_}" | tr '[:upper:]' '[:lower:]').*"
DOMAIN='www.rottentomatoes.com'
# Only spaces are percent-encoded; other characters are sent as typed.
URL="http://$DOMAIN/search/?search=${QUERY// /%20}"

echo ""
echo "Extracting ratings URL from.. ==> $URL"

# The URL is quoted: it contains "?", which is a glob character.
if ! CACHED_HTML=$(curl -L -s "$URL"); then
  echo "Fail! -- Could not download $URL" >&2
  exit 1
fi

# Whitespace-normalized copy of the page (runs of spaces/tabs/newlines become
# one space). The sed patterns below rely on single spaces between tokens.
FLAT_HTML=$(printf '%s' "$CACHED_HTML" | tr -s ' \t\n' '   ')

# Not a search results page: treat the response as the film page itself.
if ! grep -q 'Search Results for' <<< "$FLAT_HTML"; then
  CRITICS_RATING=
  get_film_rating_from_cached_html CRITICS_RATING CACHED_HTML
  echo ""
  if [ -n "$CRITICS_RATING" ]; then
    echo "$CRITICS_RATING% of critics liked it!"
  else
    echo "n/a"
  fi
  exit 0
fi

# Build a sorted, de-duplicated list of "title|url" lines.
EXTRACTED_URL=$(printf '%s\n' "$FLAT_HTML" |
  sed 's#</a> <span class="movie_year"> (\([0-9]*\))</span># (\1)</a>#g' | # include movie year
  sed 's#</#\n</#g' |                            # tokenize by HTML tag
  grep "href=\"$QUERY_URL\".*" |                 # locate links matching the query
  grep -v 'img src=' |                           # remove lines with "img src="
  sed 's#^.*href=\"\([^"]*\)\">\(.*\)$#\2|\1#g' | # capture film title and ratings URL
  tr -d "'" |                                    # remove apostrophes
  grep -v "^|.*$" |                              # skip films with no title
  sort | uniq)

# Without this guard the loop below would run once with an empty line and
# fetch the bare domain.
if [ -z "$EXTRACTED_URL" ]; then
  echo "Fail! -- No films matching \"$QUERY\" found in search results" >&2
  exit 1
fi

echo ""
echo "Downloading ratings for:"
# sed instead of xargs: xargs would interpret quotes and backslashes in titles.
printf '%s\n' "$EXTRACTED_URL" | cut -d'|' -f1 | sed 's/^[[:blank:]]*/- /'
echo ""

while read -r LINE; do
  FILM_NAME=$(printf '%s\n' "$LINE" | cut -d'|' -f1)
  FILM_URL="http://$DOMAIN$(printf '%s\n' "$LINE" | cut -d'|' -f2)"
  printf '"%s" ==> ' "$FILM_NAME"
  CACHED_HTML=$(curl -L -s "$FILM_URL")
  # Reset before each lookup so a failed extraction cannot report the
  # previous film's rating.
  CRITICS_RATING=
  get_film_rating_from_cached_html CRITICS_RATING CACHED_HTML
  if [ -n "$CRITICS_RATING" ]; then
    echo "$CRITICS_RATING% of critics liked it!"
  else
    echo "n/a"
  fi
done <<< "$EXTRACTED_URL"