#! /bin/bash
#******************************************************
# Sitespeed.io - How speedy is your site? (http://www.sitespeed.io)
#
# Copyright (C) 2013 by Peter Hedenskog (http://www.peterhedenskog.com)
#
#******************************************************
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
#
#*******************************************************
#*******************************************************
# All the options that you can configure when you run
# the script
#*******************************************************
## The URL to crawl
URL=
## The depth of the crawl, default is 1
DEPTH=1
## Crawl only on this path
FOLLOW_PATH=
## Do not crawl pages with this in the URL
NOT_IN_URL=
## File containing URLs to test when not crawling
FILE=
## The number of processes when analyzing, default is five
MAX_PROCESSES=5
## Holds the output format: HTML is always generated; can also be IMG & CSV
OUTPUT_FORMAT=
## The heap size for the Java processes
JAVA_HEAP=1024
## Points to the rule properties files where summary rules are defined
SUMMARY_PROPERTY_DESKTOP="-Dcom.soulgalore.velocity.sitespeed.rules.file=dependencies/rules-desktop.properties"
SUMMARY_PROPERTY_MOBILE="-Dcom.soulgalore.velocity.sitespeed.rules.file=dependencies/rules-mobile.properties"
# The default is desktop; if you choose mobile rules, you will get the mobile version
SUMMARY_PROPERTY=$SUMMARY_PROPERTY_DESKTOP
## Where to put the result files
REPORT_BASE_DIR=sitespeed-result
## The host name if proxy is used
PROXY_HOST=
## The type of proxy
PROXY_TYPE=http
## The viewport of the browser, default is 1280*800
VIEWPORT=1280x800
## The name of the analysis
TEST_NAME=
## The columns shown in the table on the detailed summary page
PAGES_COLUMNS=
## The default user agent
USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36"
## The YSlow file to use
YSLOW_FILE=dependencies/yslow-3.1.5-sitespeed.js
## The desktop ruleset
RULESET=sitespeed.io-2.0-desktop
RULESET_MOBILE=sitespeed.io-2.0-mobile
## Maximum pages to test
MAX_PAGES=999999
## Do we have any URLs that don't return 2XX?
HAS_ERROR_URLS=false
## Max length of a filename created by the url
MAX_FILENAME_LENGTH=245
## Take screenshot of every page, default is false
SCREENSHOT=false
## By default browser timings aren't collected
COLLECT_BROWSER_TIMINGS=false
## The default setup: Use firefox & do it three times per URL
BROWSER_TIME_PARAMS="-b firefox -n 3"
## Easy way to set your user agent as an iPhone
IPHONE_IO6_AGENT="Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25"
IPHONE5_VIEWPORT="320x444"
## Easy way to set your user agent as an iPad
IPAD_IO6_AGENT="Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25"
IPAD_VIEWPORT="768x1024"
## Nexus
NEXUS_4_AGENT="Mozilla/5.0 (Linux; Android 4.2; Nexus 4 Build/JVP15Q) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"
NEXUS_VIEWPORT="348x519"
# Jar files, specify the versions
CRAWLER_JAR=crawler-1.5.6-full.jar
VELOCITY_JAR=xml-velocity-1.8.1-full.jar
HTMLCOMPRESSOR_JAR=htmlcompressor-1.5.3.jar
BROWSERTIME_JAR=browsertime-0.1-full.jar
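## Illustrative example invocations (example.org is just a placeholder; all
## flags are documented in the help() function further down):
##   ./sitespeed.io -u http://www.example.org              # crawl one level deep
##   ./sitespeed.io -u http://www.example.org -d 2 -o csv  # deeper crawl, also output CSV
##   ./sitespeed.io -f my-urls.txt -k true                 # analyze a list of URLs and take screenshots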
#*******************************************************
# Main program
#
#*******************************************************
main() {
verify_environment
get_input "$@"
verify_input
setup_dirs_and_dependencies
fetch_urls
analyze_pages
collect_browser_time
copy_assets
generate_error_file
generate_result_files
finished
}
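# The steps above run strictly in order: fetch_urls populates the URLS array
# that analyze_pages, collect_browser_time, generate_result_files (and, when
# enabled, take_screenshots) all iterate over.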
#*******************************************************
# Check that we have what is needed to run
# Will check for PhantomJS, cURL and the right Java version
#*******************************************************
function verify_environment {
command -v phantomjs >/dev/null 2>&1 || { echo >&2 "Missing phantomjs, please install it to be able to run sitespeed.io"; exit 1; }
command -v curl >/dev/null 2>&1 || { echo >&2 "Missing curl, please install it to be able to run sitespeed.io"; exit 1; }
# Respect JAVA_HOME if set
if [[ -n "$JAVA_HOME" ]] && [[ -x "$JAVA_HOME/bin/java" ]]
then
JAVA="$JAVA_HOME/bin/java"
else
JAVA="java"
fi
if [[ "$JAVA" ]]; then
jVersion=$("$JAVA" -version 2>&1 | awk -F '"' '/version/ {print $2}')
if [[ "$jVersion" < "1.6" ]]; then
echo "Java version is less than 1.6 which is too old, you will need at least Java 1.6 to run sitespeed.io"; exit 1;
fi
fi
}
#*******************************************************
# Fetch the input from the user
#*******************************************************
function get_input {
# Set options
while getopts "hu:d:f:s:o:m:b:n:p:r:z:x:g:t:a:v:y:l:c:j:e:i:q:k:" OPTION
do
case $OPTION in
h)
help
exit 1
;;
u)URL=$OPTARG;;
d)DEPTH=$OPTARG;;
q)FOLLOW_PATH=$OPTARG;;
s)NOT_IN_URL=$OPTARG;;
o)OUTPUT_FORMAT=$OPTARG;;
m)JAVA_HEAP=$OPTARG;;
n)TEST_NAME=$OPTARG;;
p)MAX_PROCESSES=$OPTARG;;
r)REPORT_BASE_DIR=$OPTARG;;
z)BROWSER_TIME_PARAMS=$OPTARG;;
x)PROXY_HOST=$OPTARG;;
t)PROXY_TYPE=$OPTARG;;
a)USER_AGENT=$OPTARG;;
v)VIEWPORT=$OPTARG;;
y)YSLOW_FILE=$OPTARG;;
l)RULESET=$OPTARG;;
f)FILE=$OPTARG;;
g)PAGES_COLUMNS=$OPTARG;;
b)SUMMARY_BOXES=$OPTARG;;
j)MAX_PAGES=$OPTARG;;
k)SCREENSHOT=$OPTARG;;
c)COLLECT_BROWSER_TIMINGS=$OPTARG;;
# Note: The e & i options are used in the script that analyzes multiple sites
e);;
i);;
?)
help
exit
;;
esac
done
}
#*******************************************************
# Verify that all options needed exist & set default
# values for missing ones
#*******************************************************
function verify_input {
if [[ -z $URL ]] && [[ -z $FILE ]]
then
help
exit 1
fi
if [ "$URL" != "" ] && [ "$FILE" != "" ]
then
echo 'You must either supply a start URL for the crawl or supply a file with the URLs, not both at the same time'
help
exit 1
fi
if [ "$FOLLOW_PATH" != "" ]
then
FOLLOW_PATH="-p $FOLLOW_PATH"
else
FOLLOW_PATH=""
fi
if [ "$NOT_IN_URL" != "" ]
then
NOT_IN_URL="-np $NOT_IN_URL"
else
NOT_IN_URL=""
fi
TAKE_SCREENSHOTS=$SCREENSHOT
SCREENSHOT="-Dcom.soulgalore.velocity.key.showscreenshots=$SCREENSHOT"
if [[ "$OUTPUT_FORMAT" == *csv* ]]
then
OUTPUT_CSV=true
else
OUTPUT_CSV=false
fi
if [ "$TEST_NAME" != "" ]
then
TEST_NAME="-Dcom.soulgalore.velocity.key.testname=$TEST_NAME"
else
TEST_NAME="-Dcom.soulgalore.velocity.key.testname= "
fi
## The url & ruleScore columns always exist (they are what is shown on a phone)
if [ "$PAGES_COLUMNS" != "" ]
then
PAGES_COLUMNS="-Dcom.soulgalore.velocity.key.columns=url,$PAGES_COLUMNS,ruleScore"
else
# Default columns
PAGES_COLUMNS="-Dcom.soulgalore.velocity.key.columns=url,jsPerPage,cssPerPage,imagesPerPage,cssImagesPerPage,requests,requestsWithoutExpires,docWeight,pageWeight,browserScaledImages,criticalPathScore,spof,jsSyncInHead"
if $COLLECT_BROWSER_TIMINGS
then
PAGES_COLUMNS="-Dcom.soulgalore.velocity.key.columns=url,jsPerPage,cssPerPage,imagesPerPage,requests,requestsWithoutExpires,pageWeight,browserScaledImages,criticalPathScore,serverResponseTime,domContentLoadedTime"
fi
PAGES_COLUMNS="$PAGES_COLUMNS",ruleScore
fi
if [ "$SUMMARY_BOXES" != "" ]
then
SUMMARY_BOXES="-Dcom.soulgalore.velocity.key.boxes=$SUMMARY_BOXES"
else
# Default columns
SUMMARY_BOXES="-Dcom.soulgalore.velocity.key.boxes=ruleScore,criticalPathScore,jsSyncInHead,jsPerPage,cssPerPage,cssImagesPerPage,imagesPerPage,requests,requestsWithoutExpires,pageWeight,docWeight,imageWeightPerPage,browserScaledImages,spofPerPage,domainsPerPage,domElements,assetsCacheTime,timeSinceLastModification"
if $COLLECT_BROWSER_TIMINGS
then
SUMMARY_BOXES="$SUMMARY_BOXES",redirectionTime,serverResponseTime,pageDownloadTime,domInteractiveTime,domContentLoadedTime,pageLoadTime
fi
fi
if [ "$PROXY_HOST" != "" ]
then
PROXY_PHANTOMJS="--proxy=$PROXY_HOST --proxy-type=$PROXY_TYPE"
PROXY_CRAWLER="-Dcom.soulgalore.crawler.proxy=$PROXY_TYPE":"$PROXY_HOST"
fi
if [[ "$USER_AGENT" == "iphone" ]]
then
USER_AGENT="$IPHONE_IO6_AGENT"
VIEWPORT=$IPHONE5_VIEWPORT
SUMMARY_PROPERTY=$SUMMARY_PROPERTY_MOBILE
RULESET=$RULESET_MOBILE
elif [[ "$USER_AGENT" == "nexus" ]]
then
USER_AGENT="$NEXUS_4_AGENT"
VIEWPORT=$NEXUS_VIEWPORT
SUMMARY_PROPERTY=$SUMMARY_PROPERTY_MOBILE
RULESET=$RULESET_MOBILE
elif [[ "$USER_AGENT" == "ipad" ]]
then
USER_AGENT="$IPAD_IO6_AGENT"
VIEWPORT=$IPAD_VIEWPORT
fi
if [ "$USER_AGENT" != "" ]
then
USER_AGENT_YSLOW="$USER_AGENT"
USER_AGENT_CRAWLER="User-Agent:$USER_AGENT"
USER_AGENT_CURL="-A $USER_AGENT"
fi
if [ "$VIEWPORT" != "" ]
then
VIEWPORT_YSLOW="-vp $VIEWPORT"
fi
if [[ "$RULESET" == *mobile* ]]
then
SUMMARY_PROPERTY=$SUMMARY_PROPERTY_MOBILE
fi
}
#*******************************************************
# Set up the dirs needed and the versions needed for
# doing the analysis
#*******************************************************
function setup_dirs_and_dependencies {
# Switch to my dir
cd "$(dirname ${BASH_SOURCE[0]})"
local now=$(date +"%Y-%m-%d-%H-%M-%S")
DATE=$(date)
if [[ -z $FILE ]]
then
echo "Will crawl from start point $URL with User-Agent $USER_AGENT and viewport $VIEWPORT with crawl depth $DEPTH using ruleset $RULESET ... this can take a while"
else
echo "Will fetch urls from the file $FILE with User-Agent $USER_AGENT and viewport $VIEWPORT using ruleset $RULESET ... this can take a while"
fi
# remove the protocol
local noprotocol=${URL#*//}
HOST=${noprotocol%%/*}
# Setup dirs
DEPENDENCIES_DIR="dependencies"
REPORT_DIR_NAME=$HOST/$now
REPORT_DIR=$REPORT_BASE_DIR/$REPORT_DIR_NAME
REPORT_DATA_DIR=$REPORT_DIR/data
REPORT_DATA_HAR_DIR=$REPORT_DATA_DIR/har
REPORT_PAGES_DIR=$REPORT_DIR/pages
REPORT_DATA_PAGES_DIR=$REPORT_DATA_DIR/pages
REPORT_IMAGE_PAGES_DIR=$REPORT_DIR/screenshots
REPORT_DATA_METRICS_DIR=$REPORT_DATA_DIR/metrics
VELOCITY_DIR=report/velocity
PROPERTIES_DIR=report/properties
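# The directories above give a per-run result layout roughly like:
#   $REPORT_BASE_DIR/<host>/<timestamp>/            the HTML reports (index, pages, assets, rules ...)
#   $REPORT_BASE_DIR/<host>/<timestamp>/data        result.xml, summary.xml, urls.txt, HAR files & metrics
#   $REPORT_BASE_DIR/<host>/<timestamp>/pages       one HTML report per analyzed page
#   $REPORT_BASE_DIR/<host>/<timestamp>/screenshots PNGs (only when screenshots are enabled)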
mkdir -p $REPORT_DIR || exit 1
mkdir $REPORT_DATA_DIR || exit 1
mkdir $REPORT_PAGES_DIR || exit 1
mkdir $REPORT_DATA_PAGES_DIR || exit 1
mkdir $REPORT_DATA_HAR_DIR || exit 1
mkdir $REPORT_DATA_METRICS_DIR || exit 1
MY_IP=$(curl -L -s http://api.exip.org/?call=ip)
if [ -z "$MY_IP" ]
then
MY_IP='unknown'
fi
# Logging versions
browserTimeVersion=$("$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -jar $DEPENDENCIES_DIR/$BROWSERTIME_JAR -V)
echo "Using PhantomJS version $(phantomjs --version)"
echo "Using Java version $jVersion"
echo "Using BrowserTime version $browserTimeVersion"
echo "From IP $MY_IP"
}
#*******************************************************
# Fetch the urls, either by crawling or from file
#*******************************************************
function fetch_urls {
if [[ -z $FILE ]]
then
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -Dcom.soulgalore.crawler.propertydir=$DEPENDENCIES_DIR/ $PROXY_CRAWLER -cp $DEPENDENCIES_DIR/$CRAWLER_JAR com.soulgalore.crawler.run.CrawlToFile -u $URL -l $DEPTH $FOLLOW_PATH $NOT_IN_URL -rh "\"$USER_AGENT_CRAWLER\"" -f $REPORT_DATA_DIR/urls.txt -ef $REPORT_DATA_DIR/errorurls.txt
else
cp $FILE $REPORT_DATA_DIR/urls.txt
fi
if [ ! -e $REPORT_DATA_DIR/urls.txt ];
then
echo "No url:s were fetched"
exit 0
fi
# read the urls
URLS=()
while read txt ; do
URLS[${#URLS[@]}]=$txt
done < $REPORT_DATA_DIR/urls.txt
## If we have a max number of URLs to test, only use the first MAX_PAGES
NR_OF_URLS=${#URLS[@]}
if [ "$NR_OF_URLS" -gt "$MAX_PAGES" ]
then
for (( c=$MAX_PAGES; c<=$NR_OF_URLS; c++ ))
do
unset URLS[$c]
done
fi
## If we have error URLs make sure they are added to the menu
if [ -e $REPORT_DATA_DIR/errorurls.txt ];
then
HAS_ERROR_URLS=true
fi
SHOW_ERROR_URLS="-Dcom.soulgalore.velocity.key.showserrorurls=$HAS_ERROR_URLS"
}
#*******************************************************
# Analyze the pages
#*******************************************************
function analyze_pages {
echo "Will analyze ${#URLS[@]} pages"
# Set up start parameters: no jobs running and the run counter (used for file names) at zero
local jobs=0
local runs=0
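# Poor man's job pool: each analyze() call runs in the background, and once
# MAX_PROCESSES jobs have been launched we wait for the whole batch to finish
# before starting more.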
for url in "${URLS[@]}"
do analyze "$url" "$runs" &
local jobs=$[$jobs+1]
local runs=$[$runs+1]
if [ $(($runs%20)) == 0 ]; then
echo "Analyzed $runs pages out of ${#URLS[@]}"
fi
if [ "$jobs" -ge "$MAX_PROCESSES" ]
then
wait
jobs=0
fi
done
# make sure all processes have finished
wait
}
#*******************************************************
# Copy all assets used for creating the HTML files
#*******************************************************
function copy_assets {
#copy the rest of the files
mkdir $REPORT_DIR/css
mkdir $REPORT_DIR/js
mkdir $REPORT_DIR/img
mkdir $REPORT_DIR/img/ico
mkdir $REPORT_DIR/fonts
cat "$BASE_DIR"report/css/bootstrap.min.css > $REPORT_DIR/css/styles.css
cat "$BASE_DIR"report/js/jquery-1.10.2.min.js report/js/bootstrap.min.js report/js/stupidtable.min.js > $REPORT_DIR/js/all.js
cp "$BASE_DIR"report/img/*.* $REPORT_DIR/img
cp "$BASE_DIR"report/img/ico/* $REPORT_DIR/img/ico
cp "$BASE_DIR"report/fonts/* $REPORT_DIR/fonts
}
#*******************************************************
# Generate result output files
#*******************************************************
function generate_result_files {
echo "Create all the result pages"
local runs=0
for url in "${URLS[@]}"
do
local pagefilename=$(get_filename $url $runs)
EXTRA=
if $COLLECT_BROWSER_TIMINGS
then
EXTRA=",$REPORT_DATA_METRICS_DIR/$pagefilename.xml"
fi
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_PAGES_DIR/$pagefilename.xml$EXTRA $VELOCITY_DIR/page.vm $PROPERTIES_DIR/page.properties $REPORT_PAGES_DIR/$pagefilename.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_PAGES_DIR/$pagefilename.html $REPORT_PAGES_DIR/$pagefilename.html
done
echo "Create result.xml"
echo '<?xml version="1.0" encoding="UTF-8"?><document host="'$HOST'" date="'$DATE'" useragent="'$USER_AGENT'" viewport="'$VIEWPORT'" ip="'$MY_IP'" path="'$REPORT_DIR_NAME'"><url><![CDATA['$URL']]></url>' > $REPORT_DATA_DIR/result.xml
for file in $REPORT_DATA_PAGES_DIR/*
do
# Hack for removing dictionaries in the result file
sed 's@<dictionary>.*@</results>@' "$file" > $REPORT_DATA_DIR/tmp.txt || exit 1
sed 's/<?xml version="1.0" encoding="UTF-8"?>//g' "$REPORT_DATA_DIR/tmp.txt" >> "$REPORT_DATA_DIR/result.xml" || exit 1
rm "$REPORT_DATA_DIR/tmp.txt"
done
# Add all metrics
if $COLLECT_BROWSER_TIMINGS
then
local runs=0
echo '<metrics>' >> "$REPORT_DATA_DIR/result.xml"
for url in "${URLS[@]}"
do
local pagefilename=$(get_filename $url $runs)
sed 's/<?xml version="1.0" encoding="UTF-8" standalone="yes"?>//g' "$REPORT_DATA_METRICS_DIR/$pagefilename.xml" > "$REPORT_DATA_METRICS_DIR/tmp.xml" || exit 1
cat "$REPORT_DATA_METRICS_DIR/tmp.xml" >> "$REPORT_DATA_DIR/result.xml"
rm "$REPORT_DATA_METRICS_DIR/tmp.xml"
done
echo '</metrics>' >> "$REPORT_DATA_DIR/result.xml"
fi
echo '</document>'>> "$REPORT_DATA_DIR/result.xml"
echo 'Create the summary.xml'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m $SUMMARY_PROPERTY -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/result.xml $VELOCITY_DIR/xml/site.summary.xml.vm $PROPERTIES_DIR/site.summary.properties $REPORT_DATA_DIR/summary.xml.tmp || exit 1
# Velocity adds a lot of garbage spaces and new lines that need to be removed before the xml is cleaned up,
# for performance reasons
echo '<?xml version="1.0" encoding="UTF-8"?>' > $REPORT_DATA_DIR/summary.xml
sed '1,/xml/d' $REPORT_DATA_DIR/summary.xml.tmp >> $REPORT_DATA_DIR/summary.xml
rm $REPORT_DATA_DIR/summary.xml.tmp
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type xml -o $REPORT_DATA_DIR/summary.xml $REPORT_DATA_DIR/summary.xml
echo 'Create the summary.details.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/summary.xml $VELOCITY_DIR/detailed.site.summary.vm $PROPERTIES_DIR/summary.details.properties $REPORT_DIR/summary.details.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/summary.details.html $REPORT_DIR/summary.details.html
echo 'Create the pages.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$PAGES_COLUMNS" "$SCREENSHOT" "$SHOW_ERROR_URLS" $SUMMARY_PROPERTY -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/result.xml $VELOCITY_DIR/pages.vm $PROPERTIES_DIR/pages.properties $REPORT_DIR/pages.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/pages.html $REPORT_DIR/pages.html
if $OUTPUT_CSV
then
echo 'Create the pages.csv'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$PAGES_COLUMNS" $SUMMARY_PROPERTY -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/result.xml $VELOCITY_DIR/csv/pages.csv.vm $PROPERTIES_DIR/pages.properties $REPORT_DIR/pages.csv || exit 1
fi
echo 'Create the summary index.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$SUMMARY_BOXES" "$SCREENSHOT" "$SHOW_ERROR_URLS" $SUMMARY_PROPERTY -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/summary.xml $VELOCITY_DIR/site.summary.vm $PROPERTIES_DIR/site.summary.properties $REPORT_DIR/index.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/index.html $REPORT_DIR/index.html
echo 'Create the assets.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/result.xml $VELOCITY_DIR/assets.vm $PROPERTIES_DIR/assets.properties $REPORT_DIR/assets.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/assets.html $REPORT_DIR/assets.html
echo 'Create the rules.html'
## hack for just getting one file with the rules, take the first one in the dir!
FILE_WITH_RULES=$(ls $REPORT_DATA_PAGES_DIR | head -n 1)
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_PAGES_DIR/$FILE_WITH_RULES $VELOCITY_DIR/rules.vm $PROPERTIES_DIR/rules.properties $REPORT_DIR/rules.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/rules.html $REPORT_DIR/rules.html
if $TAKE_SCREENSHOTS
then
take_screenshots
fi
}
#*******************************************************
# Make a clean exit
#
#*******************************************************
function finished {
echo "Finished analyzing $HOST"
exit 0
}
#*******************************************************
# Create a page that shows URLs that returned errors
#
#*******************************************************
function generate_error_file {
# take care of error urls
if [ -e $REPORT_DATA_DIR/errorurls.txt ];
then
local resultError=()
while read txt ; do
resultError[${#resultError[@]}]=$txt
done < $REPORT_DATA_DIR/errorurls.txt
echo '<?xml version="1.0" encoding="UTF-8"?><results>' > $REPORT_DATA_DIR/errorurls.xml
for url in "${resultError[@]}"
do echo "<url reason='${url/,*/}'><![CDATA[${url/*,/}]]></url>" >> $REPORT_DATA_DIR/errorurls.xml
done
echo '</results>' >> $REPORT_DATA_DIR/errorurls.xml
echo 'Create the errorurls.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/errorurls.xml $VELOCITY_DIR/errorurls.vm $PROPERTIES_DIR/errorurls.properties $REPORT_DIR/errorurls.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/errorurls.html $REPORT_DIR/errorurls.html
else
# create an empty xml file
echo '<?xml version="1.0" encoding="UTF-8"?><results></results>' > $REPORT_DATA_DIR/errorurls.xml
fi
}
#*******************************************************
# Create screenshots of the pages
#
#*******************************************************
function take_screenshots() {
echo 'Create all PNGs'
mkdir $REPORT_IMAGE_PAGES_DIR
local width=$(echo $VIEWPORT | cut -d'x' -f1)
local height=$(echo $VIEWPORT | cut -d'x' -f2)
local urls=
local imagenames=
## If pngcrush exists, use it to crush the images
command -v pngcrush >/dev/null && PNGCRUSH_EXIST=true || PNGCRUSH_EXIST=false
local runs=0
for url in "${URLS[@]}"
do
local imagefilename=$(get_filename $url $runs)
echo "Creating screenshot for $url $REPORT_IMAGE_PAGES_DIR/$imagefilename.png "
phantomjs $PROXY_PHANTOMJS $DEPENDENCIES_DIR/screenshot.js "$url" "$REPORT_IMAGE_PAGES_DIR/$imagefilename.png" $width $height "$USER_AGENT" true > /dev/null 2>&1
if $PNGCRUSH_EXIST
then
pngcrush -q $REPORT_IMAGE_PAGES_DIR/$imagefilename.png $REPORT_IMAGE_PAGES_DIR/$imagefilename-c.png
mv $REPORT_IMAGE_PAGES_DIR/$imagefilename-c.png $REPORT_IMAGE_PAGES_DIR/$imagefilename.png
fi
local urls+="$url"
local urls+="@"
local imagenames+="$imagefilename"
local imagenames+="@"
local runs=$[$runs+1]
done
local vp="-Dcom.soulgalore.velocity.key.viewport=$VIEWPORT"
local url_list="-Dcom.soulgalore.velocity.key.urls=$urls"
local image_list="-Dcom.soulgalore.velocity.key.images=$imagenames"
echo 'Create the screenshots.html'
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m "$TEST_NAME" "$vp" "$url_list" "$image_list" "$SCREENSHOT" "$SHOW_ERROR_URLS" -jar $DEPENDENCIES_DIR/$VELOCITY_JAR $REPORT_DATA_DIR/summary.xml $VELOCITY_DIR/screenshots.vm $PROPERTIES_DIR/screenshots.properties $REPORT_DIR/screenshots.html || exit 1
"$JAVA" -jar $DEPENDENCIES_DIR/$HTMLCOMPRESSOR_JAR --type html --compress-css --compress-js -o $REPORT_DIR/screenshots.html $REPORT_DIR/screenshots.html
}
#*******************************************************
# Help function, call it to print all different usages.
#
#*******************************************************
function help() {
cat << EOF
usage: $0 options
Sitespeed.io is a tool that helps you analyze your website performance and shows you what you should optimize. More info at http://www.sitespeed.io
OPTIONS:
-h Help
-u The start URL of the crawl: http[s]://host[:port][/path/]. Use this or use the -f file option.
-f The path to a plain text file with one URL on each row.
-d The crawl depth, default is 1 (one page and all links pointing to the same domain on that page) [optional]
-q Crawl only URLs that contain this keyword in the path [optional]
-s Skip URLs that contain this keyword in the path [optional]
-p The number of processes that will analyze pages, default is 5 [optional]
-m The memory heap size for the Java applications, default is 1024 MB [optional]
-n Give your test a name, it will be added to all HTML pages [optional]
-o The output format, always output as HTML and you can also output a CSV file for the detailed site summary page (csv) [optional]
-r The result base directory, default is sitespeed-result [optional]
-x The proxy host & protocol: proxy.soulgalore.com:80 [optional]
-t The proxy type, default is http [optional]
-a The full User Agent string, default is Chrome for MacOSX. You can also set the value as iphone or ipad (will automagically change the viewport) [optional]
-v The view port, the page viewport size WidthxHeight, like 400x300, default is 1280x800 [optional]
-y The compiled YSlow file, default is dependencies/yslow-3.1.5-sitespeed.js [optional]
-l Which ruleset to use, default is the latest sitespeed.io version for desktop [optional]
-g The columns showed on detailed page summary table, see http://www.sitespeed.io/documentation/#config-columns for more info [optional]
-b The boxes showed on site summary page, see http://www.sitespeed.io/documentation/#config-boxes for more info [optional]
-j The max number of pages to test [optional]
-k Take screenshots for each page (using the configured view port). Default is false. (true|false) [optional]
-c Collect BrowserTimings data (meaning open a real browser & fetch timings). Default is false. (true|false) [optional]
-z String sent to BrowserTime, so you can choose browser and tries. Default is "-b firefox -n 3".
EOF
}
#*******************************************************
# Analyze function, call it to analyze a page
# $1 the url to analyze
# $2 runs
#*******************************************************
function analyze() {
# set up the parameters; using the same names makes it easier to follow
local url=$1
local runs=$2
local pagefilename=$(get_filename $1 $2)
echo "Analyzing $url"
phantomjs $PROXY_PHANTOMJS $YSLOW_FILE -d -r $RULESET -f xml --ua "$USER_AGENT_YSLOW" $VIEWPORT_YSLOW -n "$pagefilename.har" "$url" >"$REPORT_DATA_PAGES_DIR/$pagefilename.xml" || exit 1
#move the HAR-file to the HAR dir
mv "$pagefilename.har" $REPORT_DATA_HAR_DIR/
local s=$(du -k "$REPORT_DATA_PAGES_DIR/$pagefilename.xml" | cut -f1)
# Check that the result file is at least 10 kB; a smaller file means the analysis failed
if [ $s -lt 10 ]
then
echo "Could not analyze $url unrecoverable error when parsing the page:"
## do the same thing again but setting console to log the error to output
phantomjs $PROXY_PHANTOMJS $YSLOW_FILE -d -r $RULESET -f xml --ua "$USER_AGENT_YSLOW" $VIEWPORT_YSLOW "$url" -c 2
## write the error url to the list
echo "sitespeed.io got an unrecoverable error when parsing the page,$url" >> $REPORT_DATA_DIR/errorurls.txt
fi
# Sometimes the yslow script adds output before the xml tag, should probably be reported ...
sed '/<?xml/,$!d' $REPORT_DATA_PAGES_DIR/$pagefilename.xml > $REPORT_DATA_PAGES_DIR/$pagefilename-bup || exit 1
# And crazy enough, sometimes we get things after the end of the xml
sed -n '1,/<\/results>/p' $REPORT_DATA_PAGES_DIR/$pagefilename-bup > $REPORT_DATA_PAGES_DIR/$pagefilename.xml || exit 1
# page size (keep fetching TTFB for a while; it is now primarily fetched from PhantomJS)
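# curl write-out variables: %{time_starttransfer} is the time to first byte
# in seconds, %{size_download} is the number of bytes downloaded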
curl "$USER_AGENT_CURL" --compressed -o /dev/null -w "%{time_starttransfer};%{size_download}\n" -L -s "$url" > "$REPORT_DATA_PAGES_DIR/$pagefilename.info"
read -r TTFB_SIZE < $REPORT_DATA_PAGES_DIR/$pagefilename.info
local TTFB="$(echo $TTFB_SIZE | cut -d \; -f 1)"
local SIZE="$(echo $TTFB_SIZE | cut -d \; -f 2)"
local TTFB="$(printf "%.3f" $TTFB)"
rm "$REPORT_DATA_PAGES_DIR/$pagefilename.info"
# Hack for adding link and other data to the xml file
XML_URL=$(echo "$url" | sed 's/&/\\&/g')
sed 's{<results>{<results filename="'$pagefilename'" size="'$SIZE'"><curl><![CDATA['"$XML_URL"']]></curl>{' $REPORT_DATA_PAGES_DIR/$pagefilename.xml > $REPORT_DATA_PAGES_DIR/$pagefilename-bup || exit 1
mv $REPORT_DATA_PAGES_DIR/$pagefilename-bup $REPORT_DATA_PAGES_DIR/$pagefilename.xml
}
#*******************************************************
# Get different browser timings
#*******************************************************
function collect_browser_time {
if $COLLECT_BROWSER_TIMINGS
then
local runs=0
for url in "${URLS[@]}"
do
local pagefilename=$(get_filename $url $runs)
"$JAVA" -Xmx"$JAVA_HEAP"m -Xms"$JAVA_HEAP"m -jar $DEPENDENCIES_DIR/$BROWSERTIME_JAR $BROWSER_TIME_PARAMS -o "$REPORT_DATA_METRICS_DIR/$pagefilename.xml" -ua "\"$USER_AGENT\"" -w $VIEWPORT "$url"
local runs=$[$runs+1]
done
fi
}
#*******************************************************
# Generate a filename from a URL
# $1 the url
# $2 a unique number that is used if the url is too long
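# Illustrative example: https://www.example.org/foo/ becomes
# s-www-example-org-foo- (protocol stripped, "s-" prefix added for https,
# non-alphanumeric characters replaced with "-")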
#*******************************************************
function get_filename() {
local url=$1
local unique=$2
local pagefilename=$(echo ${url#*//})
## Hack for fixing when we have the same url as both http & https
if [[ $url == https* ]]
then
local pagefilename=s-$pagefilename
fi
local pagefilename=$(echo ${pagefilename//[^a-zA-Z0-9]/'-'})
# take care of too long names
if [ ${#pagefilename} -gt $MAX_FILENAME_LENGTH ]
then
local pagefilename=$(echo $pagefilename | cut -c1-$MAX_FILENAME_LENGTH)
local pagefilename="$pagefilename$unique"
fi
echo $pagefilename
}
# launch
main "$@"