Skip to content
This repository has been archived by the owner on Apr 21, 2023. It is now read-only.

Commit

Permalink
Decompress gzip'd content in InPlaceResourceRecorder; that's needed for
Browse files Browse the repository at this point in the history
reverse proxies. Because of the weird filter ordering in Apache
we actually end up looking at headers twice: first to see if it was gzip'd
by a reverse proxy, and next for complete headers (while in nginx we just
get the full headers the first time).

Addresses issue 896.
  • Loading branch information
morlovich committed Mar 7, 2014
1 parent 8ac32f3 commit 051b2f9
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 43 deletions.
48 changes: 39 additions & 9 deletions src/install/debug.conf.template
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,6 @@ ModPagespeedEnableFilters add_instrumentation
Header always set X-TestHeader "hello"
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/instant/wait/" >
ModPagespeedInPlaceWaitForOptimized on
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/instant/deadline/" >
ModPagespeedInPlaceRewriteDeadlineMs -1
</Directory>

ModPagespeedLoadFromFile "http://@@APACHE_DOMAIN@@/mod_pagespeed_test/ipro/instant/" \
"@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/instant/"

Expand All @@ -184,6 +176,10 @@ ModPagespeedLoadFromFile "http://@@APACHE_DOMAIN@@/mod_pagespeed_test/ipro/insta
ModPagespeedInPlaceWaitForOptimized on
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/mod_deflate/" >
AddOutputFilterByType DEFLATE text/css
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/vary/" >
ModPagespeedRespectVary on

Expand Down Expand Up @@ -409,7 +405,7 @@ ModPagespeedCompressMetadataCache true
# different depending on whether we are running system tests as root
# or as a normal user. Note that fetches must be done with
# http_proxy=SECONDARY_HOST:SECONDARY_PORT.
Listen @@APACHE_SECONDARY_PORT@@
Listen localhost:@@APACHE_SECONDARY_PORT@@
NameVirtualHost localhost:@@APACHE_SECONDARY_PORT@@
<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
ServerName secondary.example.com
Expand Down Expand Up @@ -682,6 +678,40 @@ NameVirtualHost localhost:@@APACHE_SECONDARY_PORT@@
ModPagespeed unplugged
</VirtualHost>

# Proxy + IPRO a gzip'd file for testing Issue 896.
<VirtualHost localhost:@@APACHE_SECONDARY_PORT@@>
ServerName ipro-proxy.example.com

DocumentRoot "@@APACHE_DOC_ROOT@@"
ModPagespeedFileCachePath "@@MOD_PAGESPEED_CACHE@@_ipro_proxy"

ModPagespeed on
ModPagespeedInPlaceResourceOptimization on
ModPagespeedEnableFilters rewrite_domains

ProxyPass / http://localhost:@@APACHE_TERTIARY_PORT@@/mod_pagespeed_test/ipro/mod_deflate/
ProxyPassReverse / http://localhost:@@APACHE_TERTIARY_PORT@@/mod_pagespeed_test/ipro/mod_deflate/
AddOutputFilterByType DEFLATE text/css
</VirtualHost>

# Backend for ipro-proxy.exampe.com
Listen 127.0.0.1:@@APACHE_TERTIARY_PORT@@
NameVirtualHost 127.0.0.1:@@APACHE_TERTIARY_PORT@@
<VirtualHost 127.0.0.1:@@APACHE_TERTIARY_PORT@@>
ServerName ipro-proxy-backend.example.com
DocumentRoot "@@APACHE_DOC_ROOT@@"
ModPagespeed unplugged
AddOutputFilterByType DEFLATE text/css
</VirtualHost>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/instant/wait/" >
ModPagespeedInPlaceWaitForOptimized on
</Directory>

<Directory "@@APACHE_DOC_ROOT@@/mod_pagespeed_test/ipro/instant/deadline/" >
ModPagespeedInPlaceRewriteDeadlineMs -1
</Directory>

# Enable per-vhost statistics so that tests can be somewhat independent.
ModPagespeedUsePerVHostStatistics on
<Location /mod_pagespeed_global_statistics>
Expand Down
18 changes: 18 additions & 0 deletions src/install/mod_pagespeed_test/ipro/mod_deflate/big.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/* This is purposefully repetitive to make sure it's compressible */
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
big { color: blue; }
1 change: 1 addition & 0 deletions src/net/instaweb/apache/instaweb_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ bool handle_as_in_place(const RequestContextPtr& request_context,
// InPlaceResourceRecorder as we want any ?ModPagespeed query-params to
// be stripped from the cache key before we store the result in HTTPCache.
InPlaceResourceRecorder* recorder = new InPlaceResourceRecorder(
request_context,
stripped_gurl->Spec(),
request_headers,
options->respect_vary(),
Expand Down
10 changes: 10 additions & 0 deletions src/net/instaweb/apache/mod_instaweb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,8 @@ apr_status_t instaweb_in_place_filter(ap_filter_t* filter,
static_cast<InPlaceResourceRecorder*>(filter->ctx);
CHECK(recorder != NULL);

bool first = true;

// Iterate through all buckets, saving content in the recorder and passing
// the buckets along when there is a flush. Abort early if we hit EOS or the
// recorder fails.
Expand All @@ -876,6 +878,14 @@ apr_status_t instaweb_in_place_filter(ap_filter_t* filter,
recorder->failed());
bucket = APR_BUCKET_NEXT(bucket)) {
if (!APR_BUCKET_IS_METADATA(bucket)) {
if (first) {
first = false;
ResponseHeaders response_headers;
ApacheRequestToResponseHeaders(*request, &response_headers, NULL);
recorder->ConsiderResponseHeaders(
InPlaceResourceRecorder::kPreliminaryHeaders, &response_headers);
}

// Content bucket.
const char* buf = NULL;
size_t bytes = 0;
Expand Down
21 changes: 21 additions & 0 deletions src/net/instaweb/apache/system_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,27 @@ if [ $statistics_enabled = "1" ]; then
--header=Cookie2:cookie2-data
check_from "$IPRO_OUTPUT" fgrep -q ' background: MediumPurple;'
check_from "$IPRO_OUTPUT" fgrep -q 'Vary: Cookie2'

if [ "$SECONDARY_HOSTNAME" != "" ]; then
function gunzip_grep_0ff() {
gunzip - | fgrep -q "color:#00f"
echo $?
}

start_test ipro with mod_deflate
fetch_until -gzip $TEST_ROOT/ipro/mod_deflate/big.css gunzip_grep_0ff 0

start_test ipro with reverse proxy of compressed content
http_proxy=$SECONDARY_HOSTNAME \
fetch_until -gzip http://ipro-proxy.example.com/big.css \
gunzip_grep_0ff 0

# Also test the .pagespeed. version, to make sure we didn't
# accidentally gunzip stuff above when we shouldn't have.
OUT=$(http_proxy=$SECONDARY_HOSTNAME $WGET -q -O - \
http://ipro-proxy.example.com/A.big.css.pagespeed.cf.0.css)
check_from "$OUT" fgrep -q "big{color:#00f}"
fi
else
start_test 404s are served. Statistics are disabled so not checking them.
OUT=$($WGET -O /dev/null $BAD_RESOURCE_URL 2>&1)
Expand Down
80 changes: 52 additions & 28 deletions src/net/instaweb/system/in_place_resource_recorder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "base/logging.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/statistics.h"
#include "net/instaweb/util/public/string_util.h"
#include "pagespeed/kernel/http/content_type.h"
Expand All @@ -41,6 +40,7 @@ const char kNumDroppedDueToSize[] = "ipro_recorder_dropped_due_to_size";
AtomicInt32 InPlaceResourceRecorder::active_recordings_(0);

InPlaceResourceRecorder::InPlaceResourceRecorder(
const RequestContextPtr& request_context,
StringPiece url, const RequestHeaders& request_headers, bool respect_vary,
int max_response_bytes, int max_concurrent_recordings,
int64 implicit_cache_ttl_ms, HTTPCache* cache, Statistics* stats,
Expand All @@ -51,6 +51,8 @@ InPlaceResourceRecorder::InPlaceResourceRecorder(
max_response_bytes_(max_response_bytes),
max_concurrent_recordings_(max_concurrent_recordings),
implicit_cache_ttl_ms_(implicit_cache_ttl_ms),
write_to_resource_value_(request_context, &resource_value_),
inflating_fetch_(&write_to_resource_value_),
cache_(cache), handler_(handler),
num_resources_(stats->GetVariable(kNumResources)),
num_inserted_into_cache_(stats->GetVariable(kNumInsertedIntoCache)),
Expand All @@ -60,7 +62,8 @@ InPlaceResourceRecorder::InPlaceResourceRecorder(
num_dropped_due_to_size_(stats->GetVariable(kNumDroppedDueToSize)),
status_code_(-1),
failure_(false),
response_headers_considered_(false) {
full_response_headers_considered_(false),
consider_response_headers_called_(false) {
num_resources_->Add(1);
if (limit_active_recordings() &&
active_recordings_.BarrierIncrement(1) > max_concurrent_recordings_) {
Expand All @@ -87,26 +90,44 @@ void InPlaceResourceRecorder::InitStats(Statistics* statistics) {

bool InPlaceResourceRecorder::Write(const StringPiece& contents,
MessageHandler* handler) {
DCHECK(consider_response_headers_called_);
if (failure_) {
return false;
}

// Write into resource_value_ decompressing if needed.
failure_ = !inflating_fetch_.Write(contents, handler_);
if (max_response_bytes_ == 0 ||
resource_value_.size() + contents.size() < max_response_bytes_) {
return resource_value_.Write(contents, handler_);
resource_value_.contents_size() < max_response_bytes_) {
return !failure_;
} else {
failure_ = true;
num_dropped_due_to_size_->Add(1);
cache_->RememberNotCacheable(url_, status_code_ == 200, handler_);
DroppedDueToSize();
VLOG(1) << "IPRO: MaxResponseBytes exceeded while recording " << url_;
return false;
}
}

void InPlaceResourceRecorder::ConsiderResponseHeaders(
HeadersKind headers_kind,
ResponseHeaders* response_headers) {
CHECK(response_headers != NULL) << "Response headers cannot be NULL";
DCHECK(!response_headers_considered_);
response_headers_considered_ = true;
DCHECK(!full_response_headers_considered_);

if (!consider_response_headers_called_) {
consider_response_headers_called_ = true;
// In first call, set up headers for potential deflating. We basically only
// care about Content-Encoding, plus AsyncFetch gets unhappy with 0
// status code.
inflating_fetch_.response_headers()->CopyFrom(*response_headers);
write_to_resource_value_.response_headers()->set_status_code(
HttpStatus::kOK);
}

if (headers_kind != kFullHeaders) {
return;
}
full_response_headers_considered_ = true;

status_code_ = response_headers->status_code();

// For 4xx and 5xx we can't IPRO, but we can also cache the failure so we
Expand Down Expand Up @@ -156,35 +177,38 @@ void InPlaceResourceRecorder::ConsiderResponseHeaders(
VLOG(1) << "IPRO: Content-Length header indicates that ["
<< url_ << "] is too large to record (" << content_length
<< " bytes)";
cache_->RememberNotCacheable(url_, status_code_ == 200, handler_);
num_dropped_due_to_size_->Add(1);
failure_ = true;
DroppedDueToSize();
return;
}
}

void InPlaceResourceRecorder::DroppedDueToSize() {
cache_->RememberNotCacheable(url_, status_code_ == 200, handler_);
num_dropped_due_to_size_->Add(1);
failure_ = true;
}

void InPlaceResourceRecorder::DoneAndSetHeaders(
ResponseHeaders* response_headers) {
if (!failure_ && !response_headers_considered_) {
ConsiderResponseHeaders(response_headers);
if (!failure_ && !full_response_headers_considered_) {
ConsiderResponseHeaders(kFullHeaders, response_headers);
}

if (failure_) {
num_failed_->Add(1);
} else {
// If a content length was specified, sanity check it.
int64 content_length;
if (response_headers->FindContentLength(&content_length) &&
static_cast<int64>(resource_value_.contents_size()) != content_length) {
handler_->Message(
kWarning, "IPRO: Mismatched content length for [%s]", url_.c_str());
num_failed_->Add(1);
} else {
resource_value_.SetHeaders(response_headers);
cache_->Put(url_, request_properties_, respect_vary_, &resource_value_,
handler_);
// TODO(sligocki): Start IPRO rewrite.
num_inserted_into_cache_->Add(1);
}
// We don't consider content-encoding to be valid here, since it can
// be captured post-mod_deflate with pre-deflate content. Also note
// that content-length doesn't have to be accurate either, since it can be
// due to compression; we do still use it for quickly reject since
// if gzip'd is too large uncompressed is likely too large, too.
response_headers->RemoveAll(HttpAttributes::kContentEncoding);
response_headers->RemoveAll(HttpAttributes::kContentLength);
resource_value_.SetHeaders(response_headers);
cache_->Put(url_, request_properties_, respect_vary_, &resource_value_,
handler_);
// TODO(sligocki): Start IPRO rewrite.
num_inserted_into_cache_->Add(1);
}
delete this;
}
Expand Down
Loading

0 comments on commit 051b2f9

Please sign in to comment.