Skip to content
Permalink
Browse files

MDL-38170 SimplePie: Cannot read https feeds through proxy

  • Loading branch information...
sammarshallou committed Feb 22, 2013
1 parent f29e62c commit 85cb4b65b1b815a9747867fc082394a884639a2c
Showing with 137 additions and 1 deletion.
  1. +39 −0 lib/filelib.php
  2. +1 −1 lib/simplepie/moodle_simplepie.php
  3. +97 −0 lib/tests/filelib_test.php
@@ -3436,6 +3436,45 @@ public function get_info() {
public function get_errno() {
return $this->errno;
}
/**
* When using a proxy, an additional HTTP response code may appear at
* the start of the header. For example, when using https over a proxy
* there may be 'HTTP/1.0 200 Connection Established'. Other codes are
* also possible and some may come with their own headers.
*
* If using the return value containing all headers, this function can be
* called to remove unwanted doubles.
*
* Note that it is not possible to distinguish this situation from valid
* data unless you know the actual response part (below the headers)
* will not be included in this string, or else will not 'look like' HTTP
* headers. As a result it is not safe to call this function for general
* data.
*
* @param string $input Input HTTP response
* @return string HTTP response with additional headers stripped if any
*/
public static function strip_double_headers($input) {
// I have tried to make this regular expression as specific as possible
// to avoid any case where it does weird stuff if you happen to put
// HTTP/1.1 200 at the start of any line in your RSS file. This should
// also make it faster because it can abandon regex processing as soon
// as it hits something that doesn't look like an http header. The
// header definition is taken from RFC 822, except I didn't support
// folding which is never used in practice.
$crlf = "\r\n";
return preg_replace(
// HTTP version and status code (ignore value of code).
'~^HTTP/1\..*' . $crlf .
// Header name: character between 33 and 126 decimal, except colon.
// Colon. Header value: any character except \r and \n. CRLF.
'(?:[\x21-\x39\x3b-\x7e]+:[^' . $crlf . ']+' . $crlf . ')*' .
// Headers are terminated by another CRLF (blank line).
$crlf .
// Second HTTP status code, this time must be 200.
'(HTTP/1.[01] 200 )~', '$1', $input);
}
}
/**
@@ -139,7 +139,7 @@ function moodle_simplepie_file($url, $timeout = 10, $redirects = 5, $headers = n
}
}
$this->headers = $curl->get($url);
$this->headers = curl::strip_double_headers($curl->get($url));
if ($curl->error) {
$this->error = 'cURL Error: '.$curl->error;
@@ -270,4 +270,101 @@ public function test_delete_original_file_from_draft() {
$this->assertEquals($contenthash, $fileref->get_contenthash());
$this->assertEquals($filecontent, $fileref->get_content());
}
/**
* Tests the strip_double_headers function in the curl class.
*/
public function test_curl_strip_double_headers() {
// Example from issue tracker.
$mdl30648example = <<<EOF
HTTP/1.0 407 Proxy Authentication Required
Server: squid/2.7.STABLE9
Date: Thu, 08 Dec 2011 14:44:33 GMT
Content-Type: text/html
Content-Length: 1275
X-Squid-Error: ERR_CACHE_ACCESS_DENIED 0
Proxy-Authenticate: Basic realm="Squid proxy-caching web server"
X-Cache: MISS from homer.lancs.ac.uk
X-Cache-Lookup: NONE from homer.lancs.ac.uk:3128
Via: 1.0 homer.lancs.ac.uk:3128 (squid/2.7.STABLE9)
Connection: close
HTTP/1.0 200 OK
Server: Apache
X-Lb-Nocache: true
Cache-Control: private, max-age=15
ETag: "4d69af5d8ba873ea9192c489e151bd7b"
Content-Type: text/html
Date: Thu, 08 Dec 2011 14:44:53 GMT
Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
X-Cache-Action: MISS
X-Cache-Age: 0
Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
X-Cache: MISS from ww
<html>...
EOF;
$mdl30648expected = <<<EOF
HTTP/1.0 200 OK
Server: Apache
X-Lb-Nocache: true
Cache-Control: private, max-age=15
ETag: "4d69af5d8ba873ea9192c489e151bd7b"
Content-Type: text/html
Date: Thu, 08 Dec 2011 14:44:53 GMT
Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
X-Cache-Action: MISS
X-Cache-Age: 0
Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
X-Cache: MISS from ww
<html>...
EOF;
// For HTTP, replace the \n with \r\n.
$mdl30648example = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648example);
$mdl30648expected = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648expected);
// Test stripping works OK.
$this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648example));
// Test it does nothing to the 'plain' data.
$this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648expected));
// Example from OU proxy.
$httpsexample = <<<EOF
HTTP/1.0 200 Connection established
HTTP/1.1 200 OK
Date: Fri, 22 Feb 2013 17:14:23 GMT
Server: Apache/2
X-Powered-By: PHP/5.3.3-7+squeeze14
Content-Type: text/xml
Connection: close
Content-Encoding: gzip
Transfer-Encoding: chunked
<?xml version="1.0" encoding="ISO-8859-1" ?>
<rss version="2.0">...
EOF;
$httpsexpected = <<<EOF
HTTP/1.1 200 OK
Date: Fri, 22 Feb 2013 17:14:23 GMT
Server: Apache/2
X-Powered-By: PHP/5.3.3-7+squeeze14
Content-Type: text/xml
Connection: close
Content-Encoding: gzip
Transfer-Encoding: chunked
<?xml version="1.0" encoding="ISO-8859-1" ?>
<rss version="2.0">...
EOF;
// For HTTP, replace the \n with \r\n.
$httpsexample = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexample);
$httpsexpected = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexpected);
// Test stripping works OK.
$this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexample));
// Test it does nothing to the 'plain' data.
$this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexpected));
}
}

0 comments on commit 85cb4b6

Please sign in to comment.
You can’t perform that action at this time.