Permalink
Browse files

MDL-38170 SimplePie: Cannot read https feeds through proxy

  • Loading branch information...
1 parent a09484c commit e97baa19caa983f04cfdbf0c3fb4ba6d34e31739 @sammarshallou sammarshallou committed Feb 22, 2013
Showing with 137 additions and 1 deletion.
  1. +39 −0 lib/filelib.php
  2. +1 −1 lib/simplepie/moodle_simplepie.php
  3. +97 −0 lib/tests/filelib_test.php
View
39 lib/filelib.php
@@ -3369,6 +3369,45 @@ public function get_info() {
public function get_errno() {
return $this->errno;
}
+
+ /**
+ * When using a proxy, an additional HTTP response code may appear at
+ * the start of the header. For example, when using https over a proxy
+ * there may be 'HTTP/1.0 200 Connection Established'. Other codes are
+ * also possible and some may come with their own headers.
+ *
+ * If using the return value containing all headers, this function can be
+ * called to remove unwanted doubles.
+ *
+ * Note that it is not possible to distinguish this situation from valid
+ * data unless you know the actual response part (below the headers)
+ * will not be included in this string, or else will not 'look like' HTTP
+ * headers. As a result it is not safe to call this function for general
+ * data.
+ *
+ * @param string $input Input HTTP response
+ * @return string HTTP response with additional headers stripped if any
+ */
+ public static function strip_double_headers($input) {
+ // I have tried to make this regular expression as specific as possible
+ // to avoid any case where it does weird stuff if you happen to put
+ // HTTP/1.1 200 at the start of any line in your RSS file. This should
+ // also make it faster because it can abandon regex processing as soon
+ // as it hits something that doesn't look like an http header. The
+ // header definition is taken from RFC 822, except I didn't support
+ // folding which is never used in practice.
+ $crlf = "\r\n";
+ return preg_replace(
+ // HTTP version and status code (ignore value of code).
+ '~^HTTP/1\..*' . $crlf .
+ // Header name: character between 33 and 126 decimal, except colon.
+ // Colon. Header value: any character except \r and \n. CRLF.
+ '(?:[\x21-\x39\x3b-\x7e]+:[^' . $crlf . ']+' . $crlf . ')*' .
+ // Headers are terminated by another CRLF (blank line).
+ $crlf .
+ // Second HTTP status code, this time must be 200.
+ '(HTTP/1.[01] 200 )~', '$1', $input);
+ }
}
/**
View
2 lib/simplepie/moodle_simplepie.php
@@ -139,7 +139,7 @@ function moodle_simplepie_file($url, $timeout = 10, $redirects = 5, $headers = n
}
}
- $this->headers = $curl->get($url);
+ $this->headers = curl::strip_double_headers($curl->get($url));
if ($curl->error) {
$this->error = 'cURL Error: '.$curl->error;
View
97 lib/tests/filelib_test.php
@@ -270,4 +270,101 @@ public function test_delete_original_file_from_draft() {
$this->assertEquals($contenthash, $fileref->get_contenthash());
$this->assertEquals($filecontent, $fileref->get_content());
}
+
+ /**
+ * Tests the strip_double_headers function in the curl class.
+ */
+ public function test_curl_strip_double_headers() {
+ // Example from issue tracker.
+ $mdl30648example = <<<EOF
+HTTP/1.0 407 Proxy Authentication Required
+Server: squid/2.7.STABLE9
+Date: Thu, 08 Dec 2011 14:44:33 GMT
+Content-Type: text/html
+Content-Length: 1275
+X-Squid-Error: ERR_CACHE_ACCESS_DENIED 0
+Proxy-Authenticate: Basic realm="Squid proxy-caching web server"
+X-Cache: MISS from homer.lancs.ac.uk
+X-Cache-Lookup: NONE from homer.lancs.ac.uk:3128
+Via: 1.0 homer.lancs.ac.uk:3128 (squid/2.7.STABLE9)
+Connection: close
+
+HTTP/1.0 200 OK
+Server: Apache
+X-Lb-Nocache: true
+Cache-Control: private, max-age=15
+ETag: "4d69af5d8ba873ea9192c489e151bd7b"
+Content-Type: text/html
+Date: Thu, 08 Dec 2011 14:44:53 GMT
+Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
+X-Cache-Action: MISS
+X-Cache-Age: 0
+Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
+X-Cache: MISS from ww
+
+<html>...
+EOF;
+ $mdl30648expected = <<<EOF
+HTTP/1.0 200 OK
+Server: Apache
+X-Lb-Nocache: true
+Cache-Control: private, max-age=15
+ETag: "4d69af5d8ba873ea9192c489e151bd7b"
+Content-Type: text/html
+Date: Thu, 08 Dec 2011 14:44:53 GMT
+Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
+X-Cache-Action: MISS
+X-Cache-Age: 0
+Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
+X-Cache: MISS from ww
+
+<html>...
+EOF;
+ // For HTTP, replace the \n with \r\n.
+ $mdl30648example = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648example);
+ $mdl30648expected = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648expected);
+
+ // Test stripping works OK.
+ $this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648example));
+ // Test it does nothing to the 'plain' data.
+ $this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648expected));
+
+ // Example from OU proxy.
+ $httpsexample = <<<EOF
+HTTP/1.0 200 Connection established
+
+HTTP/1.1 200 OK
+Date: Fri, 22 Feb 2013 17:14:23 GMT
+Server: Apache/2
+X-Powered-By: PHP/5.3.3-7+squeeze14
+Content-Type: text/xml
+Connection: close
+Content-Encoding: gzip
+Transfer-Encoding: chunked
+
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<rss version="2.0">...
+EOF;
+ $httpsexpected = <<<EOF
+HTTP/1.1 200 OK
+Date: Fri, 22 Feb 2013 17:14:23 GMT
+Server: Apache/2
+X-Powered-By: PHP/5.3.3-7+squeeze14
+Content-Type: text/xml
+Connection: close
+Content-Encoding: gzip
+Transfer-Encoding: chunked
+
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<rss version="2.0">...
+EOF;
+ // For HTTP, replace the \n with \r\n.
+ $httpsexample = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexample);
+ $httpsexpected = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexpected);
+
+ // Test stripping works OK.
+ $this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexample));
+ // Test it does nothing to the 'plain' data.
+ $this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexpected));
+ }
}

0 comments on commit e97baa1

Please sign in to comment.