From 98bb0b98fcf280ae19db6a457ddc6dd1be904af7 Mon Sep 17 00:00:00 2001
From: James Reeves
Date: Fri, 23 Mar 2012 22:42:25 +0000
Subject: [PATCH] Technically correct version of URL/percent-encoding

Java's URLEncoder and URLDecoder classes don't follow the URL-encoding
format precisely, instead encoding strings in the slightly different
www-form-urlencoded format. This commit ensures that the url-encode
and url-decode functions work correctly, leaving it up to the
form-encode and form-decode functions to handle data encoded in the
www-form-urlencoded format.
---
 ring-core/src/ring/util/codec.clj       | 43 +++++++++++++++++++++++++++------
 ring-core/test/ring/util/test/codec.clj | 24 ++++++++++++------
 2 files changed, 53 insertions(+), 14 deletions(-)

NOTE(review): this copy of the patch was recovered from a whitespace-collapsed
dump. Line breaks were restored from the hunk headers and diffstat (all counts
match), but the indentation of every line is a best-effort reconstruction and
should be confirmed against the target tree. If context lines fail to match,
try `git apply --ignore-whitespace`.

diff --git a/ring-core/src/ring/util/codec.clj b/ring-core/src/ring/util/codec.clj
index 9dbff02..bee2006 100644
--- a/ring-core/src/ring/util/codec.clj
+++ b/ring-core/src/ring/util/codec.clj
@@ -1,24 +1,53 @@
 (ns ring.util.codec
   "Encoding and decoding utilities."
   (:use ring.util.data)
-  (:require [clojure.string :as string])
+  (:require [clojure.string :as str])
   (:import java.io.File
            (java.net URLEncoder URLDecoder)
            org.apache.commons.codec.binary.Base64))

+(defn- double-escape [^String x]
+  (.replace x "\\" "\\\\"))
+
+(defn percent-encode
+  "Percent-encode every character in the given string using either the specified
+  encoding, or UTF-8 by default."
+  [unencoded & [encoding]]
+  (->> (.getBytes unencoded (or encoding "UTF-8"))
+       (map (partial format "%%%02X"))
+       (str/join)))
+
+(defn- parse-bytes [encoded-bytes]
+  (->> (re-seq #"%.." encoded-bytes)
+       (map #(subs % 1))
+       (map #(.byteValue (Integer/parseInt % 16)))
+       (byte-array)))
+
+(defn percent-decode
+  "Decode every percent-encoded character in the given string using the
+  specified encoding, or UTF-8 by default."
+  [encoded & [encoding]]
+  (str/replace encoded
+               #"(?:%..)+"
+               (fn [chars]
+                 (-> (parse-bytes chars)
+                     (String. (or encoding "UTF-8"))
+                     (double-escape)))))
+
 (defn url-encode
   "Returns the url-encoded version of the given string, using either a specified
   encoding or UTF-8 by default."
   [unencoded & [encoding]]
-  (URLEncoder/encode unencoded (or encoding "UTF-8")))
+  (str/replace
+    unencoded
+    #"[^A-Za-z0-9_~.+-]+"
+    #(double-escape (percent-encode % encoding))))

 (defn url-decode
   "Returns the url-decoded version of the given string, using either a specified
   encoding or UTF-8 by default. If the encoding is invalid, nil is returned."
   [encoded & [encoding]]
-  (try
-    (URLDecoder/decode encoded (or encoding "UTF-8"))
-    (catch Exception e nil)))
+  (percent-decode encoded encoding))

 (defn base64-encode
   "Encode an array of bytes into a base64 encoded string."
@@ -43,7 +72,7 @@
             (url-decode (or val "") encoding))
           param-map))
       {}
-      (string/split param-string #"&"))))
+      (str/split param-string #"&"))))

 (defn form-encode
   "Encode parameters from a map into a string."
@@ -54,7 +83,7 @@
                   (vals param-map)
                   encoding))
   ([params values encoding]
-     (string/join #"&"
+     (str/join #"&"
        (map (fn [param value]
               (if (vector? value)
                 (form-encode (repeat (count value) param)
diff --git a/ring-core/test/ring/util/test/codec.clj b/ring-core/test/ring/util/test/codec.clj
index f99b051..58cdf1d 100644
--- a/ring-core/test/ring/util/test/codec.clj
+++ b/ring-core/test/ring/util/test/codec.clj
@@ -3,16 +3,26 @@
         ring.util.codec)
   (:import java.util.Arrays))

+(deftest test-percent-encode
+  (is (= (percent-encode " ") "%20"))
+  (is (= (percent-encode "+") "%2B"))
+  (is (= (percent-encode "foo") "%66%6F%6F")))
+
+(deftest test-percent-decode
+  (is (= (percent-decode "%20") " "))
+  (is (= (percent-decode "foo%20bar") "foo bar"))
+  (is (= (percent-decode "foo%FE%FF%00%2Fbar" "UTF-16") "foo/bar")))
+
 (deftest test-url-encode
-  (is (= "foo%2Fbar" (url-encode "foo/bar")))
-  (is (= "foo%FE%FF%00%2Fbar") (url-encode "foo/bar" "UTF-16")))
+  (is (= (url-encode "foo/bar") "foo%2Fbar"))
+  (is (= (url-encode "foo/bar" "UTF-16") "foo%FE%FF%00%2Fbar"))
+  (is (= (url-encode "foo+bar") "foo+bar"))
+  (is (= (url-encode "foo bar") "foo%20bar")))

 (deftest test-url-decode
-  (testing "standard behavior"
-    (is (= "foo/bar" (url-decode "foo%2Fbar")))
-    (is (= "foo/bar" (url-decode "foo%FE%FF%00%2Fbar" "UTF-16"))))
-  (testing "returns nil when underlying Java methods throw an exception"
-    (is (nil? (url-decode "%")))))
+  (is (= (url-decode "foo%2Fbar") "foo/bar" ))
+  (is (= (url-decode "foo%FE%FF%00%2Fbar" "UTF-16") "foo/bar"))
+  (is (= (url-decode "%") "%")))

 (deftest test-base64-encoding
   (let [str-bytes (.getBytes "foo?/+" "UTF-8")]