Permalink
Browse files

Make `q.encode()` safe for email headers

  • Loading branch information...
ConradIrwin authored and mathiasbynens committed Nov 10, 2016
1 parent 5d8236c commit 0c48b6c217526f5a2fbd710fbff3307bc477241a
Showing with 24 additions and 8 deletions.
  1. +1 −1 q.js
  2. +16 −5 scripts/export-data.js
  3. +7 −2 tests/tests.js
2 q.js
@@ -34,7 +34,7 @@
});
};

var regexUnsafeSymbols = /[\0-\x1F=\?_\x7F-\uFFFF]/g;
// Matches every symbol that must be escaped as `=XX` in a Q-encoded
// 'encoded-word' (RFC 2047). Only ASCII letters, digits, and `!*+-/` are
// left unescaped; space (0x20) is intentionally not matched here because it
// is special-cased separately. `_` MUST be matched: in Q-encoding an
// underscore stands for a space, so a literal underscore that is passed
// through unescaped would decode to a space.
var regexUnsafeSymbols = /[\0-\x1F"-\),\.:-@\[-\^_`\{-\uFFFF]/g;
var encode = function(string) {
// Note: this assumes the input is already encoded into octets (e.g. using
// UTF-8), and that the resulting octets are within the extended ASCII
@@ -3,6 +3,7 @@ var regenerate = require('regenerate');

// Let’s start with the safe/unsafe symbols in `Quoted-Printable` encoding.
// https://tools.ietf.org/html/rfc2045#section-6.7
//
// safe-char := <any octet with decimal value of 33 through
// 60 inclusive, and 62 through 126>
// ; Characters not listed as "mail-safe" in
@@ -12,12 +13,22 @@ var regenerate = require('regenerate');
// ; SPACEs or TABs at the ends of lines, and is
// ; recommended for any character not listed in
// ; RFC 2049 as "mail-safe".
//
// https://tools.ietf.org/html/rfc2047#section-5 restricts the set of safe
// symbols much more severely when Q-encoding is used for an 'encoded-word'
// that appears within a 'phrase' in an email header:
//
// In this case the set of characters that may be used in a "Q"-encoded
// 'encoded-word' is restricted to: <upper and lower case ASCII
// letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
// (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
// 'phrase' MUST be separated from any adjacent 'word', 'text' or
// 'special' by 'linear-white-space'.

var safeSymbols = regenerate()
.addRange(33, 60)
.addRange(62, 126)
// Remove symbols that are unsafe in Q-encoding. Note: space is excluded
// because it’s special-cased.
.remove('?', '_', '\t');
.addRange('A', 'Z')
.addRange('a', 'z') // lower case ASCII
.addRange('0', '9') // decimal digits
.add('!', '*', '+', '-', '/', '_');
var definitelyUnsafeSymbols = regenerate()
.addRange(0x0, 0x10FFFF)
// Note: the script assumes the input is already encoded into octets (e.g.
@@ -48,12 +48,12 @@
test('q.encode', function() {
equal(
q.encode(utf8.encode('If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.')),
'If_you_believe_that_truth=3Dbeauty,_then_surely_mathematics_is_the_most_beautiful_branch_of_philosophy.',
'If_you_believe_that_truth=3Dbeauty=2C_then_surely_mathematics_is_the_most_beautiful_branch_of_philosophy=2E',
'Equals sign'
);
equal(
q.encode(utf8.encode('Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum claritatem. Investigationes demonstraverunt lectores legere me lius quod ii legunt saepius. Claritas est etiam processus dynamicus, qui sequitur mutationem consuetudium lectorum. Mirum est notare quam littera gothica, quam nunc putamus parum claram, anteposuerit litterarum formas humanitatis per seacula quarta decima et quinta decima. Eodem modo typi, qui nunc nobis videntur parum clari, fiant sollemnes in futurum.')),
'Lorem_ipsum_dolor_sit_amet,_consectetuer_adipiscing_elit,_sed_diam_nonummy_nibh_euismod_tincidunt_ut_laoreet_dolore_magna_aliquam_erat_volutpat._Ut_wisi_enim_ad_minim_veniam,_quis_nostrud_exerci_tation_ullamcorper_suscipit_lobortis_nisl_ut_aliquip_ex_ea_commodo_consequat._Duis_autem_vel_eum_iriure_dolor_in_hendrerit_in_vulputate_velit_esse_molestie_consequat,_vel_illum_dolore_eu_feugiat_nulla_facilisis_at_vero_eros_et_accumsan_et_iusto_odio_dignissim_qui_blandit_praesent_luptatum_zzril_delenit_augue_duis_dolore_te_feugait_nulla_facilisi._Nam_liber_tempor_cum_soluta_nobis_eleifend_option_congue_nihil_imperdiet_doming_id_quod_mazim_placerat_facer_possim_assum._Typi_non_habent_claritatem_insitam;_est_usus_legentis_in_iis_qui_facit_eorum_claritatem._Investigationes_demonstraverunt_lectores_legere_me_lius_quod_ii_legunt_saepius._Claritas_est_etiam_processus_dynamicus,_qui_sequitur_mutationem_consuetudium_lectorum._Mirum_est_notare_quam_littera_gothica,_quam_nunc_putamus_parum_claram,_anteposuerit_litterarum_formas_humanitatis_per_seacula_quarta_decima_et_quinta_decima._Eodem_modo_typi,_qui_nunc_nobis_videntur_parum_clari,_fiant_sollemnes_in_futurum.',
'Lorem_ipsum_dolor_sit_amet=2C_consectetuer_adipiscing_elit=2C_sed_diam_nonummy_nibh_euismod_tincidunt_ut_laoreet_dolore_magna_aliquam_erat_volutpat=2E_Ut_wisi_enim_ad_minim_veniam=2C_quis_nostrud_exerci_tation_ullamcorper_suscipit_lobortis_nisl_ut_aliquip_ex_ea_commodo_consequat=2E_Duis_autem_vel_eum_iriure_dolor_in_hendrerit_in_vulputate_velit_esse_molestie_consequat=2C_vel_illum_dolore_eu_feugiat_nulla_facilisis_at_vero_eros_et_accumsan_et_iusto_odio_dignissim_qui_blandit_praesent_luptatum_zzril_delenit_augue_duis_dolore_te_feugait_nulla_facilisi=2E_Nam_liber_tempor_cum_soluta_nobis_eleifend_option_congue_nihil_imperdiet_doming_id_quod_mazim_placerat_facer_possim_assum=2E_Typi_non_habent_claritatem_insitam=3B_est_usus_legentis_in_iis_qui_facit_eorum_claritatem=2E_Investigationes_demonstraverunt_lectores_legere_me_lius_quod_ii_legunt_saepius=2E_Claritas_est_etiam_processus_dynamicus=2C_qui_sequitur_mutationem_consuetudium_lectorum=2E_Mirum_est_notare_quam_littera_gothica=2C_quam_nunc_putamus_parum_claram=2C_anteposuerit_litterarum_formas_humanitatis_per_seacula_quarta_decima_et_quinta_decima=2E_Eodem_modo_typi=2C_qui_nunc_nobis_videntur_parum_clari=2C_fiant_sollemnes_in_futurum=2E',
'Long text'
);
equal(
@@ -81,6 +81,11 @@
'foo=00bar=FFbaz',
'Lowest and highest octet values (U+0000 and U+00FF)'
);
equal(
q.encode('ooh: ahh'),
'ooh=3A_ahh',
'colons'
);
raises(
function() {
// Note: “forgot” to UTF-8-encode first

0 comments on commit 0c48b6c

Please sign in to comment.