Skip to content

Commit

Permalink
Handle Unicode double-quote characters when parsing arguments (#1553)
Browse files Browse the repository at this point in the history
Closes #1545
  • Loading branch information
arqunis committed Oct 16, 2021
1 parent c0463c0 commit 9bfc1e7
Showing 1 changed file with 63 additions and 13 deletions.
76 changes: 63 additions & 13 deletions src/framework/standard/args.rs
Expand Up @@ -103,19 +103,49 @@ impl Token {
}
}

// Identifies which quotation style opened a quoted argument, so the lexer can look for the
// matching closing character.
//
// This exists because of an edge case with Apple operating systems. macOS and iOS ship with a
// feature called "Smart Quotes" enabled by default, which automatically substitutes the plain
// ASCII `"` with typographic Unicode quotation marks. Users can disable it, but it is a global
// setting, so they may not want to turn it off just to invoke bot commands properly on Discord.
#[derive(Clone, Copy)]
enum QuoteKind {
    // The ASCII `"` character; it both opens and closes a quoted argument.
    Ascii,
    // The Unicode pair: opened by U+201C and closed by U+201D.
    Apple,
}

impl QuoteKind {
    // Classifies `c` as an opening quote character.
    //
    // Returns `None` when `c` does not open a quoted argument.
    fn new(c: char) -> Option<Self> {
        if c == '"' {
            Some(Self::Ascii)
        } else if c == '\u{201C}' {
            Some(Self::Apple)
        } else {
            None
        }
    }

    // Reports whether `c` is the closing character that pairs with this kind of opening quote.
    fn is_ending_quote(self, c: char) -> bool {
        let closing = match self {
            Self::Ascii => '"',
            Self::Apple => '\u{201D}',
        };

        c == closing
    }
}

fn lex(stream: &mut Stream<'_>, delims: &[Cow<'_, str>]) -> Option<Token> {
if stream.is_empty() {
return None;
}

let start = stream.offset();
if stream.current()? == b'"' {
stream.next();
if let Some(kind) = QuoteKind::new(stream.current_char()?) {
stream.next_char();

stream.take_until(|b| b == b'"');
stream.take_until_char(|c| kind.is_ending_quote(c));

let is_quote = stream.current().map_or(false, |b| b == b'"');
stream.next();
let is_quote = stream.current_char().map_or(false, |c| kind.is_ending_quote(c));
stream.next_char();

let end = stream.offset();

Expand Down Expand Up @@ -150,12 +180,35 @@ fn lex(stream: &mut Stream<'_>, delims: &[Cow<'_, str>]) -> Option<Token> {
Some(Token::new(TokenKind::Argument, start, end))
}

// Checks that the first character of `s` is `begin` and its last character is `end`.
//
// A string consisting of a single character satisfies both conditions at once when
// `begin == end`; callers that need two distinct delimiters must check the length themselves.
fn is_surrounded_with(s: &str, begin: char, end: char) -> bool {
    let opens = s.chars().next() == Some(begin);
    let closes = s.chars().next_back() == Some(end);

    opens && closes
}

// Tells whether `s` is wrapped in a recognised pair of quotation marks.
fn is_quoted(s: &str) -> bool {
    // Besides the ASCII quote, the Unicode pair U+201C / U+201D is accepted; refer to
    // `QuoteKind` for why Unicode quote characters are checked.
    const QUOTE_PAIRS: [(char, char); 2] = [('"', '"'), ('\u{201C}', '\u{201D}')];

    // Anything shorter than two bytes cannot hold both an opening and a closing quote
    // (this rejects a lone `"`, which would otherwise count as its own opener and closer).
    s.len() >= 2 && QUOTE_PAIRS.iter().any(|&(open, close)| is_surrounded_with(s, open, close))
}

// Removes `begin` from the front of `s` and `end` from its back.
//
// Returns `None` unless both delimiters are present. The suffix is looked for only in what
// remains after the prefix was removed, so a one-character string can never match.
fn strip(s: &str, begin: char, end: char) -> Option<&str> {
    s.strip_prefix(begin)?.strip_suffix(end)
}

// Returns `s` without its surrounding quotation marks, or `s` unchanged when it is not quoted.
//
// Two quote styles are recognised: the ASCII `"` pair and the Unicode pair U+201C / U+201D.
// Refer to `QuoteKind` for why Unicode quote characters are checked.
//
// Stripping goes through `strip_prefix`/`strip_suffix` rather than byte-index slicing: the
// Unicode quotes are multi-byte, and slicing `1..len - 1` panics on a lone `"`. Empty input and
// a lone quote character fall through `strip` as `None` and are returned as-is.
fn remove_quotes(s: &str) -> &str {
    strip(s, '"', '"')
        .or_else(|| strip(s, '\u{201C}', '\u{201D}'))
        .unwrap_or(s)
}

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -299,11 +352,8 @@ impl Args {
.collect::<Vec<_>>();

let args = if delims.is_empty() && !message.is_empty() {
let kind = if message.starts_with('"') && message.ends_with('"') {
TokenKind::QuotedArgument
} else {
TokenKind::Argument
};
let kind =
if is_quoted(message) { TokenKind::QuotedArgument } else { TokenKind::Argument };

// If there are no delimiters, then the only possible argument is the whole message.
vec![Token::new(kind, 0, message.len())]
Expand Down

0 comments on commit 9bfc1e7

Please sign in to comment.