Skip to content

m13253/chumsky-utf8dec

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

7 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

chumsky-utf8dec

A UTF-8 decoder based on the Chumsky parser framework.

Demo

/// Decodes a well-formed UTF-8 byte sequence and checks both the decoded
/// characters and the span reported for each of them.
///
/// The input is a family emoji: four emoji code points joined by three
/// zero-width joiners (`U+200D`). Every decoded `char` is expected to come
/// paired with the `"input1.txt"` label and a `SimpleSpan`; the span
/// boundaries advance by 4 for each emoji and by 3 for each joiner.
fn demo_success() {
    let source = "👨‍👩‍👧‍👦".as_bytes().with_context("input1.txt");

    let (decoded, errors) = decoder::<_, _, extra::Err<Rich<_, _>>>()
        .collect::<Vec<_>>()
        .parse(source)
        .into_output_errors();

    // One entry per code point, in input order.
    let expected = vec![
        ('👨', ("input1.txt", SimpleSpan::new(0, 4))),
        ('\u{200D}', ("input1.txt", SimpleSpan::new(4, 7))),
        ('👩', ("input1.txt", SimpleSpan::new(7, 11))),
        ('\u{200D}', ("input1.txt", SimpleSpan::new(11, 14))),
        ('👧', ("input1.txt", SimpleSpan::new(14, 18))),
        ('\u{200D}', ("input1.txt", SimpleSpan::new(18, 21))),
        ('👦', ("input1.txt", SimpleSpan::new(21, 25))),
    ];

    assert_eq!(decoded, Some(expected));
    // A fully valid input must not produce any diagnostics.
    assert_eq!(errors, vec![]);
}

/// Feeds the decoder an invalid byte sequence and checks the single error it
/// reports.
///
/// The bytes `ED A0 80` would encode the surrogate `U+D800`, which UTF-8 does
/// not permit. The demo expects no output and exactly one error, located at
/// the second byte (span `1..2`), stating that a byte labelled
/// `0x80 ..= 0x9F` was expected where `0xA0` was found.
fn demo_failure() {
    use chumsky::error::{RichPattern, RichReason};
    use chumsky::util::Maybe;

    let source = b"\xED\xA0\x80".with_context("input2.txt");

    let (decoded, errors) = decoder::<_, _, extra::Err<Rich<_, _>>>()
        .collect::<Vec<_>>()
        .parse(source)
        .into_output_errors();

    assert_eq!(decoded, None);
    assert_eq!(errors.len(), 1);

    // The length check above guarantees the index is in bounds.
    let first_error = &errors[0];

    let expected_reason = RichReason::ExpectedFound {
        expected: vec![RichPattern::Label("0x80 ..= 0x9F")],
        found: Some(Maybe::Val(0xA0)),
    };
    assert_eq!(first_error.reason(), &expected_reason);

    // The error points at the offending second byte only.
    assert_eq!(first_error.span(), &("input2.txt", SimpleSpan::new(1, 2)));
}

License

This code is released under the MIT license.

I don’t plan to publish this code to crates.io yet. If you want to use it, you may copy and paste it directly into your own project.

About

A UTF-8 decoder based on the Chumsky parser framework

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages