Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

UTF8 generator #83

Closed
wants to merge 2 commits into from

3 participants

@Motiejus

WIP; currently not possible to test proper shrinking because of
implementation using ?FORALL().

@Motiejus

We need an approach to test shrinking of user-defined types as well as specify some correct and incorrect cases in the tests.

Motiejus added some commits
@Motiejus Motiejus UTF8 generator
WIP; currently not possible to test proper shrinking because of
implementation using ?FORALL().
66b43d2
@Motiejus Motiejus Isolate proper_unicode to its own module a3433c3
@manopapad
Owner

Would constructed_types_with_data() work for this? This list is set up specifically for user-defined types, but expects all correct and incorrect cases to be specified in intermediate format (i.e., with the let-bound values present, e.g. {'$used',5,25} for ?LET(X,range(1,5),X*X), rather than just 25).

So, the first test tuple:

{utf8(), [<<>>,<<0>>,<<127>>,<<197,161>>],<<>>, [...], "utf8()"}

would become:

{utf8(), [{'$used',{'$used',0,[]},<<>>},
          {'$used',{'$used',1,[0]},<<0>>},
          {'$used',{'$used',1,[127]},<<127>>},
          {'$used',{'$used',1,[50593]},<<197,161>>}], <<>>, [...], "utf8()"}

I.e., you'd need to specify exactly how each of the values could be constructed. This is for the benefit of the instance checking code (it would be much harder to verify, in the absence of let-bound values, if the final value is/isn't a valid instance of the type).

@kostis
Collaborator

This pull request has been sitting quite long already... It would be good for @Motiejus to either react on the comment of @manopapad (possibly by revising the PR) or give me the "go ahead" so that I can merge this as is (and we can revise it later).

@Motiejus

@kostis I have been very busy lately, sorry for a late reply.

@manopapad copy-pasting your example produces a failing test (clean state assertion fails). I lack the knowledge of internals to understand why the test fails and what it really means when it fails. I once took a tour to proper internals and didn't break it; I don't think I want to retry without hand-holding or a few spare days ahead of me in isolation.

For the short term, if users need the feature, we can just merge this pull request. If one of you can look at Manolis' test and explain to me why exactly it fails and what it is supposed to do (the handholding part), I would gladly monkey-type the rest of the test cases.

@manopapad manopapad closed this in 3d211d6
@manopapad
Owner

I made the required modifications and merged the commits. The commit message on 7c20b4e has more information on the modifications. Thanks for your help Motiejus!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Mar 8, 2014
  1. @Motiejus

    UTF8 generator

    Motiejus authored
    WIP; currently not possible to test proper shrinking because of
    implementation using ?FORALL().
  2. @Motiejus
This page is out of date. Refresh to see the latest.
View
2  doc/overview.edoc
@@ -47,4 +47,6 @@ the documentation for the following modules:
<dt>{@link proper_statem}</dt>
<dd>using PropEr to test stateful reactive systems specified via an abstract
state machine</dd>
+<dt>{@link proper_unicode}</dt>
+<dd>generating unicode strings and binaries</dd>
</dl>
View
6 include/proper.hrl
@@ -63,6 +63,12 @@
function3/1, function4/1, weighted_default/2, parameter/1,
parameter/2, with_parameter/3, with_parameters/2]).
+%%------------------------------------------------------------------------------
+%% Unicode
+%%------------------------------------------------------------------------------
+
+-import(proper_unicode, [utf8/0, utf8/1, utf8/2]).
+
%%------------------------------------------------------------------------------
%% Type manipulation functions
View
101 src/proper_unicode.erl
@@ -0,0 +1,101 @@
+%%% Copyright 2014 Motiejus Jakštys <desired.mta@gmail.com>
+%%%
+%%% This file is part of PropEr.
+%%%
+%%% PropEr is free software: you can redistribute it and/or modify
+%%% it under the terms of the GNU General Public License as published by
+%%% the Free Software Foundation, either version 3 of the License, or
+%%% (at your option) any later version.
+%%%
+%%% PropEr is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%%% GNU General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License
+%%% along with PropEr. If not, see <http://www.gnu.org/licenses/>.
+
+%%% @copyright 2014 Motiejus Jakštys
+%%% @version {@version}
+%%% @author Motiejus Jakštys
+
+%%% @doc Unicode generators for PropEr
+%%%
+%%% This module exposes a utf8 binary generator.
+%%%
+%%% It makes it easy to create custom-encoded unicode binaries. For example,
+%%% a utf16 binary generator:
+%%%
+%%% ```
+%%% utf16() ->
+%%% ?LET(S, utf8(), unicode:characters_to_binary(S, utf8, utf16)).
+%%% '''
+%%%
+%%% Verify it has at least twice as many bytes as codepoints:
+%%%
+%%% ```
+%%% ?FORALL(S, utf16(),
+%%% size(S) >= 2*length(unicode:characters_to_list(S, utf16))).
+%%% '''
+%%% Only utf8 generation is supported: {@link utf8/0}, {@link utf8/1}, {@link
+%%% utf8/2}. Unicode codepoints and other encodings are trivial to get with
+%%% the utf8 generators and the {@link unicode} module in OTP.
+-module(proper_unicode).
+
+-include("proper_common.hrl").
+
+%% @private_type
+%% @alias
+-type nonnegextint() :: non_neg_integer() | 'inf'.
+
+-import(proper_types, [integer/2, union/1, vector/2]).
+
+-export([utf8/0, utf8/1, utf8/2]).
+
+%% @doc Generator of codepoints whose utf8 encoding is exactly N bytes long.
+-spec unicode_codepoint(1..4) -> proper_types:type().
+unicode_codepoint(1) ->
+ integer(0, 16#7F);
+unicode_codepoint(2) ->
+ integer(16#80, 16#7FF);
+unicode_codepoint(3) ->
+ union([integer(16#800, 16#D7FF), integer(16#E000, 16#FFFD)]); % 16#D800..16#DFFF (surrogates) left out
+unicode_codepoint(4) ->
+ integer(16#10000, 16#10FFFF).
+
+%% @doc Generator of codepoints whose utf8 encoding takes at most N bytes.
+-spec unicode_codepoint_upto(1..4) -> proper_types:type().
+unicode_codepoint_upto(N) ->
+ union(lists:map(fun unicode_codepoint/1, lists:seq(1, N))).
+
+%% @doc utf8-encoded binary of unbounded length, codepoints of up to 4 bytes.
+-spec utf8() -> proper_types:type().
+utf8() ->
+ utf8(inf, 4).
+
+%% @doc utf8-encoded binary of at most N codepoints (`inf' lifts the bound).
+-spec utf8(nonnegextint()) -> proper_types:type().
+utf8(N) ->
+ utf8(N, 4).
+
+%% @doc utf8 binary of at most N codepoints, each encoded in no more than
+%% `MaxCodePointSize' bytes.
+%%
+%% Capping the encoded size of a codepoint helps when the system under test
+%% does not handle the full unicode range; MySQL's utf8 encoding, for
+%% instance, accepts only codepoints of up to 3 bytes in VARCHAR fields.
+%% Pass `inf' as the first argument when the length must not be bounded.
+-spec utf8(nonnegextint(), 1..4) -> proper_types:type().
+utf8(N, MaxCodePointSize) ->
+ CodePoint = unicode_codepoint_upto(MaxCodePointSize),
+ ?LET(CodePoints,
+ vector_upto(N, CodePoint),
+ unicode:characters_to_binary(CodePoints)).
+
+%% =============================================================================
+%% Helpers
+%% =============================================================================
+
+%% @doc Generator of lists of `What' holding at most N elements.
+vector_upto(N, What) ->
+ ?LET(Len, integer(0, N), vector(Len, What)).
View
23 test/proper_tests.erl
@@ -325,6 +325,14 @@ simple_types_with_data() ->
{float(0.0,0.0), [0.0], 0.0, [0.1,-0.1], none},
{non_neg_float(), [88.8,98.9,0.0], 0.0, [-12,1,-0.01], none},
{atom(), [elvis,'Another Atom',''], '', ["not_an_atom",12,12.2], "atom()"},
+ %{utf8(), [<<>>,<<0>>,<<127>>,<<197,161>>],<<>>,
+ % [<<128>>,<<16#C5BD:16>>,<<127,128>>], "utf8()"},
+ %{utf8(0), [<<>>], <<>>, [<<1>>], none},
+ %{utf8(1), [<<>>,<<127>>,<<197,161>>], <<>>,
+ % [<<0,127>>,<<127,128>>,<<223,191>>], none},
+ %{utf8(2), [<<197,161>>,<<127,197,161>>], <<>>, [<<0,0,0>>], none},
+ %{utf8(inf, 1), [<<>>,<<0>>,<<0,0>>,<<0,0,0>>], <<>>, [<<197,161>>], none},
+ %{utf8(inf, 2), [<<0,0,0>>,<<197,161>>], <<>>, [<<127,197,161>>], none},
{binary(), [<<>>,<<12,21>>], <<>>, [<<1,2:3>>,binary_atom,42], "binary()"},
{binary(), [], <<>>, [], "<<_:_*8>>"},
{binary(3), [<<41,42,43>>], <<0,0,0>>, [<<1,2,3,4>>], "<<_:24>>"},
@@ -485,7 +493,9 @@ impossible_types() ->
?SUCHTHAT(X, float(0.0,10.0), X < 0.0),
?SUCHTHAT(L, vector(12,integer()), length(L) =/= 12),
?SUCHTHAT(B, binary(), lists:member(256,binary_to_list(B))),
- ?SUCHTHAT(X, exactly('Lelouch'), X =:= 'vi Brittania')].
+ ?SUCHTHAT(X, exactly('Lelouch'), X =:= 'vi Brittania'),
+ ?SUCHTHAT(X, utf8(), unicode:characters_to_list(X) =:= [16#D800]),
+ ?SUCHTHAT(X, utf8(1, 1), size(X) > 1)].
impossible_native_types() ->
[{types_test1, ["1.1","no_such_module:type1()","no_such_type()"]},
@@ -772,7 +782,16 @@ native_type_props_test_() ->
is_float(X))),
?_shrinksTo(0, ?LETSHRINK([X],[my_native_type()],{'tag',X})),
?_passes(weird_types:prop_export_all_works()),
- ?_passes(weird_types:prop_no_auto_import_works())].
+ ?_passes(weird_types:prop_no_auto_import_works()),
+
+ ?_passes(?FORALL(B, utf8(), unicode:characters_to_binary(B) =:= B)),
+ ?_passes(?FORALL(B, utf8(1), length(unicode:characters_to_list(B)) =< 1)),
+ ?_passes(?FORALL(B, utf8(1, 1), size(B) =< 1)),
+ ?_passes(?FORALL(B, utf8(2, 1), size(B) =< 2)),
+ ?_passes(?FORALL(B, utf8(4), size(B) =< 16)),
+ ?_passes(?FORALL(B, utf8(),
+ length(unicode:characters_to_list(B)) =< size(B)))
+ ].
-type bin4() :: <<_:32>>.
-type bits42() :: <<_:42>>.
Something went wrong with that request. Please try again.