Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 259 lines (230 sloc) 9.349 kb
528507c @nox Add multipart support
nox authored
1 %% Copyright (c) 2011, Anthony Ramine <nox@dev-extend.eu>
2 %%
3 %% Permission to use, copy, modify, and/or distribute this software for any
4 %% purpose with or without fee is hereby granted, provided that the above
5 %% copyright notice and this permission notice appear in all copies.
6 %%
7 %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 %% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 %% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 %% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 %% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 %% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15 %% @doc Multipart parser.
16 -module(cowboy_multipart).
17
a5e7521 @essen Have only one -export and -export_type per line
essen authored
18 -export([parser/1]).
19 -export([content_disposition/1]).
20
528507c @nox Add multipart support
nox authored
%% A part parser: feed it a binary and it yields a part result, or asks
%% for more data.
-type part_parser() :: parser(more(part_result())).
%% A function consuming a chunk of input and producing a T.
-type parser(T) :: fun((binary()) -> T).
%% Either a T, or a request for more input carrying the parser to call
%% with the next chunk.
-type more(T) :: T | {more, parser(T)}.
%% What parsing a part can yield: its headers, or the end of the multipart
%% data.
-type part_result() :: headers() | eof.
%% The headers of a part, with a continuation that yields its body.
-type headers() :: {headers, http_headers(), body_cont()}.
%% Header name/value pairs.
-type http_headers() :: [{binary(), binary()}].
%% Continuation producing the next piece of a part body.
-type body_cont() :: cont(more(body_result())).
%% A zero-argument continuation producing a T.
-type cont(T) :: fun(() -> T).
%% A chunk of body with the continuation for the rest, or the end of the
%% part.
-type body_result() :: {body, binary(), body_cont()} | end_of_part().
%% End of the current part, with a continuation that moves on to whatever
%% follows the boundary.
-type end_of_part() :: {end_of_part, cont(more(part_result()))}.
%% A parsed content disposition: its type and its parameters.
-type disposition() :: {binary(), [{binary(), binary()}]}.
32
13b743b @essen Include the eunit file only if TEST is defined
essen authored
33 -ifdef(TEST).
528507c @nox Add multipart support
nox authored
34 -include_lib("eunit/include/eunit.hrl").
13b743b @essen Include the eunit file only if TEST is defined
essen authored
35 -endif.
528507c @nox Add multipart support
nox authored
36
37 %% API.
38
%% @doc Build a multipart parser for the given boundary.
%% The returned fun takes the first chunk of multipart data and starts
%% parsing it; both the boundary and the chunk must be binaries.
-spec parser(binary()) -> part_parser().
parser(Boundary) when is_binary(Boundary) ->
    fun (Data) when is_binary(Data) ->
        parse(Data, Boundary)
    end.
43
%% @doc Parse a content disposition.
%%
%% The value is handed to `cowboy_http:token_ci/2' to read the disposition
%% type, then the remainder goes to `cowboy_http:params/2' to read the
%% parameters. Returns `{Disposition, Params}' when nothing is left after
%% the parameters, and `{error, badarg}' when the type token is empty or
%% when unparsed data remains.
%% @todo Parse the MIME header instead of the HTTP one.
-spec content_disposition(binary()) -> disposition() | {error, badarg}.
content_disposition(Data) ->
    cowboy_http:token_ci(Data,
        fun (_Rest, <<>>) ->
                %% No disposition type could be read.
                {error, badarg};
            (Rest, Disposition) ->
                cowboy_http:params(Rest,
                    fun (<<>>, Params) -> {Disposition, Params};
                        %% Trailing data after the parameters is rejected.
                        (_Rest2, _) -> {error, badarg}
                    end)
        end).
56
57 %% Internal.
58
%% @doc Entry point of the multipart parser.
%% When the data opens with the initial delimiter ("--" followed by the
%% boundary), parsing resumes right after it. Otherwise the data is treated
%% as preamble and skipped up to the first delimiter line. If fewer bytes
%% than the initial delimiter are available, more data is requested.
-spec parse(binary(), binary()) -> more(part_result()).
parse(Bin, Boundary) when byte_size(Bin) >= byte_size(Boundary) + 2 ->
    Pattern = pattern(Boundary),
    Opening = <<"--", Boundary/binary>>,
    OpeningSize = byte_size(Opening),
    case Bin of
        <<Opening:OpeningSize/binary, Rest/binary>> ->
            %% No preamble: the data starts with the initial delimiter.
            parse_boundary_tail(Rest, Pattern);
        _ ->
            %% Anything else is preamble; look for the first delimiter line.
            skip(Bin, Pattern)
    end;
parse(Bin, Boundary) ->
    %% Too short to tell whether the data starts with the delimiter.
    more(Bin, fun (NewBin) -> parse(NewBin, Boundary) end).
75
%% A compiled delimiter pattern paired with the delimiter's size in bytes.
-type pattern() :: {binary:cp(), non_neg_integer()}.

%% @doc Compile the delimiter used to locate boundaries inside the data.
%% The delimiter is the boundary prefixed with "\r\n--"; the second tuple
%% element is its length in bytes (the boundary length plus those four).
-spec pattern(binary()) -> pattern().
pattern(Boundary) ->
    Compiled = binary:compile_pattern(<<"\r\n--", Boundary/binary>>),
    {Compiled, byte_size(Boundary) + 4}.
84
%% @doc Parse what follows a boundary on its line.
%% A boundary immediately followed by "--" closes the multipart data, so
%% <em>eof</em> is returned. With at least two other bytes available the
%% rest of the line is consumed; with fewer, more data is requested because
%% the closing "--" cannot be ruled out yet.
-spec parse_boundary_tail(binary(), pattern()) -> more(part_result()).
parse_boundary_tail(<<"--", _/binary>>, _Pattern) ->
    %% Closing delimiter: parsing is finished.
    eof;
parse_boundary_tail(Bin, Pattern) when byte_size(Bin) >= 2 ->
    %% Not a closing delimiter: drop the remainder of the boundary line.
    parse_boundary_eol(Bin, Pattern);
parse_boundary_tail(Bin, Pattern) ->
    %% Fewer than two bytes available, cannot decide yet.
    more(Bin, fun (NewBin) -> parse_boundary_tail(NewBin, Pattern) end).
101
%% @doc Discard the rest of a boundary line up to its CRLF.
%% Everything before the first CRLF is dropped. The data starting at that
%% CRLF is passed through `cowboy_http:whitespace/2' (defined outside this
%% module; presumably it strips leading whitespace) and then on to
%% parse_boundary_crlf/2. Without a CRLF, only the last byte is kept, since
%% it may be the "\r" of a CRLF split across chunks, and more data is
%% requested.
-spec parse_boundary_eol(binary(), pattern()) -> more(part_result()).
parse_boundary_eol(Bin, Pattern) ->
    case binary:match(Bin, <<"\r\n">>) of
        nomatch ->
            %% Keep at most one trailing byte for the next attempt.
            Keep = min(byte_size(Bin), 1),
            Held = binary:part(Bin, byte_size(Bin), -Keep),
            more(Held, fun (NewBin) -> parse_boundary_eol(NewBin, Pattern) end);
        {CrlfPos, _} ->
            %% Resume from the CRLF itself; it is consumed by the next step.
            FromCrlf = binary:part(Bin, CrlfPos, byte_size(Bin) - CrlfPos),
            Next = fun (Data) -> parse_boundary_crlf(Data, Pattern) end,
            cowboy_http:whitespace(FromCrlf, Next)
    end.
117
%% @doc Consume the CRLF ending a boundary line and start on the headers.
%% In this module it is only reached through parse_boundary_eol/2 once a
%% CRLF has been located, so there are always at least two bytes to look at
%% and this function never asks for more data itself.
-spec parse_boundary_crlf(binary(), pattern()) -> more(part_result()).
parse_boundary_crlf(Bin, Pattern) ->
    case Bin of
        <<"\r\n", Rest/binary>> ->
            parse_headers(Rest, Pattern);
        _ ->
            %% RFC 2046 does not say what to do when LWSP is not directly
            %% followed by a new line. Here it is treated as still being
            %% part of the boundary line, so the end of line is searched
            %% for again.
            parse_boundary_eol(Bin, Pattern)
    end.
129
%% @doc Parse the headers of a part, starting with none collected.
-spec parse_headers(binary(), pattern()) -> more(part_result()).
parse_headers(Bin, Pattern) ->
    parse_headers(Bin, Pattern, []).

%% @doc Decode the headers of a part one at a time.
%% Headers are accumulated with lowercase binary names. Once the end of the
%% headers is reached they are returned in their original order together
%% with a continuation for the body. A malformed header makes the parser
%% skip ahead to the next boundary, and an incomplete one requests more
%% data.
-spec parse_headers(binary(), pattern(), http_headers()) -> more(part_result()).
parse_headers(Bin, Pattern, Acc) ->
    case erlang:decode_packet(httph_bin, Bin, []) of
        {more, _} ->
            %% Incomplete header line: retry with this data plus the next chunk.
            more(Bin, fun (NewBin) -> parse_headers(NewBin, Pattern, Acc) end);
        {ok, http_eoh, Rest} ->
            Headers = lists:reverse(Acc),
            {headers, Headers, fun () -> parse_body(Rest, Pattern) end};
        {ok, {http_error, _}, _} ->
            %% Skip malformed parts.
            skip(Bin, Pattern);
        {ok, {http_header, _, Name, _, Value}, Rest} ->
            parse_headers(Rest, Pattern, [{header_name(Name), Value} | Acc])
    end.

%% Normalise a decoded header name, which may be an atom or a binary, into
%% a lowercase binary.
-spec header_name(atom() | binary()) -> binary().
header_name(Name) when is_atom(Name) ->
    cowboy_bstr:to_lower(atom_to_binary(Name, latin1));
header_name(Name) ->
    cowboy_bstr:to_lower(Name).
152
%% @doc Produce the next piece of a part body.
%% When the delimiter is found, the bytes before it (if any) are returned
%% as the last body chunk, followed by an end_of_part result. When it is
%% not found, everything is returned as a body chunk except the trailing
%% bytes that could be the start of a delimiter split across chunks. With
%% less data than one delimiter, more data is requested.
-spec parse_body(binary(), pattern()) -> more(body_result()).
parse_body(Bin, {Compiled, Size} = Pattern) when byte_size(Bin) >= Size ->
    case binary:match(Bin, Compiled) of
        nomatch ->
            %% Hold back one byte less than a full delimiter.
            HeldSize = Size - 1,
            ChunkSize = byte_size(Bin) - HeldSize,
            <<Chunk:ChunkSize/binary, Held:HeldSize/binary>> = Bin,
            {body, Chunk, fun () -> parse_body(Held, Pattern) end};
        {0, _} ->
            %% Delimiter right at the start: no body left to return.
            <<_:Size/binary, AfterBoundary/binary>> = Bin,
            end_of_part(AfterBoundary, Pattern);
        {Start, _} ->
            %% Delimiter found further on: this is the last body chunk of
            %% the part.
            <<Chunk:Start/binary, _:Size/binary, AfterBoundary/binary>> = Bin,
            EndOfPart = end_of_part(AfterBoundary, Pattern),
            {body, Chunk, fun () -> EndOfPart end}
    end;
parse_body(Bin, Pattern) ->
    %% Not enough data to hold a whole delimiter.
    more(Bin, fun (NewBin) -> parse_body(NewBin, Pattern) end).
172
%% @doc Build the end_of_part result for data located right after a
%% boundary. The continuation resumes by parsing the tail of that boundary
%% line.
-spec end_of_part(binary(), pattern()) -> end_of_part().
end_of_part(Bin, Pattern) ->
    Next = fun () -> parse_boundary_tail(Bin, Pattern) end,
    {end_of_part, Next}.
176
%% @doc Skip data up to and including the next delimiter.
%% Used for the preamble and for malformed parts. When no delimiter is
%% found, only the trailing bytes that could begin one are kept and more
%% data is requested.
-spec skip(binary(), pattern()) -> more(part_result()).
skip(Bin, {Compiled, Size} = Pattern) ->
    case binary:match(Bin, Compiled) of
        nomatch ->
            %% Keep at most one byte less than a full delimiter.
            Keep = min(byte_size(Bin), Size - 1),
            Held = binary:part(Bin, byte_size(Bin), -Keep),
            more(Held, fun (NewBin) -> skip(NewBin, Pattern) end);
        {Start, _} ->
            %% Delimiter found: carry on with what follows it.
            Consumed = Start + Size,
            <<_:Consumed/binary, AfterBoundary/binary>> = Bin,
            parse_boundary_tail(AfterBoundary, Pattern)
    end.
191
%% Build a request for more data. With no leftover bytes the given parser
%% is returned as is; otherwise it is wrapped so that the leftover bytes
%% are prepended to the next chunk before the parser is called.
-spec more(binary(), parser(T)) -> {more, parser(T)}.
more(Bin, InnerF) ->
    case Bin of
        <<>> ->
            {more, InnerF};
        _ ->
            Resume = fun (NewData) when is_binary(NewData) ->
                InnerF(<<Bin/binary, NewData/binary>>)
            end,
            {more, Resume}
    end.
200
201 %% Tests.
202
203 -ifdef(TEST).
204
%% Test generator for the multipart parser. Each entry pairs a complete
%% multipart body (boundary "boundary") with the parts expected out of it,
%% as a list of {Headers, Body}. ?_assertEqual reports the expected and the
%% actual value on failure, where a bare match would only raise badmatch.
multipart_test_() ->
    %% {Body, Result}
    Tests = [
        {<<"--boundary--">>, []},
        {<<"preamble\r\n--boundary--">>, []},
        {<<"--boundary--\r\nepilogue">>, []},
        {<<"\r\n--boundary\r\nA:b\r\nC:d\r\n\r\n\r\n--boundary--">>,
            [{[{<<"a">>, <<"b">>}, {<<"c">>, <<"d">>}], <<>>}]},
        {
            <<
                "--boundary\r\nX-Name:answer\r\n\r\n42"
                "\r\n--boundary\r\nServer:Cowboy\r\n\r\nIt rocks!\r\n"
                "\r\n--boundary--"
            >>,
            [
                {[{<<"x-name">>, <<"answer">>}], <<"42">>},
                {[{<<"server">>, <<"Cowboy">>}], <<"It rocks!\r\n">>}
            ]
        }
    ],
    [{title(V), ?_assertEqual(R, acc_multipart(V))} || {V, R} <- Tests].
226
%% Run the parser for boundary "boundary" over a complete body and collect
%% every part as {Headers, Body}, in order of appearance.
acc_multipart(Data) ->
    Parser = parser(<<"boundary">>),
    acc_multipart(Parser(Data), []).

%% Follow the parser results, accumulating parts in reverse. The part being
%% read sits at the head of the accumulator with its body chunks in reverse
%% order until its end is reached. `{more, _}' results are not handled:
%% test bodies are expected to be complete.
acc_multipart(eof, Parts) ->
    lists:reverse(Parts);
acc_multipart({headers, Headers, Next}, Parts) ->
    acc_multipart(Next(), [{Headers, []} | Parts]);
acc_multipart({body, Chunk, Next}, [{Headers, Chunks} | Parts]) ->
    acc_multipart(Next(), [{Headers, [Chunk | Chunks]} | Parts]);
acc_multipart({end_of_part, Next}, [{Headers, Chunks} | Parts]) ->
    WholeBody = list_to_binary(lists:reverse(Chunks)),
    acc_multipart(Next(), [{Headers, WholeBody} | Parts]).
239
%% Test generator for content_disposition/1. Each entry pairs a header
%% value with the expected {Type, Params} result. ?_assertEqual reports the
%% expected and the actual value on failure, where a bare match would only
%% raise badmatch.
content_disposition_test_() ->
    %% {Disposition, Result}
    Tests = [
        {<<"form-data; name=id">>, {<<"form-data">>, [{<<"name">>, <<"id">>}]}},
        {<<"inline">>, {<<"inline">>, []}},
        {<<"attachment; \tfilename=brackets-slides.pdf">>,
            {<<"attachment">>, [{<<"filename">>, <<"brackets-slides.pdf">>}]}}
    ],
    [{title(V), ?_assertEqual(R, content_disposition(V))} || {V, R} <- Tests].
249
%% Turn a test input into a printable test title by replacing tab, carriage
%% return and line feed characters with their backslash escapes.
title(Bin) ->
    Escapes = [{"\t", "\\\\t"}, {"\r", "\\\\r"}, {"\n", "\\\\n"}],
    Escape = fun ({Char, Replacement}, Subject) ->
        re:replace(Subject, Char, Replacement, [global])
    end,
    iolist_to_binary(lists:foldl(Escape, Bin, Escapes)).
257
258 -endif.
Something went wrong with that request. Please try again.