/
float_converter.cpp
255 lines (219 loc) · 7.65 KB
/
float_converter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
//===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/stdio/scanf_core/float_converter.h"
#include "src/__support/CPP/limits.h"
#include "src/__support/char_vector.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/str_to_float.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
#include <stddef.h>
namespace __llvm_libc {
namespace scanf_core {
// Sets the 0x20 bit of the given character. For ASCII letters this maps
// uppercase onto lowercase and leaves lowercase untouched. The bit is set
// unconditionally, so characters that are not letters may change as well
// (e.g. '@' becomes '`'); the result is only meaningful when compared against
// lowercase letters.
constexpr inline char to_lower(char a) {
  return static_cast<char>(a | 0x20);
}
void write_with_length(char *str, const FormatSection &to_conv) {
if ((to_conv.flags & NO_WRITE) != 0) {
return;
}
void *output_ptr = to_conv.output_ptr;
LengthModifier lm = to_conv.length_modifier;
switch (lm) {
case (LengthModifier::l): {
auto value = internal::strtofloatingpoint<double>(str, nullptr);
*reinterpret_cast<double *>(output_ptr) = value;
break;
}
case (LengthModifier::L): {
auto value = internal::strtofloatingpoint<long double>(str, nullptr);
*reinterpret_cast<long double *>(output_ptr) = value;
break;
}
default: {
auto value = internal::strtofloatingpoint<float>(str, nullptr);
*reinterpret_cast<float *>(output_ptr) = value;
break;
}
}
}
// Matches a floating point number from reader and hands the matched text to
// write_with_length, which stores the parsed value according to to_conv. All
// of the floating point conversions are the same for scanf, every name will
// accept every style.
//
// Returns READ_OK when a number, infinity, or NaN was matched,
// MATCHING_FAILURE when the input is not a valid floating point number, and
// ALLOCATION_FAILURE when the buffer holding the matched text cannot grow.
// Whenever a character past the end of the match has been read from the
// reader, it is put back with ungetc before READ_OK or MATCHING_FAILURE is
// returned. At most to_conv.max_width characters are ever matched.
int convert_float(Reader *reader, const FormatSection &to_conv) {
  // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
  // infinity, or NaN, whose format is the same as expected for the subject
  // sequence of the strtod function. The corresponding argument shall be a
  // pointer to floating."

  CharVector out_str = CharVector();
  bool is_number = false;

  // A non-positive max_width in the format section is treated as "no limit".
  size_t max_width = cpp::numeric_limits<size_t>::max();
  if (to_conv.max_width > 0) {
    max_width = to_conv.max_width;
  }

  char cur_char = reader->getc();

  // Handle the sign.
  if (cur_char == '+' || cur_char == '-') {
    if (!out_str.append(cur_char)) {
      return ALLOCATION_FAILURE;
    }
    // A lone sign is not a number. No lookahead has been read at this point,
    // so there is nothing to put back.
    if (out_str.length() == max_width) {
      return MATCHING_FAILURE;
    }
    cur_char = reader->getc();
  }

  static constexpr char DECIMAL_POINT = '.';
  static const char inf_string[] = "infinity";
  static constexpr size_t INF_LEN = sizeof(inf_string) - 1;
  static constexpr size_t SHORT_INF_LEN = 3; // length of "inf"

  // Handle inf
  if (to_lower(cur_char) == inf_string[0]) {
    size_t inf_index = 0;
    // The index and width bounds are checked before the literal is indexed so
    // the terminating null is never compared against.
    for (; inf_index < INF_LEN && out_str.length() < max_width &&
           to_lower(cur_char) == inf_string[inf_index];
         ++inf_index) {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      cur_char = reader->getc();
    }

    // The loop body runs at least once and always reads one character past
    // what it appended, so that lookahead has to be put back.
    reader->ungetc(cur_char);

    // Only "inf" and "infinity" are accepted, anything in between is invalid.
    if (inf_index == SHORT_INF_LEN || inf_index == INF_LEN) {
      write_with_length(out_str.c_str(), to_conv);
      return READ_OK;
    }
    return MATCHING_FAILURE;
  }

  static const char nan_string[] = "nan";
  static constexpr size_t NAN_LEN = sizeof(nan_string) - 1;

  // Handle nan
  if (to_lower(cur_char) == nan_string[0]) {
    size_t nan_index = 0;
    for (; nan_index < NAN_LEN && out_str.length() < max_width &&
           to_lower(cur_char) == nan_string[nan_index];
         ++nan_index) {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      cur_char = reader->getc();
    }

    // As with inf, one character past the appended text has been read.
    reader->ungetc(cur_char);

    if (nan_index == NAN_LEN) {
      write_with_length(out_str.c_str(), to_conv);
      return READ_OK;
    }
    return MATCHING_FAILURE;
  }

  // Assume base of 10 by default but check if it is actually base 16.
  int base = 10;

  // If the string starts with 0 it might be in hex.
  if (cur_char == '0') {
    is_number = true;
    if (!out_str.append(cur_char)) {
      return ALLOCATION_FAILURE;
    }
    // If we've hit the end, then this is "0", which is valid. Nothing past the
    // '0' has been read yet.
    if (out_str.length() == max_width) {
      write_with_length(out_str.c_str(), to_conv);
      return READ_OK;
    }
    // Read the next character to check.
    cur_char = reader->getc();

    // If that next character is an 'x' then this is a hexadecimal number.
    if (to_lower(cur_char) == 'x') {
      base = 16;
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      // If we've hit the end here, we have "0x" which is a valid prefix to a
      // floating point number, and will be evaluated to 0.
      if (out_str.length() == max_width) {
        write_with_length(out_str.c_str(), to_conv);
        return READ_OK;
      }
      cur_char = reader->getc();
    }
  }

  const char exponent_mark = ((base == 10) ? 'e' : 'p');
  bool after_decimal = false;

  // The format for the remaining characters at this point is DD.DDe+/-DD for
  // base 10 and XX.XXp+/-DD for base 16

  // This handles the digits before and after the decimal point, but not the
  // exponent.
  while (out_str.length() < max_width) {
    if (internal::isalnum(cur_char) &&
        internal::b36_char_to_int(cur_char) < base) {
      is_number = true;
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      cur_char = reader->getc();
    } else if (cur_char == DECIMAL_POINT && !after_decimal) {
      after_decimal = true;
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      cur_char = reader->getc();
    } else {
      break;
    }
  }

  // Handle the exponent, which has an exponent mark, an optional sign, and
  // decimal digits. If the digits above already used up the whole field width
  // then cur_char lies outside of the field and must not be consumed, even if
  // it happens to be an exponent mark.
  if (out_str.length() < max_width && to_lower(cur_char) == exponent_mark) {
    if (!out_str.append(cur_char)) {
      return ALLOCATION_FAILURE;
    }
    if (out_str.length() == max_width) {
      // This is laid out in the standard as being a matching error (100e is not
      // a valid float) but may conflict with existing implementations.
      return MATCHING_FAILURE;
    }
    cur_char = reader->getc();

    if (cur_char == '+' || cur_char == '-') {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      if (out_str.length() == max_width) {
        return MATCHING_FAILURE;
      }
      cur_char = reader->getc();
    }

    // It is specified by the standard that "100er" is a matching failure since
    // the longest prefix of a possibly valid floating-point number (which is
    // "100e") is not a valid floating-point number. If there is an exponent
    // mark then there must be a digit after it else the number is not valid.
    // Some implementations will roll back two characters (to just "100") and
    // accept that since the prefix is not valid, and some will interpret an
    // exponent mark followed by no digits as an additional exponent of 0
    // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
    // by the standard, but they may be used in real code, see Hyrum's law. This
    // code follows the standard, but may be incompatible due to code expecting
    // these bugs.
    if (!internal::isdigit(cur_char)) {
      // The offending character was only looked at, so it goes back to the
      // reader; the exponent mark (and sign) stay consumed.
      reader->ungetc(cur_char);
      return MATCHING_FAILURE;
    }

    while (internal::isdigit(cur_char) && out_str.length() < max_width) {
      if (!out_str.append(cur_char)) {
        return ALLOCATION_FAILURE;
      }
      cur_char = reader->getc();
    }
  }

  // We always read one more character than will be used, so we have to put the
  // last one back.
  reader->ungetc(cur_char);

  // If we haven't actually found any digits, this is a matching failure (this
  // catches cases like "+.")
  if (!is_number) {
    return MATCHING_FAILURE;
  }
  write_with_length(out_str.c_str(), to_conv);
  return READ_OK;
}
} // namespace scanf_core
} // namespace __llvm_libc