Skip to content

Commit

Permalink
[flang][runtime] Fix fixed-width field internal wide character input (#74683)
Browse files Browse the repository at this point in the history

There was some confusion about units (bytes vs characters) in the
handling of the amount of input remaining in fixed-width formatted input
fields. Clarify that any variable or parameter counting "remaining"
space in a field in the I/O runtime is always in units of bytes, and
make it so where it wasn't.

Fixes the bug(s) in
llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03, although
the test still won't pass due to its dependence on gfortran's
list-directed output spacing.
  • Loading branch information
klausler committed Dec 11, 2023
1 parent 4f9cb79 commit 353d56d
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 51 deletions.
101 changes: 53 additions & 48 deletions flang/runtime/edit-input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,48 +916,52 @@ static bool EditListDirectedCharacterInput(
}

template <typename CHAR>
bool EditCharacterInput(
IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
std::size_t lengthChars) {
switch (edit.descriptor) {
case DataEdit::ListDirected:
return EditListDirectedCharacterInput(io, x, length, edit);
return EditListDirectedCharacterInput(io, x, lengthChars, edit);
case 'A':
case 'G':
break;
case 'B':
return EditBOZInput<1>(io, edit, x, length * sizeof *x);
return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
case 'O':
return EditBOZInput<3>(io, edit, x, length * sizeof *x);
return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
case 'Z':
return EditBOZInput<4>(io, edit, x, length * sizeof *x);
return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
edit.descriptor);
return false;
}
const ConnectionState &connection{io.GetConnectionState()};
std::size_t remaining{length};
std::size_t remainingChars{lengthChars};
// Skip leading characters.
// Their bytes don't count towards INQUIRE(IOLENGTH=).
std::size_t skipChars{0};
if (edit.width && *edit.width > 0) {
remaining = *edit.width;
remainingChars = *edit.width;
if (remainingChars > lengthChars) {
skipChars = remainingChars - lengthChars;
}
}
// When the field is wider than the variable, we drop the leading
// characters. When the variable is wider than the field, there can be
// trailing padding or an EOR condition.
const char *input{nullptr};
std::size_t ready{0};
// Skip leading bytes.
// These bytes don't count towards INQUIRE(IOLENGTH=).
std::size_t skip{remaining > length ? remaining - length : 0};
std::size_t readyBytes{0};
// Transfer payload bytes; these do count.
while (remaining > 0) {
if (ready == 0) {
ready = io.GetNextInputBytes(input);
if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
if (io.CheckForEndOfRecord(ready)) {
if (ready == 0) {
while (remainingChars > 0) {
if (readyBytes == 0) {
readyBytes = io.GetNextInputBytes(input);
if (readyBytes == 0 ||
(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
if (io.CheckForEndOfRecord(readyBytes)) {
if (readyBytes == 0) {
// PAD='YES' and no more data
std::fill_n(x, length, ' ');
std::fill_n(x, lengthChars, ' ');
return !io.GetIoErrorHandler().InError();
} else {
// Do partial read(s) then pad on last iteration
Expand All @@ -967,63 +971,64 @@ bool EditCharacterInput(
}
}
}
std::size_t chunk;
bool skipping{skip > 0};
std::size_t chunkBytes;
std::size_t chunkChars{1};
bool skipping{skipChars > 0};
if (connection.isUTF8) {
chunk = MeasureUTF8Bytes(*input);
chunkBytes = MeasureUTF8Bytes(*input);
if (skipping) {
--skip;
--skipChars;
} else if (auto ucs{DecodeUTF8(input)}) {
*x++ = *ucs;
--length;
} else if (chunk == 0) {
--lengthChars;
} else if (chunkBytes == 0) {
// error recovery: skip bad encoding
chunk = 1;
chunkBytes = 1;
}
--remaining;
} else if (connection.internalIoCharKind > 1) {
// Reading from non-default character internal unit
chunk = connection.internalIoCharKind;
chunkBytes = connection.internalIoCharKind;
if (skipping) {
--skip;
--skipChars;
} else {
char32_t buffer{0};
std::memcpy(&buffer, input, chunk);
std::memcpy(&buffer, input, chunkBytes);
*x++ = buffer;
--length;
--lengthChars;
}
--remaining;
} else if constexpr (sizeof *x > 1) {
// Read single byte with expansion into multi-byte CHARACTER
chunk = 1;
chunkBytes = 1;
if (skipping) {
--skip;
--skipChars;
} else {
*x++ = static_cast<unsigned char>(*input);
--length;
--lengthChars;
}
--remaining;
} else { // single bytes -> default CHARACTER
if (skipping) {
chunk = std::min<std::size_t>(skip, ready);
skip -= chunk;
chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
chunkChars = chunkBytes;
skipChars -= chunkChars;
} else {
chunk = std::min<std::size_t>(remaining, ready);
std::memcpy(x, input, chunk);
x += chunk;
length -= chunk;
chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
chunkChars = chunkBytes;
std::memcpy(x, input, chunkBytes);
x += chunkBytes;
lengthChars -= chunkChars;
}
remaining -= chunk;
}
input += chunk;
input += chunkBytes;
remainingChars -= chunkChars;
if (!skipping) {
io.GotChar(chunk);
io.GotChar(chunkBytes);
}
io.HandleRelativePosition(chunk);
ready -= chunk;
io.HandleRelativePosition(chunkBytes);
readyBytes -= chunkBytes;
}
// Pad the remainder of the input variable, if any.
std::fill_n(x, length, ' ');
std::fill_n(x, lengthChars, ' ');
return CheckCompleteListDirectedField(io, edit);
}

Expand Down
14 changes: 11 additions & 3 deletions flang/runtime/io-stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ class IoStatementState {
std::size_t GetNextInputBytes(const char *&);
bool AdvanceRecord(int = 1);
void BackspaceRecord();
void HandleRelativePosition(std::int64_t);
void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
void HandleRelativePosition(std::int64_t byteOffset);
void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
bool BeginReadingRecord();
Expand Down Expand Up @@ -124,7 +124,11 @@ class IoStatementState {
// Vacant after the end of the current record
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);

// For fixed-width fields, return the number of remaining characters.
// The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
// are always in units of bytes, not characters; the distinction matters
// for internal input from CHARACTER(KIND=2 and 4).

// For fixed-width fields, return the number of remaining bytes.
// Skip over leading blanks.
std::optional<int> CueUpInput(const DataEdit &edit) {
std::optional<int> remaining;
Expand All @@ -134,6 +138,10 @@ class IoStatementState {
} else {
if (edit.width.value_or(0) > 0) {
remaining = *edit.width;
if (int bytesPerChar{GetConnectionState().internalIoCharKind};
bytesPerChar > 1) {
*remaining *= bytesPerChar;
}
}
SkipSpaces(remaining);
}
Expand Down

0 comments on commit 353d56d

Please sign in to comment.