Skip to content

Commit 8d3399b

Browse files
author
John R Rose
committed
8292758: put support for UNSIGNED5 format into its own header file
Reviewed-by: dlong, coleenp
1 parent 6677227 commit 8d3399b

File tree

9 files changed

+1129
-157
lines changed

9 files changed

+1129
-157
lines changed

src/hotspot/share/code/compressedStream.cpp

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,36 +27,45 @@
2727
#include "utilities/ostream.hpp"
2828
#include "utilities/moveBits.hpp"
2929

30-
// 32-bit self-inverse encoding of float bits
31-
// converts trailing zeroes (common in floats) to leading zeroes
32-
inline juint CompressedStream::reverse_int(juint i) {
33-
return reverse_bits(i);
34-
}
35-
3630
jint CompressedReadStream::read_signed_int() {
37-
return decode_sign(read_int());
31+
return UNSIGNED5::decode_sign(read_int());
3832
}
3933

4034
// Compressing floats is simple, because the only common pattern
4135
// is trailing zeroes. (Compare leading sign bits on ints.)
4236
// Since floats are left-justified, as opposed to right-justified
4337
// ints, we can bit-reverse them in order to take advantage of int
44-
// compression.
45-
38+
// compression. Since bit reversal converts trailing zeroes to
39+
// leading zeroes, the effect is better compression of those common
40+
// 32-bit float values, such as integers or integers divided by
41+
// powers of two, that have many trailing zeroes.
4642
jfloat CompressedReadStream::read_float() {
4743
int rf = read_int();
48-
int f = reverse_int(rf);
44+
int f = reverse_bits(rf);
4945
return jfloat_cast(f);
5046
}
5147

48+
// The treatment of doubles is similar. We could bit-reverse each
49+
// entire 64-bit word, but it is almost as effective to bit-reverse
50+
// the individual halves. Since we are going to encode them
51+
// separately as 32-bit halves anyway, it seems slightly simpler
52+
// to reverse after splitting and, when reading, to reverse each
53+
// half before joining them together.
5254
jdouble CompressedReadStream::read_double() {
5355
jint rh = read_int();
5456
jint rl = read_int();
55-
jint h = reverse_int(rh);
56-
jint l = reverse_int(rl);
57+
jint h = reverse_bits(rh);
58+
jint l = reverse_bits(rl);
5759
return jdouble_cast(jlong_from(h, l));
5860
}
5961

62+
// A 64-bit long is encoded into distinct 32-bit halves. This saves
63+
// us from having to define a 64-bit encoding and is almost as
64+
// effective. A modified LEB128 could encode longs into 9 bytes, and
65+
// this technique maxes out at 10 bytes, so, if we didn't mind the
66+
// extra complexity of another coding system, we could process 64-bit
67+
// values as single units. But, the complexity does not seem
68+
// worthwhile.
6069
jlong CompressedReadStream::read_long() {
6170
jint low = read_signed_int();
6271
jint high = read_signed_int();
@@ -70,26 +79,31 @@ CompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStrea
7079
}
7180

7281
void CompressedWriteStream::grow() {
73-
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
82+
int nsize = _size * 2;
83+
const int min_expansion = UNSIGNED5::MAX_LENGTH;
84+
if (nsize < min_expansion*2) {
85+
nsize = min_expansion*2;
86+
}
87+
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, nsize);
7488
memcpy(_new_buffer, _buffer, _position);
7589
_buffer = _new_buffer;
76-
_size = _size * 2;
90+
_size = nsize;
7791
}
7892

7993
void CompressedWriteStream::write_float(jfloat value) {
8094
juint f = jint_cast(value);
81-
juint rf = reverse_int(f);
82-
assert(f == reverse_int(rf), "can re-read same bits");
95+
juint rf = reverse_bits(f);
96+
assert(f == reverse_bits(rf), "can re-read same bits");
8397
write_int(rf);
8498
}
8599

86100
void CompressedWriteStream::write_double(jdouble value) {
87101
juint h = high(jlong_cast(value));
88102
juint l = low( jlong_cast(value));
89-
juint rh = reverse_int(h);
90-
juint rl = reverse_int(l);
91-
assert(h == reverse_int(rh), "can re-read same bits");
92-
assert(l == reverse_int(rl), "can re-read same bits");
103+
juint rh = reverse_bits(h);
104+
juint rl = reverse_bits(l);
105+
assert(h == reverse_bits(rh), "can re-read same bits");
106+
assert(l == reverse_bits(rl), "can re-read same bits");
93107
write_int(rh);
94108
write_int(rl);
95109
}

src/hotspot/share/code/compressedStream.hpp

Lines changed: 15 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define SHARE_CODE_COMPRESSEDSTREAM_HPP
2727

2828
#include "memory/allocation.hpp"
29+
#include "utilities/unsigned5.hpp"
2930

3031
// Simple interface for filing out and filing in basic types
3132
// Used for writing out and reading in debugging information.
@@ -36,18 +37,6 @@ class CompressedStream : public ResourceObj {
3637
u_char* _buffer;
3738
int _position;
3839

39-
enum {
40-
// Constants for UNSIGNED5 coding of Pack200
41-
lg_H = 6, H = 1<<lg_H, // number of high codes (64)
42-
L = (1<<BitsPerByte)-H, // number of low codes (192)
43-
MAX_i = 4 // bytes are numbered in (0..4), max 5 bytes
44-
};
45-
46-
// 32-bit one-to-one sign encoding taken from Pack200
47-
// converts leading sign bits into leading zeroes with trailing sign bit
48-
static juint encode_sign(jint value) { return (value << 1) ^ (value >> 31); }
49-
static jint decode_sign(juint value) { return (value >> 1) ^ -(jint)(value & 1); }
50-
static juint reverse_int(juint i); // to trim trailing float 0's
5140
public:
5241
CompressedStream(u_char* buffer, int position = 0) {
5342
_buffer = buffer;
@@ -66,41 +55,6 @@ class CompressedReadStream : public CompressedStream {
6655
private:
6756
inline u_char read() { return _buffer[_position++]; }
6857

69-
// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
70-
// It assumes that most values have lots of leading zeroes.
71-
// Very small values, in the range [0..191], code in one byte.
72-
// Any 32-bit value (including negatives) can be coded, in
73-
// up to five bytes. The grammar is:
74-
// low_byte = [0..191]
75-
// high_byte = [192..255]
76-
// any_byte = low_byte | high_byte
77-
// coding = low_byte
78-
// | high_byte low_byte
79-
// | high_byte high_byte low_byte
80-
// | high_byte high_byte high_byte low_byte
81-
// | high_byte high_byte high_byte high_byte any_byte
82-
// Each high_byte contributes six bits of payload.
83-
// The encoding is one-to-one (except for integer overflow)
84-
// and easy to parse and unparse.
85-
86-
jint read_int_mb(jint b0) {
87-
int pos = position() - 1;
88-
u_char* buf = buffer() + pos;
89-
assert(buf[0] == b0 && b0 >= L, "correctly called");
90-
jint sum = b0;
91-
// must collect more bytes: b[1]...b[4]
92-
int lg_H_i = lg_H;
93-
for (int i = 0; ; ) {
94-
jint b_i = buf[++i]; // b_i = read(); ++i;
95-
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
96-
if (b_i < L || i == MAX_i) {
97-
set_position(pos+i+1);
98-
return sum;
99-
}
100-
lg_H_i += lg_H;
101-
}
102-
}
103-
10458
public:
10559
CompressedReadStream(u_char* buffer, int position = 0)
10660
: CompressedStream(buffer, position) {}
@@ -109,14 +63,14 @@ class CompressedReadStream : public CompressedStream {
10963
jbyte read_byte() { return (jbyte ) read(); }
11064
jchar read_char() { return (jchar ) read_int(); }
11165
jshort read_short() { return (jshort ) read_signed_int(); }
112-
jint read_int() { jint b0 = read();
113-
if (b0 < L) return b0;
114-
else return read_int_mb(b0);
115-
}
11666
jint read_signed_int();
117-
jfloat read_float(); // jfloat_cast(reverse_int(read_int()))
118-
jdouble read_double(); // jdouble_cast(2*reverse_int(read_int))
67+
jfloat read_float(); // jfloat_cast(reverse_bits(read_int()))
68+
jdouble read_double(); // jdouble_cast(2*reverse_bits(read_int))
11969
jlong read_long(); // jlong_from(2*read_signed_int())
70+
71+
jint read_int() {
72+
return UNSIGNED5::read_uint(_buffer, _position, 0);
73+
}
12074
};
12175

12276

@@ -134,23 +88,6 @@ class CompressedWriteStream : public CompressedStream {
13488
}
13589
void grow();
13690

137-
// UNSIGNED5 coding, 1-5 byte cases
138-
void write_int_mb(jint value) {
139-
juint sum = value;
140-
for (int i = 0; ; ) {
141-
if (sum < L || i == MAX_i) {
142-
// remainder is either a "low code" or the 5th byte
143-
assert(sum == (u_char)sum, "valid byte");
144-
write((u_char)sum);
145-
break;
146-
}
147-
sum -= L;
148-
int b_i = L + (sum % H); // this is a "high code"
149-
sum >>= lg_H; // extracted 6 bits
150-
write(b_i); ++i;
151-
}
152-
}
153-
15491
protected:
15592
int _size;
15693

@@ -163,13 +100,15 @@ class CompressedWriteStream : public CompressedStream {
163100
void write_byte(jbyte value) { write(value); }
164101
void write_char(jchar value) { write_int(value); }
165102
void write_short(jshort value) { write_signed_int(value); }
166-
void write_int(jint value) { if ((juint)value < L && !full())
167-
store((u_char)value);
168-
else write_int_mb(value); }
169-
void write_signed_int(jint value) { write_int(encode_sign(value)); }
170-
void write_float(jfloat value); // write_int(reverse_int(jint_cast(v)))
171-
void write_double(jdouble value); // write_int(reverse_int(<low,high>))
103+
void write_signed_int(jint value) { write_int(UNSIGNED5::encode_sign(value)); }
104+
void write_float(jfloat value); // write_int(reverse_bits(jint_cast(v)))
105+
void write_double(jdouble value); // write_int(reverse_bits(<low,high>))
172106
void write_long(jlong value); // write_signed_int(<low,high>)
107+
108+
void write_int(juint value) {
109+
UNSIGNED5::write_uint_grow(value, _buffer, _position, _size,
110+
[&](int){ grow(); });
111+
}
173112
};
174113

175114
#endif // SHARE_CODE_COMPRESSEDSTREAM_HPP

src/hotspot/share/utilities/debug.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "utilities/formatBuffer.hpp"
6262
#include "utilities/globalDefinitions.hpp"
6363
#include "utilities/macros.hpp"
64+
#include "utilities/unsigned5.hpp"
6465
#include "utilities/vmError.hpp"
6566

6667
#include <stdio.h>
@@ -648,6 +649,37 @@ extern "C" JNIEXPORT void findbcp(intptr_t method, intptr_t bcp) {
648649
}
649650
}
650651

652+
// check and decode a single u5 value
653+
extern "C" JNIEXPORT u4 u5decode(intptr_t addr) {
654+
Command c("u5decode");
655+
u1* arr = (u1*)addr;
656+
size_t off = 0, lim = 5;
657+
if (!UNSIGNED5::check_length(arr, off, lim)) {
658+
return 0;
659+
}
660+
return UNSIGNED5::read_uint(arr, off, lim);
661+
}
662+
663+
// Sets up a Reader from addr/limit and prints count items.
664+
// A limit of zero means no set limit; stop at the first null
665+
// or after count items are printed.
666+
// A count of zero or less is converted to -1, which means
667+
// there is no limit on the count of items printed; the
668+
// printing stops when a null is printed or at limit.
669+
// See documentation for UNSIGNED5::Reader::print(count).
670+
extern "C" JNIEXPORT intptr_t u5p(intptr_t addr,
671+
intptr_t limit,
672+
int count) {
673+
Command c("u5p");
674+
u1* arr = (u1*)addr;
675+
if (limit && limit < addr) limit = addr;
676+
size_t lim = !limit ? 0 : (limit - addr);
677+
size_t endpos = UNSIGNED5::print_count(count > 0 ? count : -1,
678+
arr, (size_t)0, lim);
679+
return addr + endpos;
680+
}
681+
682+
651683
// int versions of all methods to avoid having to type type casts in the debugger
652684

653685
void pp(intptr_t p) { pp((void*)p); }
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include "precompiled.hpp"
26+
#include "memory/allocation.hpp"
27+
#include "utilities/unsigned5.hpp"
28+
29+
// Most of UNSIGNED5 is in the header file.
30+
// Let's put a few debug functions out-of-line here.
31+
32+
// For the record, UNSIGNED5 was defined around 2001 and was first
33+
// published in the initial Pack200 spec. See:
34+
// https://docs.oracle.com/en/java/javase/11/docs/specs/pack-spec.html
35+
// in Section 6.1, "Encoding of Small Whole Numbers".
36+
37+
PRAGMA_DIAG_PUSH
38+
PRAGMA_FORMAT_NONLITERAL_IGNORED
39+
40+
// For debugging, even in product builds (see debug.cpp).
41+
template<typename ARR, typename OFF, typename GET>
42+
void UNSIGNED5::Reader<ARR,OFF,GET>::
43+
print_on(outputStream* st, int count,
44+
const char* left, // "U5: ["
45+
const char* right // "] (values=%d/length=%d)\n"
46+
) {
47+
if (left == NULL) left = "U5: [";
48+
if (right == NULL) right = "] (values=%d/length=%d)\n";
49+
int printed = 0;
50+
st->print("%s", left);
51+
for (;;) {
52+
if (count >= 0 && printed >= count) break;
53+
if (!has_next()) {
54+
if ((_limit == 0 || _position < _limit) && _array[_position] == 0) {
55+
st->print(" null");
56+
++_position; // skip null byte
57+
++printed;
58+
if (_limit != 0) continue; // keep going to explicit limit
59+
}
60+
break;
61+
}
62+
u4 value = next_uint();
63+
if (printed == 0)
64+
st->print("%d", value);
65+
else
66+
st->print(" %d", value);
67+
++printed;
68+
}
69+
st->print(right,
70+
// these arguments may or may not be used in the format string:
71+
printed,
72+
(int)_position);
73+
}
74+
75+
PRAGMA_DIAG_POP
76+
77+
// Explicit instantiation for supported types.
78+
template void UNSIGNED5::Reader<char*,int>::
79+
print_on(outputStream* st, int count, const char* left, const char* right);
80+
template void UNSIGNED5::Reader<u1*,int>::
81+
print_on(outputStream* st, int count, const char* left, const char* right);
82+
template void UNSIGNED5::Reader<address,size_t>::
83+
print_on(outputStream* st, int count, const char* left, const char* right);

0 commit comments

Comments
 (0)