Skip to content

Commit

Permalink
streamline `get-byte' (especially as used by `read-[bytes-]line')
Browse files Browse the repository at this point in the history
Applies in the case of simple ports without line counting, etc.
Also, `read-line' keeps track of whether all bytes are ASCII
(which is easy) to shortcut general UTF-8 decoding.
  • Loading branch information
mflatt committed Nov 3, 2011
1 parent 4bbb2d4 commit 9896cb6
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 48 deletions.
1 change: 1 addition & 0 deletions src/racket/include/scheme.h
Expand Up @@ -1374,6 +1374,7 @@ struct Scheme_Port
struct Scheme_Input_Port
{
struct Scheme_Port p;
char slow; /* 0 => no line count, no ungotten, etc.: can call get_string_fun directly */
char closed, pending_eof;
Scheme_Object *sub_type;
Scheme_Custodian_Reference *mref;
Expand Down
143 changes: 102 additions & 41 deletions src/racket/src/port.c
Expand Up @@ -1448,6 +1448,7 @@ scheme_make_input_port(Scheme_Object *subtype,
ip->closed = 0;
ip->read_handler = NULL;
init_port_locations((Scheme_Port *)ip);
if (ip->p.count_lines) ip->slow = 1;

if (progress_evt_fun == scheme_progress_evt_via_get)
ip->unless_cache = scheme_false;
Expand Down Expand Up @@ -1854,8 +1855,10 @@ intptr_t scheme_get_byte_string_unless(const char *who,
}
s = NULL;

if (!peek)
if (!peek) {
ip->ungotten_count = i;
ip->slow = 1;
}

l = pipe_char_count(ip->peeked_read);
if (size && l) {
Expand Down Expand Up @@ -2064,6 +2067,7 @@ intptr_t scheme_get_byte_string_unless(const char *who,
}

if ((got || total_got) && only_avail) {
ip->slow = 1;
ip->ungotten_special = ip->special;
ip->special = NULL;
gc = 0;
Expand All @@ -2075,13 +2079,17 @@ intptr_t scheme_get_byte_string_unless(const char *who,
} else if (gc == EOF) {
ip->p.utf8state = 0;
if (!got && !total_got) {
if (peek && ip->pending_eof)
if (peek && ip->pending_eof) {
ip->pending_eof = 2;
ip->slow = 1;
}
return EOF;
}
/* remember the EOF for next time */
if (ip->pending_eof)
if (ip->pending_eof) {
ip->pending_eof = 2;
ip->slow = 1;
}
gc = 0;
size = 0; /* so that we stop */
} else if (gc == SCHEME_UNLESS_READY) {
Expand Down Expand Up @@ -2115,9 +2123,10 @@ intptr_t scheme_get_byte_string_unless(const char *who,
/* save newly peeked string for future peeks/reads */
/***************************************************/
if (gc) {
if ((gc == 1) && !ip->ungotten_count && !ip->peeked_write) {
ip->slow = 1;
if ((gc == 1) && !ip->ungotten_count && !ip->peeked_write)
ip->ungotten[ip->ungotten_count++] = buffer[offset];
} else {
else {
if (!ip->peeked_write) {
Scheme_Object *rd, *wt;
scheme_pipe(&rd, &wt);
Expand Down Expand Up @@ -2425,6 +2434,7 @@ int scheme_peeked_read_via_get(Scheme_Input_Port *ip,
/* This sema makes other threads wait before reading: */
sema = scheme_make_sema(0);
ip->input_lock = sema;
ip->slow = 1;

/* This sema lets other threads try to make progress,
if the current target doesn't work out */
Expand Down Expand Up @@ -2541,6 +2551,7 @@ Scheme_Object *scheme_progress_evt_via_get(Scheme_Input_Port *port)
sema = scheme_make_sema(0);

port->progress_evt = sema;
port->slow = 1;

return sema;
}
Expand Down Expand Up @@ -2729,11 +2740,15 @@ intptr_t scheme_get_char_string(const char *who,
}
}

static MZ_INLINE
intptr_t get_one_byte(const char *who,
Scheme_Object *port,
char *buffer, intptr_t offset,
int only_avail)
/* Forward declaration of the general-case single-byte reader that
   get_one_byte() falls back to when its direct call is not applicable.
   The prototype is wrapped in MZ_DO_NOT_INLINE -- presumably so the
   compiler keeps this larger function out of line (confirm against the
   macro's definition). */
MZ_DO_NOT_INLINE(static intptr_t get_one_byte_slow(const char *who,
Scheme_Object *port,
char *buffer, intptr_t offset,
int only_avail));

static intptr_t get_one_byte_slow(const char *who,
Scheme_Object *port,
char *buffer, intptr_t offset,
int only_avail)
{
Scheme_Input_Port *ip;
intptr_t gc;
Expand Down Expand Up @@ -2777,36 +2792,39 @@ intptr_t get_one_byte(const char *who,
ip->pending_eof = 1;
return EOF;
} else {
if (!ip->progress_evt && !ip->p.count_lines)
ip->slow = 0;

/* Call port's get function. */
gs = ip->get_string_fun;

gc = gs(ip, buffer, offset, 1, 0, NULL);

if (ip->progress_evt && (gc > 0))
post_progress(ip);
post_progress(ip);

if (gc < 1) {
if (gc == SCHEME_SPECIAL) {
if (special_ok) {
if (ip->p.position >= 0)
ip->p.position++;
if (ip->p.count_lines)
inc_pos((Scheme_Port *)ip, 1);
return SCHEME_SPECIAL;
} else {
scheme_bad_time_for_special(who, port);
return 0;
}
} else if (gc == EOF) {
ip->p.utf8state = 0;
return EOF;
} else {
/* didn't get anything the first try, so use slow path: */
special_is_ok = special_ok;
return scheme_get_byte_string_unless(who, port,
buffer, offset, 1,
0, 0, NULL, NULL);
}
if (gc == SCHEME_SPECIAL) {
if (special_ok) {
if (ip->p.position >= 0)
ip->p.position++;
if (ip->p.count_lines)
inc_pos((Scheme_Port *)ip, 1);
return SCHEME_SPECIAL;
} else {
scheme_bad_time_for_special(who, port);
return 0;
}
} else if (gc == EOF) {
ip->p.utf8state = 0;
return EOF;
} else {
/* didn't get anything the first try, so use slow path: */
special_is_ok = special_ok;
return scheme_get_byte_string_unless(who, port,
buffer, offset, 1,
0, 0, NULL, NULL);
}
}
}
}
Expand All @@ -2823,6 +2841,37 @@ intptr_t get_one_byte(const char *who,
return gc;
}

/* Reads a single byte from `port' into buffer[0].

   who    - primitive name forwarded to scheme_bad_time_for_special() and
            get_one_byte_slow(); callers in this file pass "read-char" or
            "read-byte".
   port   - the port to read from.
   buffer - destination; the byte is requested at offset 0.

   Fast path: when specials are not accepted, `port' satisfies
   SCHEME_INPORTP, and the port's `slow' flag is 0, the port's
   get_string_fun is called directly. A non-zero result from that call is
   returned as-is. In every other case (including a zero result) the
   request is handed to get_one_byte_slow(who, port, buffer, 0, 0) and its
   result is returned. */
static MZ_INLINE intptr_t get_one_byte(GC_CAN_IGNORE const char *who,
Scheme_Object *port, char *buffer)
{
if (!special_is_ok && SCHEME_INPORTP(port)) {
GC_CAN_IGNORE Scheme_Input_Port *ip;
ip = (Scheme_Input_Port *)port;
/* slow == 0 => no line count, no ungotten bytes, etc., so
   get_string_fun can be called directly */
if (!ip->slow) {
Scheme_Get_String_Fun gs;
int v;

gs = ip->get_string_fun;

/* request exactly one byte at offset 0; the trailing 0 and NULL appear
   to be the `nonblock' and `unless' arguments (cf. the
   fd_get_string_slow prototype) -- confirm against the
   Scheme_Get_String_Fun typedef */
v = gs(ip, buffer, 0, 1, 0, NULL);

/* v == 0: nothing was obtained on this try; fall out to the slow path */
if (v) {
if (v == SCHEME_SPECIAL) {
/* specials are not acceptable here (special_is_ok is false);
   presumably scheme_bad_time_for_special() raises and does not
   return -- confirm */
scheme_bad_time_for_special(who, port);
}

ip = (Scheme_Input_Port *)port; /* since ignored by GC */
/* NOTE(review): position is advanced for every non-zero result, which
   includes EOF; the EOF branch visible in get_one_byte_slow() returns
   without touching position -- confirm this difference is intended */
if (ip->p.position >= 0)
ip->p.position++;

return v;
}
}
}

return get_one_byte_slow(who, port, buffer, 0, 0);
}

int
scheme_getc(Scheme_Object *port)
{
Expand All @@ -2838,9 +2887,7 @@ scheme_getc(Scheme_Object *port)
delta > 0, scheme_make_integer(delta-1),
NULL);
} else {
v = get_one_byte("read-char", port,
s, 0,
0);
v = get_one_byte("read-char", port, s);
}

if ((v == EOF) || (v == SCHEME_SPECIAL)) {
Expand Down Expand Up @@ -2880,9 +2927,7 @@ scheme_get_byte(Scheme_Object *port)
char s[1];
int v;

v = get_one_byte("read-byte", port,
s, 0,
0);
v = get_one_byte("read-byte", port, s);

if ((v == EOF) || (v == SCHEME_SPECIAL))
return v;
Expand Down Expand Up @@ -3160,6 +3205,8 @@ scheme_ungetc (int ch, Scheme_Object *port)

CHECK_PORT_CLOSED("#<primitive:peek-port-char>", "input", port, ip->closed);

ip->slow = 1;

if (ch == EOF) {
if (ip->pending_eof) /* non-zero means that EOFs are tracked */
ip->pending_eof = 2;
Expand Down Expand Up @@ -3210,9 +3257,10 @@ scheme_byte_ready (Scheme_Object *port)

CHECK_PORT_CLOSED("char-ready?", "input", port, ip->closed);

if (ip->ungotten_count || ip->ungotten_special
|| (ip->pending_eof > 1)
|| pipe_char_count(ip->peeked_read))
if (ip->slow
&& (ip->ungotten_count || ip->ungotten_special
|| (ip->pending_eof > 1)
|| pipe_char_count(ip->peeked_read)))
retval = 1;
else {
Scheme_In_Ready_Fun f = ip->byte_ready_fun;
Expand Down Expand Up @@ -3582,6 +3630,13 @@ scheme_count_lines (Scheme_Object *port)
Scheme_Count_Lines_Fun cl = ip->count_lines_fun;
cl(ip);
}

if (scheme_is_input_port(port)) {
Scheme_Input_Port *iip;
iip = scheme_input_port_record(port);
if (iip)
iip->slow = 1;
}
}
}

Expand Down Expand Up @@ -3609,6 +3664,7 @@ scheme_close_input_port (Scheme_Object *port)
}

ip->closed = 1;
ip->slow = 1;
ip->ungotten_count = 0;
ip->ungotten_special = NULL;
}
Expand Down Expand Up @@ -5541,6 +5597,11 @@ fd_byte_ready (Scheme_Input_Port *port)
}
}

/* Forward declaration of fd_get_string_slow(), whose definition follows.
   The prototype is wrapped in MZ_DO_NOT_INLINE -- presumably so the
   compiler does not inline this function into its callers (confirm
   against the macro's definition). */
MZ_DO_NOT_INLINE(static intptr_t fd_get_string_slow(Scheme_Input_Port *port,
char *buffer, intptr_t offset, intptr_t size,
int nonblock,
Scheme_Object *unless));

static intptr_t fd_get_string_slow(Scheme_Input_Port *port,
char *buffer, intptr_t offset, intptr_t size,
int nonblock,
Expand Down
45 changes: 41 additions & 4 deletions src/racket/src/portfun.c
Expand Up @@ -3013,10 +3013,12 @@ static Scheme_Object *
do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])
{
Scheme_Object *port;
int ch;
int ch, ascii;
int crlf = 0, cr = 0, lf = 1;
char *buf, *oldbuf, onstack[32];
intptr_t size = 31, oldsize, i = 0;
Scheme_Input_Port *ip;
Scheme_Get_String_Fun gs;

if (argc && !SCHEME_INPUT_PORTP(argv[0]))
scheme_wrong_type(who, "input-port", 0, argc, argv);
Expand Down Expand Up @@ -3051,8 +3053,31 @@ do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])

buf = onstack;

ip = scheme_input_port_record(port);
gs = ip->get_string_fun;
ascii = 1;

while (1) {
ch = scheme_get_byte(port);
if (!ip->slow) {
/* `read-line' seems important enough to inline the `read-byte' fast path: */
char s[1];

ch = gs(ip, s, 0, 1, 0, NULL);

if (ch == SCHEME_SPECIAL) {
scheme_bad_time_for_special(who, port);
} else if (ch) {
if (ip->p.position >= 0)
ip->p.position++;

if (ch != EOF)
ch = ((unsigned char *)s)[0];
} else
ch = scheme_get_byte(port);
} else {
ch = scheme_get_byte(port);
}

if (ch == EOF) {
if (!i)
return scheme_eof;
Expand Down Expand Up @@ -3086,14 +3111,26 @@ do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])
memcpy(buf, oldbuf, oldsize);
}
buf[i++] = ch;
if (ch > 127) ascii = 0;
}

if (as_bytes) {
buf[i] = '\0';
return scheme_make_sized_byte_string(buf, i, buf == (char *)onstack);
} else {
buf[i] = '\0';
return scheme_make_sized_utf8_string(buf, i);
int j;
if (ascii) {
mzchar *us;
us = scheme_malloc_atomic(sizeof(mzchar) * (i + 1));
for (j = 0; j < i; j++) {
us[j] = ((unsigned char *)buf)[j];
}
us[i] = 0;
return scheme_make_sized_offset_char_string(us, 0, i, 0);
} else {
buf[i] = '\0';
return scheme_make_sized_utf8_string(buf, i);
}
}
}

Expand Down
5 changes: 2 additions & 3 deletions src/racket/src/string.c
Expand Up @@ -891,9 +891,8 @@ Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, int
NULL, 0 /* not UTF-16 */, 0xFFFD);
us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
scheme_utf8_decode((unsigned char *)chars, d, d + len,
us, 0, -1,
NULL, 0 /* not UTF-16 */, 0xFFFD);

us, 0, -1,
NULL, 0 /* not UTF-16 */, 0xFFFD);
us[ulen] = 0;
} else {
us = (mzchar *)"\0\0\0";
Expand Down

0 comments on commit 9896cb6

Please sign in to comment.