Skip to content
This repository
Browse code

streamline `get-byte' (especially as used by `read-[bytes-]line')

The fast path applies to simple ports without line counting, ungotten bytes, etc.
Also, `read-line' keeps track of whether all bytes are ASCII
(which is easy) so that it can skip general UTF-8 decoding.
  • Loading branch information...
commit 9896cb6851d55ffb7f4c5480a7f3746e09bd4193 1 parent 4bbb2d4
Matthew Flatt authored
1  src/racket/include/scheme.h
@@ -1374,6 +1374,7 @@ struct Scheme_Port
1374 1374
 struct Scheme_Input_Port
1375 1375
 {
1376 1376
   struct Scheme_Port p;
  1377
+  char slow; /* 0 => no line count, no ungotten, etc.: can call get_string_fun directly */
1377 1378
   char closed, pending_eof;
1378 1379
   Scheme_Object *sub_type;
1379 1380
   Scheme_Custodian_Reference *mref;
143  src/racket/src/port.c
@@ -1448,6 +1448,7 @@ scheme_make_input_port(Scheme_Object *subtype,
1448 1448
   ip->closed = 0;
1449 1449
   ip->read_handler = NULL;
1450 1450
   init_port_locations((Scheme_Port *)ip);
  1451
+  if (ip->p.count_lines) ip->slow = 1;
1451 1452
 
1452 1453
   if (progress_evt_fun == scheme_progress_evt_via_get)
1453 1454
     ip->unless_cache = scheme_false;
@@ -1854,8 +1855,10 @@ intptr_t scheme_get_byte_string_unless(const char *who,
1854 1855
       }
1855 1856
       s = NULL;
1856 1857
 
1857  
-      if (!peek)
  1858
+      if (!peek) {
1858 1859
 	ip->ungotten_count = i;
  1860
+        ip->slow = 1;
  1861
+      }
1859 1862
 
1860 1863
       l = pipe_char_count(ip->peeked_read);
1861 1864
       if (size && l) {
@@ -2064,6 +2067,7 @@ intptr_t scheme_get_byte_string_unless(const char *who,
2064 2067
 	}
2065 2068
 
2066 2069
 	if ((got || total_got) && only_avail) {
  2070
+          ip->slow = 1;
2067 2071
 	  ip->ungotten_special = ip->special;
2068 2072
 	  ip->special = NULL;
2069 2073
 	  gc = 0;
@@ -2075,13 +2079,17 @@ intptr_t scheme_get_byte_string_unless(const char *who,
2075 2079
       } else if (gc == EOF) {
2076 2080
 	ip->p.utf8state = 0;
2077 2081
 	if (!got && !total_got) {
2078  
-	  if (peek && ip->pending_eof)
  2082
+	  if (peek && ip->pending_eof) {
2079 2083
 	    ip->pending_eof = 2;
  2084
+            ip->slow = 1;
  2085
+          }
2080 2086
 	  return EOF;
2081 2087
 	}
2082 2088
 	/* remember the EOF for next time */
2083  
-	if (ip->pending_eof)
  2089
+	if (ip->pending_eof) {
2084 2090
 	  ip->pending_eof = 2;
  2091
+          ip->slow = 1;
  2092
+        }
2085 2093
 	gc = 0;
2086 2094
 	size = 0; /* so that we stop */
2087 2095
       } else if (gc == SCHEME_UNLESS_READY) {
@@ -2115,9 +2123,10 @@ intptr_t scheme_get_byte_string_unless(const char *who,
2115 2123
       /* save newly peeked string for future peeks/reads */
2116 2124
       /***************************************************/
2117 2125
       if (gc) {
2118  
-	if ((gc == 1) && !ip->ungotten_count && !ip->peeked_write) {
  2126
+        ip->slow = 1;
  2127
+	if ((gc == 1) && !ip->ungotten_count && !ip->peeked_write)
2119 2128
 	  ip->ungotten[ip->ungotten_count++] = buffer[offset];
2120  
-	} else {
  2129
+	else {
2121 2130
 	  if (!ip->peeked_write) {
2122 2131
 	    Scheme_Object *rd, *wt;
2123 2132
 	    scheme_pipe(&rd, &wt);
@@ -2425,6 +2434,7 @@ int scheme_peeked_read_via_get(Scheme_Input_Port *ip,
2425 2434
       /* This sema makes other threads wait before reading: */
2426 2435
       sema = scheme_make_sema(0);
2427 2436
       ip->input_lock = sema;
  2437
+      ip->slow = 1;
2428 2438
       
2429 2439
       /* This sema lets other threads try to make progress,
2430 2440
 	 if the current target doesn't work out */
@@ -2541,6 +2551,7 @@ Scheme_Object *scheme_progress_evt_via_get(Scheme_Input_Port *port)
2541 2551
   sema = scheme_make_sema(0);
2542 2552
 
2543 2553
   port->progress_evt = sema;
  2554
+  port->slow = 1;
2544 2555
 
2545 2556
   return sema;
2546 2557
 }
@@ -2729,11 +2740,15 @@ intptr_t scheme_get_char_string(const char *who,
2729 2740
   }
2730 2741
 }
2731 2742
 
2732  
-static MZ_INLINE
2733  
-intptr_t get_one_byte(const char *who,
2734  
-		  Scheme_Object *port,
2735  
-		  char *buffer, intptr_t offset,
2736  
-		  int only_avail)
  2743
+MZ_DO_NOT_INLINE(static intptr_t get_one_byte_slow(const char *who,
  2744
+                                                   Scheme_Object *port,
  2745
+                                                   char *buffer, intptr_t offset,
  2746
+                                                   int only_avail));
  2747
+
  2748
+static intptr_t get_one_byte_slow(const char *who,
  2749
+                                  Scheme_Object *port,
  2750
+                                  char *buffer, intptr_t offset,
  2751
+                                  int only_avail)
2737 2752
 {
2738 2753
   Scheme_Input_Port *ip;
2739 2754
   intptr_t gc;
@@ -2777,36 +2792,39 @@ intptr_t get_one_byte(const char *who,
2777 2792
       ip->pending_eof = 1;
2778 2793
       return EOF;
2779 2794
     } else {
  2795
+      if (!ip->progress_evt && !ip->p.count_lines)
  2796
+        ip->slow = 0;
  2797
+
2780 2798
       /* Call port's get function. */
2781 2799
       gs = ip->get_string_fun;
2782 2800
 
2783 2801
       gc = gs(ip, buffer, offset, 1, 0, NULL);
2784 2802
 	
2785 2803
       if (ip->progress_evt && (gc > 0))
2786  
-	post_progress(ip);
  2804
+        post_progress(ip);
2787 2805
 
2788 2806
       if (gc < 1) {
2789  
-	if (gc == SCHEME_SPECIAL) {
2790  
-	  if (special_ok) {
2791  
-	    if (ip->p.position >= 0)
2792  
-	      ip->p.position++;
2793  
-	    if (ip->p.count_lines)
2794  
-	      inc_pos((Scheme_Port *)ip, 1);
2795  
-	    return SCHEME_SPECIAL;
2796  
-	  } else {
2797  
-	    scheme_bad_time_for_special(who, port);
2798  
-	    return 0;
2799  
-	  }
2800  
-	} else if (gc == EOF) {
2801  
-	  ip->p.utf8state = 0;
2802  
-	  return EOF;
2803  
-	} else {
2804  
-	  /* didn't get anything the first try, so use slow path: */
2805  
-	  special_is_ok = special_ok;
2806  
-	  return scheme_get_byte_string_unless(who, port,
2807  
-					       buffer, offset, 1,
2808  
-					       0, 0, NULL, NULL);
2809  
-	}
  2807
+        if (gc == SCHEME_SPECIAL) {
  2808
+          if (special_ok) {
  2809
+            if (ip->p.position >= 0)
  2810
+              ip->p.position++;
  2811
+            if (ip->p.count_lines)
  2812
+              inc_pos((Scheme_Port *)ip, 1);
  2813
+            return SCHEME_SPECIAL;
  2814
+          } else {
  2815
+            scheme_bad_time_for_special(who, port);
  2816
+            return 0;
  2817
+          }
  2818
+        } else if (gc == EOF) {
  2819
+          ip->p.utf8state = 0;
  2820
+          return EOF;
  2821
+        } else {
  2822
+          /* didn't get anything the first try, so use slow path: */
  2823
+          special_is_ok = special_ok;
  2824
+          return scheme_get_byte_string_unless(who, port,
  2825
+                                               buffer, offset, 1,
  2826
+                                               0, 0, NULL, NULL);
  2827
+        }
2810 2828
       }
2811 2829
     }
2812 2830
   }
@@ -2823,6 +2841,37 @@ intptr_t get_one_byte(const char *who,
2823 2841
   return gc;
2824 2842
 }
2825 2843
 
  2844
+static MZ_INLINE intptr_t get_one_byte(GC_CAN_IGNORE const char *who,
  2845
+                                       Scheme_Object *port, char *buffer)
  2846
+{
  2847
+  if (!special_is_ok && SCHEME_INPORTP(port)) {
  2848
+    GC_CAN_IGNORE Scheme_Input_Port *ip;
  2849
+    ip = (Scheme_Input_Port *)port;
  2850
+    if (!ip->slow) {
  2851
+      Scheme_Get_String_Fun gs;
  2852
+      int v;
  2853
+
  2854
+      gs = ip->get_string_fun;
  2855
+
  2856
+      v = gs(ip, buffer, 0, 1, 0, NULL);
  2857
+    
  2858
+      if (v) {
  2859
+        if (v == SCHEME_SPECIAL) {
  2860
+          scheme_bad_time_for_special(who, port);
  2861
+        }
  2862
+
  2863
+        ip = (Scheme_Input_Port *)port; /* since ignored by GC */
  2864
+        if (ip->p.position >= 0)
  2865
+          ip->p.position++;
  2866
+
  2867
+        return v;
  2868
+      }
  2869
+    }
  2870
+  }
  2871
+  
  2872
+  return get_one_byte_slow(who, port, buffer, 0, 0);
  2873
+}
  2874
+
2826 2875
 int
2827 2876
 scheme_getc(Scheme_Object *port)
2828 2877
 {
@@ -2838,9 +2887,7 @@ scheme_getc(Scheme_Object *port)
2838 2887
 					delta > 0, scheme_make_integer(delta-1),
2839 2888
 					NULL);
2840 2889
     } else {
2841  
-      v = get_one_byte("read-char", port,
2842  
-		       s, 0, 
2843  
-		       0);
  2890
+      v = get_one_byte("read-char", port, s);
2844 2891
     }
2845 2892
 
2846 2893
     if ((v == EOF) || (v == SCHEME_SPECIAL)) {
@@ -2880,9 +2927,7 @@ scheme_get_byte(Scheme_Object *port)
2880 2927
   char s[1];
2881 2928
   int v;
2882 2929
 
2883  
-  v = get_one_byte("read-byte", port,
2884  
-		   s, 0,
2885  
-		   0);
  2930
+  v = get_one_byte("read-byte", port, s);
2886 2931
 
2887 2932
   if ((v == EOF) || (v == SCHEME_SPECIAL))
2888 2933
     return v;
@@ -3160,6 +3205,8 @@ scheme_ungetc (int ch, Scheme_Object *port)
3160 3205
 
3161 3206
   CHECK_PORT_CLOSED("#<primitive:peek-port-char>", "input", port, ip->closed);
3162 3207
 
  3208
+  ip->slow = 1;
  3209
+
3163 3210
   if (ch == EOF) {
3164 3211
     if (ip->pending_eof) /* non-zero means that EOFs are tracked */
3165 3212
       ip->pending_eof = 2;
@@ -3210,9 +3257,10 @@ scheme_byte_ready (Scheme_Object *port)
3210 3257
 
3211 3258
   CHECK_PORT_CLOSED("char-ready?", "input", port, ip->closed);
3212 3259
 
3213  
-  if (ip->ungotten_count || ip->ungotten_special
3214  
-      || (ip->pending_eof > 1)
3215  
-      || pipe_char_count(ip->peeked_read))
  3260
+  if (ip->slow
  3261
+      && (ip->ungotten_count || ip->ungotten_special
  3262
+          || (ip->pending_eof > 1)
  3263
+          || pipe_char_count(ip->peeked_read)))
3216 3264
     retval = 1;
3217 3265
   else {
3218 3266
     Scheme_In_Ready_Fun f = ip->byte_ready_fun;
@@ -3582,6 +3630,13 @@ scheme_count_lines (Scheme_Object *port)
3582 3630
       Scheme_Count_Lines_Fun cl = ip->count_lines_fun;
3583 3631
       cl(ip);
3584 3632
     }
  3633
+    
  3634
+    if (scheme_is_input_port(port)) {
  3635
+      Scheme_Input_Port *iip;
  3636
+      iip = scheme_input_port_record(port);
  3637
+      if (iip)
  3638
+        iip->slow = 1;
  3639
+    }
3585 3640
   }
3586 3641
 }
3587 3642
 
@@ -3609,6 +3664,7 @@ scheme_close_input_port (Scheme_Object *port)
3609 3664
     }
3610 3665
 
3611 3666
     ip->closed = 1;
  3667
+    ip->slow = 1;
3612 3668
     ip->ungotten_count = 0;
3613 3669
     ip->ungotten_special = NULL;
3614 3670
   }
@@ -5541,6 +5597,11 @@ fd_byte_ready (Scheme_Input_Port *port)
5541 5597
   }
5542 5598
 }
5543 5599
 
  5600
+MZ_DO_NOT_INLINE(static intptr_t fd_get_string_slow(Scheme_Input_Port *port,
  5601
+                                                    char *buffer, intptr_t offset, intptr_t size,
  5602
+                                                    int nonblock,
  5603
+                                                    Scheme_Object *unless));
  5604
+
5544 5605
 static intptr_t fd_get_string_slow(Scheme_Input_Port *port,
5545 5606
                                char *buffer, intptr_t offset, intptr_t size,
5546 5607
                                int nonblock,
45  src/racket/src/portfun.c
@@ -3013,10 +3013,12 @@ static Scheme_Object *
3013 3013
 do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])
3014 3014
 {
3015 3015
   Scheme_Object *port;
3016  
-  int ch;
  3016
+  int ch, ascii;
3017 3017
   int crlf = 0, cr = 0, lf = 1;
3018 3018
   char *buf, *oldbuf, onstack[32];
3019 3019
   intptr_t size = 31, oldsize, i = 0;
  3020
+  Scheme_Input_Port *ip;
  3021
+  Scheme_Get_String_Fun gs;
3020 3022
 
3021 3023
   if (argc && !SCHEME_INPUT_PORTP(argv[0]))
3022 3024
     scheme_wrong_type(who, "input-port", 0, argc, argv);
@@ -3051,8 +3053,31 @@ do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])
3051 3053
 
3052 3054
   buf = onstack;
3053 3055
 
  3056
+  ip = scheme_input_port_record(port);
  3057
+  gs = ip->get_string_fun;
  3058
+  ascii = 1;
  3059
+      
3054 3060
   while (1) {
3055  
-    ch = scheme_get_byte(port);
  3061
+    if (!ip->slow) {
  3062
+      /* `read-line' seems important enough to inline the `read-byte' fast path: */
  3063
+      char s[1];
  3064
+      
  3065
+      ch = gs(ip, s, 0, 1, 0, NULL);
  3066
+
  3067
+      if (ch == SCHEME_SPECIAL) {
  3068
+        scheme_bad_time_for_special(who, port);
  3069
+      } else if (ch) {
  3070
+        if (ip->p.position >= 0)
  3071
+          ip->p.position++;
  3072
+
  3073
+        if (ch != EOF)
  3074
+          ch = ((unsigned char *)s)[0];
  3075
+      } else
  3076
+        ch = scheme_get_byte(port);
  3077
+    } else {
  3078
+      ch = scheme_get_byte(port);
  3079
+    }
  3080
+
3056 3081
     if (ch == EOF) {
3057 3082
       if (!i)
3058 3083
 	return scheme_eof;
@@ -3086,14 +3111,26 @@ do_read_line (int as_bytes, const char *who, int argc, Scheme_Object *argv[])
3086 3111
       memcpy(buf, oldbuf, oldsize);
3087 3112
     }
3088 3113
     buf[i++] = ch;
  3114
+    if (ch > 127) ascii = 0;
3089 3115
   }
3090 3116
 
3091 3117
   if (as_bytes) {
3092 3118
     buf[i] = '\0';
3093 3119
     return scheme_make_sized_byte_string(buf, i, buf == (char *)onstack);
3094 3120
   } else {
3095  
-    buf[i] = '\0';
3096  
-    return scheme_make_sized_utf8_string(buf, i);
  3121
+    int j;
  3122
+    if (ascii) {
  3123
+      mzchar *us;
  3124
+      us = scheme_malloc_atomic(sizeof(mzchar) * (i + 1));
  3125
+      for (j = 0; j < i; j++) {
  3126
+        us[j] = ((unsigned char *)buf)[j];
  3127
+      }
  3128
+      us[i] = 0;
  3129
+      return scheme_make_sized_offset_char_string(us, 0, i, 0);
  3130
+    } else {
  3131
+      buf[i] = '\0';
  3132
+      return scheme_make_sized_utf8_string(buf, i);
  3133
+    }
3097 3134
   }
3098 3135
 }
3099 3136
 
5  src/racket/src/string.c
@@ -891,9 +891,8 @@ Scheme_Object *scheme_make_sized_offset_utf8_string(char *chars, intptr_t d, int
891 891
 			      NULL, 0 /* not UTF-16 */, 0xFFFD);
892 892
     us = scheme_malloc_atomic(sizeof(mzchar) * (ulen + 1));
893 893
     scheme_utf8_decode((unsigned char *)chars, d, d + len,
894  
-		       us, 0, -1,
895  
-		       NULL, 0 /* not UTF-16 */, 0xFFFD);
896  
-
  894
+                       us, 0, -1,
  895
+                       NULL, 0 /* not UTF-16 */, 0xFFFD);
897 896
     us[ulen] = 0;
898 897
   } else {
899 898
     us = (mzchar *)"\0\0\0";

0 notes on commit 9896cb6

Please sign in to comment.
Something went wrong with that request. Please try again.