Browse files

4.4BSD-Lite2 contrib/sort

  • Loading branch information...
1 parent a2bca40 commit 9ad0661fc70c5e776dbef6f658b97fdd3804a61a bjh21 committed Oct 7, 2000
View
6 usr.bin/sort/Makefile
@@ -0,0 +1,6 @@
+# @(#)Makefile 8.1 (Berkeley) 6/6/93
+
+PROG= sort
+SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
+
+.include <bsd.prog.mk>
View
893 usr.bin/sort/TEST/stests
@@ -0,0 +1,893 @@
+# @(#)stests 8.1 (Berkeley) 6/6/93
+
+#Latest version. My sort passes all tests because I wrote it.
+#We differ only on 25E and 25H.
+#(I found at least one bug in constructing test 25, and was driven
+#to rewrite field parsing to clarify it.)
+#
+#In 25E, -k2.3,2.1b, the fields are not necessarily out of order.
+#Even if they were, it would be legal (11752-3), although certainly
+#justification for warning.
+#
+#On 25H, your answer is as defensible as mine. (Our suggestion
+#*1 backs mine.)
+
+
+# Tests for the Unix sort utility
+# Test Posix features except for locale.
+# Test some nonstandard features if present.
+
+# Other tests should be made for files too big to fit in memory.
+
+
+# Initialize switches for nonstandard features.
+# Use parenthesized settings for supported features.
+
+o=: # officially obsolescent features: +1 -2, misplaced -o (o=)
+g=: # -g numeric sort including e-format numbers (g=)
+M=: # -M sort by month names (M=)
+s=: # -s stable, do not compare raw bytes on equal keys (s=)
+y= # -y user-specified memory size (y=-y10000)
+
+# Detect what features are supported, assuming bad options cause
+# errors. Set switches accordingly.
+
+echo obsolescent and nonstandard features recognized, if any:
+if sort +0 </dev/null 2>/dev/null; then o=
+ echo ' +1 -2'; fi
+if sort /dev/null -o xx 2>/dev/null; then o=
+ echo ' displaced -o'; fi
+if sort -g </dev/null 2>/dev/null; then g=
+ echo ' -g g-format numbers'; fi
+if sort -M </dev/null 2>/dev/null; then M=
+ echo ' -M months'; fi
+if sort -s </dev/null 2>/dev/null; then s=
+ echo ' -s stable'; fi
+if sort -y10000 </dev/null 2>/dev/null; then y=-y10000
+ echo ' -y space'; fi
+if sort -z10000 </dev/null 2>/dev/null; then
+ echo ' -z size (not exercised)'; fi
+if sort -T. </dev/null 2>/dev/null; then
+ echo ' -T tempdir (not exercised)'; fi
+
+
+export TEST # major sequence number of test
+
+trap "rm -f in in1 out xx -k xsort linecount fields; exit" 0 1 2 13 15
+
+# xsort testno options
+# Sort file "in" with specified options.
+# Compare with file "out" if that is supplied,
+# otherwise make plausibility checks on output
+
+# "sum" must be dumb; insensitive to the
+# order of lines within a file.
+# System V sum is suitable; sum -5 is the v10 equivalent.
+
+PATH=.:$PATH
+export PATH
+cat <<'!' >xsort; chmod +x xsort
+
+ X=$1; shift
+
+ if sort "$@" in >xx && sort -c "$@" xx
+ then
+ if test -f out
+ then
+ cmp xx out >/dev/null && exit 0
+ echo $TEST$X comparison failed
+ else
+ test "`cksum -o2 <in`" = "`cksum -o2 <xx`" && exit 0
+ echo $TEST$X checksum failed
+ fi
+ else
+ echo $TEST$X failed
+ fi
+ exit 1
+!
+
+# linecount testno file count
+# declares the given "testno" to be in error if number of
+# lines in "file" differs from "count"
+
+cat <<'!' >linecount; chmod +x linecount
+awk 'END{ if(NR!='$3') print "'$TEST$1' failed" }' $2
+!
+
+rm -f out
+
+#---------------------------------------------------------------
+TEST=01; echo $TEST # -c status, checksum
+ # obsolescent features go together
+cat <<! >in
+b
+a
+!
+rm -f out -o
+
+sort -c in 2>/dev/null && echo ${TEST}A failed
+
+xsort B || '"cksum"' is probably unsuitable - see comments
+
+$o sort +0 in -o in || echo ${TEST}c failed
+
+#---------------------------------------------------------------
+TEST=02; echo $TEST # output from -c
+cat <<! >in
+x
+y
+!
+
+sort -cr in >out 2>xx && echo ${TEST}A failed
+test -s out && echo ${TEST}B failed
+test -s xx && echo option -c is noisy "(probably legal)"
+test -s xx || echo option -c is quiet "(legal, not classical)"
+
+#---------------------------------------------------------------
+TEST=03; echo $TEST # -n
+cat <<! >in
+-99.0
+-99.1
+-.0002
+-10
+2
+0010.000000000000000000000000000000000001
+10
+3x
+x
+!
+cat <<! >out
+-99.1
+-99.0
+-10
+-.0002
+x
+2
+3x
+10
+0010.000000000000000000000000000000000001
+!
+
+xsort "" -n
+
+#---------------------------------------------------------------
+TEST=04; echo $TEST # -b without fields, piping, -c status return
+cat <<! >in
+ b
+ a
+!
+cp in out
+
+xsort A -b
+
+cat in | sort | cat >xx
+cmp xx out >/dev/null || echo ${TEST}B failed
+
+sort in | sort -cr 2>/dev/null && echo ${TEST}C failed
+
+#---------------------------------------------------------------
+TEST=05; echo $TEST # fields, reverse fields, -c status return
+cat <<! >in
+b b p
+a b q
+x a
+!
+cat <<! >out
+x a
+a b q
+b b p
+!
+
+$o xsort A +1 -2
+
+$o xsort B +1 -2 +2r
+
+xsort C -k 2,2
+
+xsort D -k 2,2 -k 3r
+
+xsort E -k 2,2.0
+
+xsort F -k 2,2 -k 1,1 -k 3
+
+sort -c -k 2 in 2>/dev/null && ${TEST}G failed
+
+#---------------------------------------------------------------
+TEST=06; echo $TEST # -t
+cat <<! >in
+a:
+a!
+!
+cp in out
+
+$o xsort A -t : -r +0
+
+$o xsort B -t : +0 -1
+
+xsort C -t : -r -k 1
+
+xsort D -t : -k 1,1
+
+#---------------------------------------------------------------
+TEST=07; echo $TEST # -t, character positions in fields
+ # -t: as 1 arg is not strictly conforming, but classical
+cat <<! >in
+: ab
+:bac
+!
+cat <<! >out
+:bac
+: ab
+!
+
+$o xsort A -b -t: +1.1
+
+$o xsort B -t: +1.1r
+
+xsort C -b -t: -k 2.2
+
+xsort D -t: -k 2.2r
+
+#---------------------------------------------------------------
+TEST=08; echo $TEST # space and tab as -t characters
+cat <<! >in
+ b c
+ b c
+ b c
+!
+cp in out
+
+xsort A -t ' ' -k2,2
+
+xsort B -t ' ' -k2.1,2.0
+
+cat <<! >out
+ b c
+ b c
+ b c
+!
+
+xsort C -t ' ' -k2,2
+
+xsort D -t ' ' -k2.1,2.0
+
+cat <<! >out
+ b c
+ b c
+ b c
+!
+
+xsort E -k2
+
+cat <<! >out
+ b c
+ b c
+ b c
+!
+
+xsort F -k2b
+
+#---------------------------------------------------------------
+TEST=09; echo $TEST # alphabetic as -t character
+cat <<! >in
+zXa
+yXa
+zXb
+!
+cp in out
+
+xsort "" -tX -k2 -k1r,1
+
+#---------------------------------------------------------------
+TEST=10; echo $TEST # -m
+cat <<! >in
+a
+ab
+ab
+bc
+ca
+!
+cat <<! >in1
+Z
+a
+aa
+ac
+c
+!
+cat <<! >out
+Z
+a
+a
+aa
+ab
+ab
+ac
+bc
+c
+ca
+!
+
+sort -m in in1 >xx
+cmp xx out >/dev/null || echo $TEST failed
+
+#---------------------------------------------------------------
+TEST=11; echo $TEST # multiple files, -o overwites input, -m, -mu
+cat <<! >in
+a
+b
+c
+d
+!
+
+sort -o xx in in in in in in in in in in in in in in in in in
+linecount A xx 68
+sort -o in -mu in in in in in in in in in in in in in in in in in
+linecount B in 4
+sort -o in -m in in in in in in in in in in in in in in in in in
+
+cmp in xx >/dev/null || echo ${TEST}C failed
+
+#---------------------------------------------------------------
+TEST=12; echo $TEST # does -mu pick the first among equals?
+cat <<! >in
+3B
+3b
+3B2
+~3B2
+4.1
+41
+5
+5.
+!
+cat <<! >out
+3B
+3B2
+4.1
+5
+!
+
+xsort A -mudf || echo "(other behavior is legal, not classical)"
+
+xsort B -mudf -k1 || echo "(other behavior is legal, not classical)"
+
+#---------------------------------------------------------------
+TEST=13; echo $TEST # long records (>8000 bytes, keys >16000), -r
+awk '
+BEGIN { x="x"
+ for(i=1; i<=12; i++) x = x x
+ for(i=15; i<=25; i++) print x i
+}' >in
+awk '
+BEGIN { x="x"
+ for(i=1; i<=12; i++) x = x x
+ for(i=25; i>=15; i--) print x i
+}' >out
+
+xsort A -r
+
+xsort B -k 1,1r -k 1
+
+#---------------------------------------------------------------
+TEST=14; echo $TEST "(3 long parts)"
+awk 'BEGIN { for(i=0; i<100000; i++) print rand() }' | grep -v e >in
+rm -f out
+
+xsort A; echo $TEST "(part A done)"
+
+xsort B -n; echo $TEST "(part B done)"
+
+# next test is unclean: xx is a hidden side-effect of xsort
+
+awk '
+ $0 < x { print "test '${TEST}C' failed"; exit }
+ $0 "" != x { print >"out"; x = $0 }
+' xx
+
+xsort C -n -u
+
+#---------------------------------------------------------------
+TEST=15; echo $TEST "(long)" # force intermediate files if possible
+awk 'BEGIN { for(i=0; i<20000; i++) print rand() }' >in
+rm -f out
+
+xsort A -r $y
+
+sort -r in | awk '$0 "x" != x { print ; x = $0 "x" }' >out
+
+xsort B -u -r $y
+
+#---------------------------------------------------------------
+TEST=16; echo $TEST # -nr, -nm, file name -
+awk 'BEGIN { for(i=-100; i<=100; i+=2) printf "%.10d\n", i }' >in
+
+awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -nr in - >xx
+awk '$0+0 != 101-NR { print "'${TEST}A' failed"; exit }' xx
+
+awk 'BEGIN { for(i=-99; i<=100; i+=2) print i }' | sort -mn in - >xx
+awk '$0+0 != -101+NR { print "'${TEST}B' failed"; exit }' xx
+
+#---------------------------------------------------------------
+TEST=17; echo $TEST # -d, fields without end, modifier override
+cat <<! >in
+a-B
+a+b
+a b
+A+b
+a b
+!
+cat <<! >out
+a b
+a b
+A+b
+a-B
+a+b
+!
+
+$o xsort A -df +0 +0d
+
+xsort B -df -k 1 -k 1d
+
+#---------------------------------------------------------------
+TEST=18; echo $TEST # -u on key only
+cat <<! >in
+12 y
+13 z
+12 x
+!
+cat <<! >out
+12 x
+12 y
+13 z
+!
+
+$o xsort A +0 -1
+
+xsort B -k 1,1
+
+sort -u -k 1,1 in >xx
+linecount C xx 2
+
+#---------------------------------------------------------------
+TEST=19; echo $TEST # -i, -d, -f
+cat <<! >xx.c
+run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
+main(){ run(0, 011); /* 012=='\n' */
+ run(013, 0377); }
+!
+cc xx.c
+a.out >in
+cat <<! >xx.c
+run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
+main(){ run(0, 011);
+ run(013, ' '-1);
+ run(0177, 0377);
+ run(' ', 0176); }
+!
+cc xx.c
+a.out >out
+
+xsort A -i -k 2
+
+cat <<! >xx.c
+run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
+main(){ run(0, 010); /* 011=='\t', 012=='\n' */
+ run(013, ' '-1);
+ run(' '+1, '0'-1);
+ run('9'+1, 'A'-1);
+ run('Z'+1, 'a'-1);
+ run('z'+1, 0377);
+ run('\t', '\t');
+ run(' ', ' ');
+ run('0', '9');
+ run('A', 'Z');
+ run('a', 'z'); }
+!
+cc xx.c
+a.out >out
+
+xsort B -d -k 2
+
+cat <<! >xx.c
+run(i,j){ for( ; i<=j; i++) printf("%.3o %c\n",i,i); }
+main(){ int i;
+ run(0, 011);
+ run(013, 'A'-1);
+ for(i='A'; i<='Z'; i++)
+ printf("%.3o %c\n%.3o %c\n",i,i,i+040,i+040);
+ run('Z'+1, 'a'-1);
+ run('z'+1, 0377); }
+!
+cc xx.c
+a.out >out
+rm xx.c
+
+xsort C -f -k 2
+
+#---------------------------------------------------------------
+TEST=20; echo $TEST # -d, -f, -b applies only to fields
+cat <<! >in
+ b
+'C
+a
+!
+cp in out
+
+xsort A -d
+
+xsort B -f
+
+cat <<! >out
+ b
+a
+'C
+!
+
+xsort C -dfb
+
+#---------------------------------------------------------------
+TEST=21; echo $TEST # behavior of null bytes
+cat <<'!' >xx.c
+main() { printf("%cb\n%ca\n",0,0); }
+!
+cc xx.c
+a.out >in
+sort in >xx
+cmp in xx >/dev/null && echo ${TEST}A failed
+test "`wc -c <in`" = "`wc -c <xx`" || echo ${TEST}B failed
+rm xx.c a.out
+
+#---------------------------------------------------------------
+TEST=22; echo $TEST # field limits
+cat <<! >in
+a 2
+a 1
+b 2
+b 1
+!
+cat <<! >out
+b 1
+b 2
+a 1
+a 2
+!
+
+xsort "" -r -k1,1 -k2n
+
+#---------------------------------------------------------------
+TEST=23; echo $TEST # empty file
+
+sort -o xx </dev/null
+cmp xx /dev/null 2>/dev/null || echo ${TEST}A failed
+
+sort -c </dev/null || echo ${TEST}B failed
+
+sort -cu </dev/null || echo ${TEST}C failed
+
+#---------------------------------------------------------------
+TEST=24; echo $TEST # many fields
+cat <<! >in
+0:2:3:4:5:6:7:8:9
+1:1:3:4:5:6:7:8:9
+1:2:2:4:5:6:7:8:9
+1:2:3:3:5:6:7:8:9
+1:2:3:4:4:6:7:8:9
+1:2:3:4:5:5:7:8:9
+1:2:3:4:5:6:6:8:9
+1:2:3:4:5:6:7:7:9
+1:2:3:4:5:6:7:8:8
+!
+cat <<! >out
+1:2:3:4:5:6:7:8:8
+1:2:3:4:5:6:7:7:9
+1:2:3:4:5:6:6:8:9
+1:2:3:4:5:5:7:8:9
+1:2:3:4:4:6:7:8:9
+1:2:3:3:5:6:7:8:9
+1:2:2:4:5:6:7:8:9
+1:1:3:4:5:6:7:8:9
+0:2:3:4:5:6:7:8:9
+!
+
+xsort "" -t: -k9 -k8 -k7 -k6 -k5 -k4 -k3 -k2 -k1
+
+#---------------------------------------------------------------
+TEST=25; echo $TEST # variously specified alpha fields
+ # numbers give the correct orderings
+cat <<! >in
+01:04:19:01:16:01:21:01 a
+02:03:13:15:13:19:15:02 a
+03:02:07:09:07:13:09:03 a
+04:01:01:03:01:07:03:04 a
+05:08:20:16:17:02:20:05 aa
+06:07:14:18:14:20:14:06 aa
+07:06:08:10:08:14:08:07 aa
+08:05:02:04:02:08:02:08 aa
+09:16:22:02:22:04:24:13 b
+10:15:16:20:19:22:18:14 b
+11:14:10:12:10:16:12:15 b
+12:13:04:06:04:10:06:16 b
+13:24:24:22:24:06:22:21 bb
+14:23:18:24:21:24:16:22 bb
+15:22:12:14:12:18:10:23 bb
+16:21:06:08:06:12:04:24 bb
+17:12:21:21:18:03:19:09 ab
+18:11:15:19:15:21:13:10 ab
+19:10:09:11:09:15:07:11 ab
+20:09:03:05:03:09:01:12 ab
+21:20:23:17:23:05:23:17 ba
+22:19:17:23:20:23:17:18 ba
+23:18:11:13:11:17:11:19 ba
+24:17:05:07:05:11:05:20 ba
+!
+sort -k2b -k2 in >xx &&
+ sort -c -t: -k2n xx 2>/dev/null || echo ${TEST}A failed
+sort -k2,2.1b -k2 in >xx &&
+ sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}B failed
+sort -k2.3 -k2 in >xx &&
+ sort -c -t: -k4n xx 2>/dev/null || echo ${TEST}C failed
+sort -k2b,2.3 -k2 in >xx &&
+ sort -c -t: -k5n xx 2>/dev/null || echo ${TEST}D failed
+sort -k2.3,2.1b -k2 in >xx &&
+ sort -c -t: -k6n xx 2>/dev/null || echo ${TEST}E failed
+sort -k2,2.1b -k2r in >xx &&
+ sort -c -t: -k7n xx 2>/dev/null || echo ${TEST}F failed
+sort -b -k2,2 -k2 in >xx &&
+ sort -c -t: -k8n xx 2>/dev/null || echo ${TEST}G failed
+sort -b -k2,2b -k2 in >xx && # perhaps same as G
+ sort -c -t: -k3n xx 2>/dev/null || echo ${TEST}H failed\
+ "(standard is not clear on this)"
+
+#---------------------------------------------------------------
+TEST=26; echo $TEST # empty fields, out of bounds fields
+cat <<! >in
+0 5
+1 4
+2 3
+3 2
+4 1
+5 0
+!
+cp in out
+
+xsort "" -k2.2,2.1 -k2.3,2.4
+
+#---------------------------------------------------------------
+TEST=27; echo $TEST # displaced -o
+rm -f out
+
+$o sort /dev/null -o out || $o echo ${TEST}B failed
+$o test -f out || $o echo ${TEST}C failed
+
+#---------------------------------------------------------------
+TEST=28; echo $TEST # apparently nonmonotone field specs
+cat <<! >in
+aaaa c
+x a
+0 b
+!
+cp in out
+
+$o xsort A +1 -0.3 +1.4 -1.5
+
+xsort B -k2,1.3 -k2.5,2.5
+
+#---------------------------------------------------------------
+TEST=29; echo $TEST # determination of end of option list
+cat >-k <<!
+x
+!
+rm -f out -c
+
+sort -- -k </dev/null >xx || echo ${TEST}A argument failed
+cmp xx -k || echo ${TEST}A comparison failed
+
+sort - -c </dev/null 2>/dev/null && echo ${TEST}B failed
+
+#---------------------------------------------------------------
+TEST=30; echo $TEST # missing newline
+awk 'BEGIN{ printf "%s", "x"}' | sort >xx
+wc -c <xx | awk '$1!=2{ print "'${TEST}' failed" }'
+
+#---------------------------------------------------------------
+TEST=31; echo $TEST # -M, multiple fields
+cat <<! >in
+jan 10 1900
+Feb 26 1900
+feb 25 1900
+January xx 1900
+August 11 1900
+jan 15 1990
+feb 22 1990
+mar 15 1990
+apr 1 1990
+may 45 1990
+jun 14 1990
+jul 4 1990
+aug 1~ 1990
+aug 11 1990
+sep 1 1990
+oct 12 1990
+nov 24 1990
+dec 25 1990
+never 3 1990
+ Dec 25 1990
+!
+cat <<! >out
+January xx 1900
+jan 10 1900
+feb 25 1900
+Feb 26 1900
+August 11 1900
+never 3 1990
+jan 15 1990
+feb 22 1990
+mar 15 1990
+apr 1 1990
+may 45 1990
+jun 14 1990
+jul 4 1990
+aug 1~ 1990
+aug 11 1990
+sep 1 1990
+oct 12 1990
+nov 24 1990
+ Dec 25 1990
+dec 25 1990
+!
+
+$M xsort "" -k3n -k1M -k2n
+
+#---------------------------------------------------------------
+TEST=32; echo $TEST # -M case insensitivity, -r
+cat <<! >in
+x
+june
+january
+december
+!
+cat <<! >out
+december
+june
+january
+x
+!
+
+$M xsort "" -Mr
+
+#---------------------------------------------------------------
+TEST=33; echo $TEST # -g
+cat <<! >in
+2
+1
+10
+.2
+1e
+1E1
+1e.
+!
+cat <<! >out
+.2
+1
+1e
+1e.
+2
+10
+1E1
+!
+
+$g xsort "" -g
+
+#---------------------------------------------------------------
+TEST=34; echo $TEST # -g wide operands
+cat <<! >in
+.99999999999999999999
+099999999999999999999e-21
+099999999999999999999e-19
+.1e1
+!
+cat <<! >out
+099999999999999999999e-21
+.99999999999999999999
+.1e1
+099999999999999999999e-19
+!
+
+$g xsort A -g
+
+cat <<! >out
+.1e1
+.99999999999999999999
+099999999999999999999e-19
+099999999999999999999e-21
+!
+
+xsort B -n
+
+#---------------------------------------------------------------
+TEST=35; echo $TEST #-g, -u with different fp reps
+cat <<! >in
++0
+-0
+0.10
++.1
+-.1
+-100e-3
+x
+!
+cat <<! >out
+-.1
+-100e-3
++0
+-0
+x
++.1
+0.10
+!
+
+$g xsort A -g
+
+$g sort -gu in >xx && $g sort -c -gu xx || echo ${TEST}B failed
+$g linecount C xx 3
+
+#---------------------------------------------------------------
+TEST=36; echo $TEST # -s
+cat <<! >in
+a 2
+b 1
+c 2
+a 1
+b 2
+c 1
+!
+cat <<! >out
+a 2
+a 1
+b 1
+b 2
+c 2
+c 1
+!
+
+$s xsort "" -s -k1,1
+
+#---------------------------------------------------------------
+TEST=37; echo $TEST # -s, multiple files
+cat <<! >in
+a 2
+c 2
+!
+cat <<! >in1
+a 1
+b 1
+c 1
+!
+cat <<! >out
+c 2
+b 1
+a 2
+!
+
+$s sort -smru -k1,1 in in in1 in1 >xx
+$s cmp xx out >/dev/null || echo $TEST failed
+
+#---------------------------------------------------------------
+TEST=38; echo $TEST # -s
+$s awk '
+ BEGIN {
+ for(i=1; i<50; i++)
+ for(j=1; j<=i; j++) {
+ print i, 2 >"in"
+ print i, 1 >"in1"
+ }
+ }'
+
+$s sort -m -s -k1,1n in in1 >out
+
+$s awk '
+ func stop() { print "'$TEST' failed"; exit }
+ $1!=last1 { if(count!=last1 || $2!=2) stop();
+ count = 0}
+ $1==last1 && $2!=last2 { if(count!=last1 || $2!=1) stop();
+ count = 0 }
+ { count++; last1 = $1; last2 = $2 }
+ ' out
View
188 usr.bin/sort/append.c
@@ -0,0 +1,188 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+#include "sort.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define OUTPUT { \
+ if ((n = cpos - ppos) > 1) { \
+ for (; ppos < cpos; ++ppos) \
+ *ppos -= odepth; \
+ ppos -= n; \
+ radixsort(ppos, n, wts1, REC_D); \
+ for (; ppos < cpos; ppos++) { \
+ prec = (RECHEADER *) (*ppos - sizeof(TRECHEADER));\
+ put(prec, fd); \
+ } \
+ } else put(prec, fd); \
+}
+
+/*
+ * copy sorted lines to output; check for uniqueness
+ */
+void
+append(keylist, nelem, depth, fd, put, ftbl)
+ u_char **keylist;
+ int nelem;
+ register int depth;
+ FILE *fd;
+ void (*put)(RECHEADER *, FILE *);
+ struct field *ftbl;
+{
+ register u_char *wts, *wts1;
+ register n, odepth;
+ register u_char **cpos, **ppos, **lastkey;
+ register u_char *cend, *pend, *start;
+ register struct recheader *crec, *prec;
+
+ if (*keylist == '\0' && UNIQUE)
+ return;
+ wts1 = wts = ftbl[0].weights;
+ if ((!UNIQUE) && SINGL_FLD) {
+ if (ftbl[0].flags & F && ftbl[0].flags & R)
+ wts1 = Rascii;
+ else if (ftbl[0].flags & F)
+ wts1 = ascii;
+ odepth = depth;
+ }
+ lastkey = keylist + nelem;
+ depth += sizeof(TRECHEADER);
+ if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
+ ppos = keylist;
+ prec = (RECHEADER *) (*ppos - depth);
+ if (UNIQUE)
+ put(prec, fd);
+ for (cpos = keylist+1; cpos < lastkey; cpos++) {
+ crec = (RECHEADER *) (*cpos - depth);
+ if (crec->length == prec->length) {
+ pend = (u_char *) &prec->offset + prec->length;
+ cend = (u_char *) &crec->offset + crec->length;
+ for (start = *cpos; cend >= start; cend--) {
+ if (wts[*cend] != wts[*pend])
+ break;
+ pend--;
+ }
+ if (pend + 1 != *ppos) {
+ if (!UNIQUE) {
+ OUTPUT;
+ } else
+ put(crec, fd);
+ ppos = cpos;
+ prec = crec;
+ }
+ } else {
+ if (!UNIQUE) {
+ OUTPUT;
+ } else
+ put(crec, fd);
+ ppos = cpos;
+ prec = crec;
+ }
+ }
+ if (!UNIQUE) { OUTPUT; }
+ } else if (UNIQUE) {
+ ppos = keylist;
+ prec = (RECHEADER *) (*ppos - depth);
+ put(prec, fd);
+ for (cpos = keylist+1; cpos < lastkey; cpos++) {
+ crec = (RECHEADER *) (*cpos - depth);
+ if (crec->offset == prec->offset) {
+ pend = (u_char *) &prec->offset + prec->offset;
+ cend = (u_char *) &crec->offset + crec->offset;
+ for (start = *cpos; cend >= start; cend--) {
+ if (wts[*cend] != wts[*pend])
+ break;
+ pend--;
+ }
+ if (pend + 1 != *ppos) {
+ ppos = cpos;
+ prec = crec;
+ put(prec, fd);
+ }
+ } else {
+ ppos = cpos;
+ prec = crec;
+ put(prec, fd);
+ }
+ }
+ } else for (cpos = keylist; cpos < lastkey; cpos++) {
+ crec = (RECHEADER *) (*cpos - depth);
+ put(crec, fd);
+ }
+}
+
+/*
+ * output the already sorted eol bin.
+ */
+void
+rd_append(binno, infl0, nfiles, outfd, buffer, bufend)
+ u_char *buffer, *bufend;
+ int binno, nfiles;
+ union f_handle infl0;
+ FILE *outfd;
+{
+ struct recheader *rec;
+ rec = (RECHEADER *) buffer;
+ if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) {
+ putline(rec, outfd);
+ while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer,
+ bufend, 0) == 0) {
+ if (!UNIQUE)
+ putline(rec, outfd);
+ }
+ }
+}
+
+/*
+ * append plain text--used after sorting the biggest bin.
+ */
+void
+concat(a, b)
+ FILE *a, *b;
+{
+ int nread;
+ char buffer[4096];
+
+ rewind(b);
+ while ((nread = fread(buffer, 1, 4096, b)) > 0)
+ EWRITE(buffer, 1, nread, a);
+}
View
67 usr.bin/sort/extern.h
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)extern.h 8.1 (Berkeley) 6/6/93
+ */
+
+void append __P((u_char **, int, int, FILE *, void (*)(), struct field *));
+void concat __P((FILE *, FILE *));
+length_t enterkey __P((struct recheader *,
+ DBT *, int, struct field *));
+void fixit __P((int *, char **));
+void fldreset __P((struct field *));
+FILE *ftmp __P((void));
+void fmerge __P((int, union f_handle,
+ int, int (*)(), FILE *, void (*)(), struct field *));
+void fsort __P((int, int, union f_handle, int, FILE *, struct field *));
+int geteasy __P((int, union f_handle,
+ int, struct recheader *, u_char *, struct field *));
+int getnext __P((int, union f_handle,
+ int, struct recheader *, u_char *, struct field *));
+int makekey __P((int, union f_handle,
+ int, struct recheader *, u_char *, struct field *));
+int makeline __P((int, union f_handle,
+ int, struct recheader *, u_char *, struct field *));
+void merge __P((int, int, int (*)(), FILE *, void (*)(), struct field *));
+void num_init __P((void));
+void onepass __P((u_char **, int, long, long *, u_char *, FILE *));
+int optval __P((int, int));
+void order __P((union f_handle, int (*)(), struct field *));
+void putline __P((struct recheader *, FILE *));
+void putrec __P((struct recheader *, FILE *));
+void rd_append __P((int, union f_handle, int, FILE *, u_char *, u_char *));
+int seq __P((FILE *, DBT *, DBT *));
+int setfield __P((char *, struct field *, int));
+void settables __P((int));
View
319 usr.bin/sort/fields.c
@@ -0,0 +1,319 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+/* Subroutines to generate sort keys. */
+
+#include "sort.h"
+
+#define blancmange(ptr) { \
+ if (BLANK & d_mask[*(ptr)]) \
+ while (BLANK & d_mask[*(++(ptr))]); \
+}
+
+#define NEXTCOL(pos) { \
+ if (!SEP_FLAG) \
+ while (BLANK & l_d_mask[*(++pos)]); \
+ while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \
+}
+
+extern u_char *enterfield __P((u_char *, u_char *, struct field *, int));
+
+extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
+
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+
+#define DECIMAL '.'
+#define OFFSET 128
+
+u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
+u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
+u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */
+u_char fnum[NBINS], rnum[NBINS];
+
+/*
+ * constructs sort key with leading recheader, followed by the key,
+ * followed by the original line.
+ */
+length_t
+enterkey(keybuf, line, size, fieldtable)
+ struct recheader *keybuf; /* pointer to start of key */
+ DBT *line;
+ int size;
+ struct field fieldtable[];
+{
+ int i;
+ register u_char *l_d_mask;
+ register u_char *lineend, *pos;
+ u_char *endkey, *keypos;
+ register struct coldesc *clpos;
+ register int col = 1;
+ struct field *ftpos;
+ l_d_mask = d_mask;
+ pos = (u_char *) line->data - 1;
+ lineend = (u_char *) line->data + line->size-1;
+ /* don't include rec_delimiter */
+ keypos = keybuf->data;
+
+ for (i = 0; i < ncols; i++) {
+ clpos = clist + i;
+ for (; (col < clpos->num) && (pos < lineend); col++)
+ { NEXTCOL(pos); }
+ if (pos >= lineend)
+ break;
+ clpos->start = SEP_FLAG ? pos + 1 : pos;
+ NEXTCOL(pos);
+ clpos->end = pos;
+ col++;
+ if (pos >= lineend) {
+ clpos->end = lineend;
+ ++i;
+ break;
+ }
+ }
+ for (; i <= ncols; i++)
+ clist[i].start = clist[i].end = lineend;
+ if (clist[0].start < (u_char *) line->data)
+ ++clist[0].start;
+ endkey = (u_char *) keybuf + size - line->size;
+ for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
+ if ((keypos = enterfield(keypos, endkey, ftpos,
+ fieldtable->flags)) == NULL)
+ return (1);
+
+ if (UNIQUE)
+ *(keypos-1) = REC_D;
+ keybuf->offset = keypos - keybuf->data;
+ keybuf->length = keybuf->offset + line->size;
+ if (keybuf->length + sizeof(TRECHEADER) > size)
+ return (1); /* line too long for buffer */
+ memcpy(keybuf->data + keybuf->offset, line->data, line->size);
+ return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ */
+u_char *
+enterfield(tablepos, endkey, cur_fld, gflags)
+ struct field *cur_fld;
+ register u_char *tablepos, *endkey;
+ int gflags;
+{
+ register u_char *start, *end, *lineend, *mask, *lweight;
+ struct column icol, tcol;
+ register u_int flags;
+ u_int Rflag;
+ icol = cur_fld->icol;
+ tcol = cur_fld->tcol;
+ flags = cur_fld->flags;
+ start = icol.p->start;
+ lineend = clist[ncols].end;
+ if (flags & BI)
+ blancmange(start);
+ start += icol.indent;
+ start = min(start, lineend);
+ if (!tcol.num)
+ end = lineend;
+ else {
+ if (tcol.indent) {
+ end = tcol.p->start;
+ if (flags & BT) blancmange(end);
+ end += tcol.indent;
+ end = min(end, lineend);
+ } else
+ end = tcol.p->end;
+ }
+ if (flags & N) {
+ Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
+ tablepos = number(tablepos, endkey, start, end, Rflag);
+ return (tablepos);
+ }
+ mask = alltable;
+ mask = cur_fld->mask;
+ lweight = cur_fld->weights;
+ for (; start < end; start++)
+ if (mask[*start]) {
+ if (*start <= 1) {
+ if (tablepos+2 >= endkey)
+ return (NULL);
+ *tablepos++ = lweight[1];
+ *tablepos++ = lweight[*start ? 2 : 1];
+ } else {
+ *tablepos++ = lweight[*start];
+ if (tablepos == endkey)
+ return (NULL);
+ }
+ }
+ *tablepos++ = lweight[0];
+ return (tablepos == endkey ? NULL : tablepos);
+}
+
+/* Uses the first bin to assign sign, expsign, 0, and the first
+ * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
+ * When sorting in forward order:
+ * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
+ * else use (0-99)->(2-102).
+ * If the exponent is >=61, use another byte for each additional 253
+ * in the exponent. Cutoff is at 567.
+ * To avoid confusing the exponent and the mantissa, use a field delimiter
+ * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
+ * only time a field delimiter can come in that position.
+ * Reverse order is done analagously.
+*/
+
+u_char *
+number(pos, bufend, line, lineend, Rflag)
+ register u_char *line, *pos, *bufend, *lineend;
+ int Rflag;
+{
+ register int or_sign, parity = 0;
+ register int expincr = 1, exponent = -1;
+ int bite, expsign = 1, sign = 1;
+ register u_char lastvalue, *nonzero, *tline, *C_TENS;
+ u_char *nweights;
+
+ if (Rflag)
+ nweights = rnum;
+ else
+ nweights = fnum;
+ if (pos > bufend - 8)
+ return (NULL);
+ /* or_sign sets the sort direction:
+ * (-r: +/-)(sign: +/-)(expsign: +/-) */
+ or_sign = sign ^ expsign ^ Rflag;
+ blancmange(line);
+ if (*line == '-') { /* set the sign */
+ or_sign ^= 1;
+ sign = 0;
+ line++;
+ }
+ /* eat initial zeroes */
+ for (; *line == '0' && line < lineend; line++);
+ /* calculate exponents < 0 */
+ if (*line == DECIMAL) {
+ exponent = 1;
+ while (*++line == '0' && line < lineend)
+ exponent++;
+ expincr = 0;
+ expsign = 0;
+ }
+ /* next character better be a digit */
+ if (*line < '1' || *line > '9' || line >= lineend) {
+ *pos++ = nweights[127];
+ return (pos);
+ }
+ if (expincr) {
+ for (tline = line-1; *++tline >= '0' &&
+ *tline <= '9' && tline < lineend;)
+ exponent++;
+ }
+ if (exponent > 567) {
+ *pos++ = nweights[sign ? (expsign ? 254 : 128)
+ : (expsign ? 0 : 126)];
+ warnx("exponent out of bounds");
+ return (pos);
+ }
+ bite = min(exponent, 61);
+ *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
+ : (expsign ? 64-bite : 64+bite)];
+ if (bite >= 61) {
+ do {
+ exponent -= bite;
+ bite = min(exponent, 254);
+ *pos++ = nweights[or_sign ? 254-bite : bite];
+ } while (bite == 254);
+ }
+ C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
+ for (; line < lineend; line++) {
+ if (*line >= '0' && *line <= '9') {
+ if (parity) {
+ *pos++ = C_TENS[lastvalue] + (or_sign ? - *line
+ : *line);
+ if (pos == bufend)
+ return (NULL);
+ if (*line != '0' || lastvalue != '0')
+ nonzero = pos;
+ } else
+ lastvalue = *line;
+ parity ^= 1;
+ } else if(*line == DECIMAL) {
+ if(!expincr) /* a decimal already occurred once */
+ break;
+ expincr = 0;
+ } else
+ break;
+ }
+ if (parity && lastvalue != '0') {
+ *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
+ OFF_TENS[lastvalue] + '0';
+ } else
+ pos = nonzero;
+ if (pos > bufend-1)
+ return (NULL);
+ *pos++ = or_sign ? nweights[254] : nweights[0];
+ return (pos);
+}
+
+/* This forces a gap around the record delimiter
+ * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
+ * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
+*/
+void
+num_init()
+{
+ int i;
+ TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
+ NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
+ OFF_TENS = TENS - '0';
+ OFF_NTENS = NEGTENS - '0';
+ for (i = 1; i < 10; i++) {
+ TENS[i] = TENS[i-1] + 10;
+ NEGTENS[i] = NEGTENS[i-1] - 10;
+ }
+ for (i = 0; i < REC_D; i++) {
+ fnum[i] = i;
+ rnum[255-i] = i;
+ }
+ for (i = REC_D; i <255; i++) {
+ fnum[i] = i+1;
+ rnum[255-i] = i-1;
+ }
+}
View
338 usr.bin/sort/files.c
@@ -0,0 +1,338 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+#include "sort.h"
+#include "fsort.h"
+
+#include <string.h>
+
+/*
+ * this is the subroutine for file management for fsort().
+ * It keeps the buffers for all temporary files.
+ */
+int
+getnext(binno, infl0, nfiles, pos, end, dummy)
+ int binno, nfiles;
+ union f_handle infl0;
+ register struct recheader *pos;
+ register u_char *end;
+ struct field *dummy;
+{
+ register int i;
+ register u_char *hp;
+ static long nleft = 0;
+ static int cnt = 0, flag = -1;
+ static u_char maxb = 0;
+ static FILE *fd;
+
+ if (nleft == 0) {
+ if (binno < 0) /* reset files. */ {
+ for (i = 0; i < nfiles; i++) {
+ rewind(fstack[infl0.top + i].fd);
+ fstack[infl0.top + i].max_o = 0;
+ }
+ flag = -1;
+ nleft = cnt = 0;
+ return(-1);
+ }
+ maxb = fstack[infl0.top].maxb;
+ for (; nleft == 0; cnt++) {
+ if (cnt >= nfiles) {
+ cnt = 0;
+ return (EOF);
+ }
+ fd = fstack[infl0.top + cnt].fd;
+ hp = (u_char *) &nleft;
+ for (i = sizeof(TRECHEADER); i; --i)
+ *hp++ = getc(fd);
+ if (binno < maxb)
+ fstack[infl0.top+cnt].max_o
+ += sizeof(nleft) + nleft;
+ else if (binno == maxb) {
+ if (binno != fstack[infl0.top].lastb) {
+ fseek(fd, fstack[infl0.top+
+ cnt].max_o, SEEK_SET);
+ fread(&nleft, sizeof(nleft), 1, fd);
+ }
+ if (nleft == 0)
+ fclose(fd);
+ } else if (binno == maxb + 1) { /* skip a bin */
+ fseek(fd, nleft, SEEK_CUR);
+ fread(&nleft, sizeof(nleft), 1, fd);
+ flag = cnt;
+ }
+ }
+ }
+ if ((u_char *) pos > end - sizeof(TRECHEADER))
+ return (BUFFEND);
+ hp = (u_char *) pos;
+ for (i = sizeof(TRECHEADER); i ; --i)
+ *hp++ = (u_char) getc(fd);
+ if (end - pos->data < pos->length) {
+ for (i = sizeof(TRECHEADER); i ; i--)
+ ungetc(*--hp, fd);
+ return (BUFFEND);
+ }
+ fread(pos->data, pos->length, 1, fd);
+ nleft -= pos->length + sizeof(TRECHEADER);
+ if (nleft == 0 && binno == fstack[infl0.top].maxb)
+ fclose(fd);
+ return (0);
+}
+
+/*
+ * this is called when there is no special key. It's only called
+ * in the first fsort pass.
+ */
+int
+makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
+ int flno, nfiles;
+ union f_handle filelist;
+ struct recheader *buffer;
+ u_char *bufend;
+ struct field *dummy2;
+{
+ static char *opos;
+ register char *end, *pos;
+ static int fileno = 0, overflow = 0;
+ static FILE *fd = 0;
+ register int c;
+
+ pos = (char *) buffer->data;
+ end = min((char *) bufend, pos + MAXLLEN);
+ if (overflow) {
+ memmove(pos, opos, bufend - (u_char *) opos);
+ pos += ((char *) bufend - opos);
+ overflow = 0;
+ }
+ for (;;) {
+ if (flno >= 0) {
+ if (!(fd = fstack[flno].fd))
+ return (EOF);
+ } else if (!fd) {
+ if (fileno >= nfiles) return(EOF);
+ if (!(fd = fopen(filelist.names[fileno], "r")))
+ err(2, "%s", filelist.names[fileno]);
+ ++fileno;
+ }
+ while ((pos < end) && ((c = getc(fd)) != EOF)) {
+ if ((*pos++ = c) == REC_D) {
+ buffer->offset = 0;
+ buffer->length = pos - (char *) buffer->data;
+ return (0);
+ }
+ }
+ if (pos >= end && end == (char *) bufend) {
+ if ((char *) buffer->data < end) {
+ overflow = 1;
+ opos = (char *) buffer->data;
+ }
+ return (BUFFEND);
+ } else if (c == EOF) {
+ if (buffer->data != (u_char *) pos) {
+ warnx("last character not record delimiter");
+ *pos++ = REC_D;
+ buffer->offset = 0;
+ buffer->length = pos - (char *) buffer->data;
+ return(0);
+ }
+ FCLOSE(fd);
+ fd = 0;
+ if(flno >= 0) fstack[flno].fd = 0;
+ } else {
+ buffer->data[100] = '\000';
+ warnx("line too long:ignoring %s...", buffer->data);
+ }
+ }
+}
+
+/*
+ * This generates keys. It's only called in the first fsort pass
+ */
+int
+makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
+ int flno, nfiles;
+ union f_handle filelist;
+ struct recheader *buffer;
+ u_char *bufend;
+ struct field *ftbl;
+{
+ static int (*get)();
+ static int fileno = 0;
+ static FILE *dbdesc = 0;
+ static DBT dbkey[1], line[1];
+ static int overflow = 0;
+ int c;
+ if (overflow) {
+ overflow = 0;
+ enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
+ return (0);
+ }
+ for (;;) {
+ if (flno >= 0) {
+ get = seq;
+ if (!(dbdesc = fstack[flno].fd))
+ return(EOF);
+ } else if (!dbdesc) {
+ if (fileno >= nfiles)
+ return (EOF);
+ dbdesc = fopen(filelist.names[fileno], "r");
+ if (!dbdesc)
+ err(2, "%s", filelist.names[fileno]);
+ ++fileno;
+ get = seq;
+ }
+ if (!(c = get(dbdesc, line, dbkey))) {
+ if ((signed)line->size > bufend - buffer->data)
+ overflow = 1;
+ else
+ overflow = enterkey(buffer, line,
+ bufend - (u_char *) buffer, ftbl);
+ if (overflow)
+ return (BUFFEND);
+ else
+ return (0);
+ }
+ if (c == EOF) {
+ FCLOSE(dbdesc);
+ dbdesc = 0;
+ if (flno >= 0) fstack[flno].fd = 0;
+ } else {
+
+ ((char *) line->data)[60] = '\000';
+ warnx("line too long: ignoring %.100s...",
+ (char *)line->data);
+ }
+
+ }
+}
+
+/*
+ * get a key/line pair from fd
+ */
+int
+seq(fd, line, key)
+ FILE *fd;
+ DBT *key, *line;
+{
+ static char *buf, flag = 1;
+ register char *end, *pos;
+ register int c;
+ if (flag) {
+ flag = 0;
+ buf = (char *) linebuf;
+ end = buf + MAXLLEN;
+ line->data = buf;
+ }
+ pos = buf;
+ while ((c = getc(fd)) != EOF) {
+ if ((*pos++ = c) == REC_D) {
+ line->size = pos - buf;
+ return (0);
+ }
+ if (pos == end) {
+ line->size = MAXLLEN;
+ *--pos = REC_D;
+ while ((c = getc(fd)) != EOF) {
+ if (c == REC_D)
+ return (BUFFEND);
+ }
+ }
+ }
+ if (pos != buf) {
+ warnx("last character not record delimiter");
+ *pos++ = REC_D;
+ line->size = pos - buf;
+ return (0);
+ } else
+ return (EOF);
+}
+
+/*
+ * write a key/line pair to a temporary file
+ */
+void
+putrec(rec, fd)
+ register struct recheader *rec;
+ register FILE *fd;
+{
+ EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fd);
+}
+
+/*
+ * write a line to output
+ */
+void
+putline(rec, fd)
+ register struct recheader *rec;
+ register FILE *fd;
+{
+ EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fd);
+}
+
+/*
+ * get a record from a temporary file. (Used by merge sort.)
+ */
+int
+geteasy(flno, filelist, nfiles, rec, end, dummy2)
+ int flno, nfiles;
+ union f_handle filelist;
+ register struct recheader *rec;
+ register u_char *end;
+ struct field *dummy2;
+{
+ int i;
+ FILE *fd;
+ fd = fstack[flno].fd;
+ if ((u_char *) rec > end - sizeof(TRECHEADER))
+ return (BUFFEND);
+ if (!fread(rec, 1, sizeof(TRECHEADER), fd)) {
+ fclose(fd);
+ fstack[flno].fd = 0;
+ return (EOF);
+ }
+ if (end - rec->data < rec->length) {
+ for (i = sizeof(TRECHEADER) - 1; i >= 0; i--)
+ ungetc(*((char *) rec + i), fd);
+ return (BUFFEND);
+ }
+ fread(rec->data, rec->length, 1, fd);
+ return (0);
+}
View
286 usr.bin/sort/fsort.c
@@ -0,0 +1,286 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+/*
+ * Read in the next bin. If it fits in one segment sort it;
+ * otherwise refine it by segment deeper by one character,
+ * and try again on smaller bins. Sort the final bin at this level
+ * of recursion to keep the head of fstack at 0.
+ * After PANIC passes, abort to merge sort.
+*/
+#include "sort.h"
+#include "fsort.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+u_char **keylist = 0, *buffer = 0, *linebuf = 0;
+struct tempfile fstack[MAXFCT];
+extern char *toutpath;
+#define FSORTMAX 4
+int PANIC = FSORTMAX;
+
+void
+fsort(binno, depth, infiles, nfiles, outfd, ftbl)
+ register int binno, depth, nfiles;
+ register union f_handle infiles;
+ FILE *outfd;
+ register struct field *ftbl;
+{
+ register u_char *bufend, **keypos, *tmpbuf;
+ u_char *weights;
+ int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
+ register int c, nelem;
+ long sizes [NBINS+1];
+ union f_handle tfiles, mstart = {MAXFCT-16};
+ register int (*get)(int, union f_handle, int, RECHEADER *,
+ u_char *, struct field *);
+ register struct recheader *crec;
+ struct field tfield[2];
+ FILE *prevfd, *tailfd[FSORTMAX+1];
+
+ memset(tailfd, 0, sizeof(tailfd));
+ prevfd = outfd;
+ memset(tfield, 0, sizeof(tfield));
+ if (ftbl[0].flags & R)
+ tfield[0].weights = Rascii;
+ else
+ tfield[0].weights = ascii;
+ tfield[0].icol.num = 1;
+ weights = ftbl[0].weights;
+ if (!buffer) {
+ buffer = malloc(BUFSIZE);
+ keylist = malloc(MAXNUM * sizeof(u_char *));
+ if (!SINGL_FLD)
+ linebuf = malloc(MAXLLEN);
+ }
+ bufend = buffer + BUFSIZE;
+ if (binno >= 0) {
+ tfiles.top = infiles.top + nfiles;
+ get = getnext;
+ } else {
+ tfiles.top = 0;
+ if (SINGL_FLD)
+ get = makeline;
+ else
+ get = makekey;
+ }
+ for (;;) {
+ memset(sizes, 0, sizeof(sizes));
+ c = ntfiles = 0;
+ if (binno == weights[REC_D] &&
+ !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
+ rd_append(weights[REC_D],
+ infiles, nfiles, prevfd, buffer, bufend);
+ break;
+ } else if (binno == weights[REC_D]) {
+ depth = 0; /* start over on flat weights */
+ ftbl = tfield;
+ weights = ftbl[0].weights;
+ }
+ while (c != EOF) {
+ keypos = keylist;
+ nelem = 0;
+ crec = (RECHEADER *) buffer;
+ while((c = get(binno, infiles, nfiles, crec, bufend,
+ ftbl)) == 0) {
+ *keypos++ = crec->data + depth;
+ if (++nelem == MAXNUM) {
+ c = BUFFEND;
+ break;
+ }
+ crec =(RECHEADER *) ((char *) crec +
+ SALIGN(crec->length) + sizeof(TRECHEADER));
+ }
+ if (c == BUFFEND || ntfiles || mfct) { /* push */
+ if (panic >= PANIC) {
+ fstack[MAXFCT-16+mfct].fd = ftmp();
+ if (radixsort(keylist, nelem, weights,
+ REC_D))
+ err(2, NULL);
+ append(keylist, nelem, depth, fstack[
+ MAXFCT-16+mfct].fd, putrec, ftbl);
+ mfct++;
+ /* reduce number of open files */
+ if (mfct == 16 ||(c == EOF && ntfiles)) {
+ tmpbuf = malloc(bufend -
+ crec->data);
+ memmove(tmpbuf, crec->data,
+ bufend - crec->data);
+ fstack[tfiles.top + ntfiles].fd
+ = ftmp();
+ fmerge(0, mstart, mfct, geteasy,
+ fstack[tfiles.top+ntfiles].fd,
+ putrec, ftbl);
+ ++ntfiles;
+ mfct = 0;
+ memmove(crec->data, tmpbuf,
+ bufend - crec->data);
+ free(tmpbuf);
+ }
+ } else {
+ fstack[tfiles.top + ntfiles].fd= ftmp();
+ onepass(keylist, depth, nelem, sizes,
+ weights, fstack[tfiles.top+ntfiles].fd);
+ ++ntfiles;
+ }
+ }
+ }
+ get = getnext;
+ if (!ntfiles && !mfct) { /* everything in memory--pop */
+ if (nelem > 1)
+ if (radixsort(keylist, nelem, weights, REC_D))
+ err(2, NULL);
+ append(keylist, nelem, depth, outfd, putline, ftbl);
+ break; /* pop */
+ }
+ if (panic >= PANIC) {
+ if (!ntfiles)
+ fmerge(0, mstart, mfct, geteasy,
+ outfd, putline, ftbl);
+ else
+ fmerge(0, tfiles, ntfiles, geteasy,
+ outfd, putline, ftbl);
+ break;
+
+ }
+ total = maxb = lastb = 0; /* find if one bin dominates */
+ for (i = 0; i < NBINS; i++)
+ if (sizes[i]) {
+ if (sizes[i] > sizes[maxb])
+ maxb = i;
+ lastb = i;
+ total += sizes[i];
+ }
+ if (sizes[maxb] < max((total / 2) , BUFSIZE))
+ maxb = lastb; /* otherwise pop after last bin */
+ fstack[tfiles.top].lastb = lastb;
+ fstack[tfiles.top].maxb = maxb;
+
+ /* start refining next level. */
+ get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */
+ for (i = 0; i < maxb; i++) {
+ if (!sizes[i]) /* bin empty; step ahead file offset */
+ get(i, tfiles, ntfiles, crec, bufend, 0);
+ else
+ fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
+ }
+ if (lastb != maxb) {
+ if (prevfd != outfd)
+ tailfd[panic] = prevfd;
+ prevfd = ftmp();
+ for (i = maxb+1; i <= lastb; i++)
+ if (!sizes[i])
+ get(i, tfiles, ntfiles, crec, bufend,0);
+ else
+ fsort(i, depth+1, tfiles, ntfiles,
+ prevfd, ftbl);
+ }
+
+ /* sort biggest (or last) bin at this level */
+ depth++;
+ panic++;
+ binno = maxb;
+ infiles.top = tfiles.top; /* getnext will free tfiles, */
+ nfiles = ntfiles; /* so overwrite them */
+ }
+ if (prevfd != outfd) {
+ concat(outfd, prevfd);
+ fclose(prevfd);
+ }
+ for (i = panic; i >= 0; --i)
+ if (tailfd[i]) {
+ concat(outfd, tailfd[i]);
+ fclose(tailfd[i]);
+ }
+}
+
+/*
+ This is one pass of radix exchange, dumping the bins to disk.
+ */
+#define swap(a, b, t) t = a, a = b, b = t
+void
+onepass(a, depth, n, sizes, tr, fd)
+ u_char **a;
+ int depth;
+ long n, sizes[];
+ u_char *tr;
+ FILE *fd;
+{
+ long tsizes[NBINS+1];
+ u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
+ static histo[256];
+ int *hp;
+ register int c;
+ u_char **an, *t, **aj;
+ register u_char **ak, *r;
+
+ memset(tsizes, 0, sizeof(tsizes));
+ depth += sizeof(TRECHEADER);
+ an = a + n;
+ for (ak = a; ak < an; ak++) {
+ histo[c = tr[**ak]]++;
+ tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
+ }
+
+ bin[0] = a;
+ bpmax = bin + 256;
+ tp = top, hp = histo;
+ for (bp = bin; bp < bpmax; bp++) {
+ *tp++ = *(bp+1) = *bp + (c = *hp);
+ *hp++ = 0;
+ if (c <= 1)
+ continue;
+ }
+ for(aj = a; aj < an; *aj = r, aj = bin[c+1])
+ for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
+ swap(*ak, r, t);
+
+ for (ak = a, c = 0; c < 256; c++) {
+ an = bin[c+1];
+ n = an - ak;
+ tsizes[c] += n * sizeof(TRECHEADER);
+ /* tell getnext how many elements in this bin, this segment. */
+ EWRITE(tsizes+c, sizeof(long), 1, fd);
+ sizes[c] += tsizes[c];
+ for (; ak < an; ++ak)
+ putrec((RECHEADER *) *ak, fd);
+ }
+}
View
60 usr.bin/sort/fsort.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fsort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#define POW 20 /* exponent for buffer size */
+#define BUFSIZE (1 << POW)
+#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */
+#define BUFFEND (EOF-2)
+#define MAXFCT 1000
+#define MAXLLEN ((1 << min(POW-4, 16)) - 14)
+
+extern u_char **keylist, **l2buf, *buffer, *linebuf;
+
+/* temp files in the stack have a file descriptor, a largest bin (maxb)
+ * which becomes the last non-empty bin (lastb) when the actual largest
+ * bin is smaller than max(half the total file, BUFSIZE)
+ * Max_o is the offset of maxb so it can be sought after the other bins
+ * are sorted.
+*/
+struct tempfile {
+ FILE *fd;
+ u_char maxb;
+ u_char lastb;
+ long max_o;
+};
+extern struct tempfile fstack[MAXFCT];
View
326 usr.bin/sort/init.c
@@ -0,0 +1,326 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+#include "sort.h"
+
+#include <ctype.h>
+#include <string.h>
+
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+u_char gweights[NBINS];
+
+/*
+ * clist (list of columns which correspond to one or more icol or tcol)
+ * is in increasing order of columns.
+ * Fields are kept in increasing order of fields.
+ */
+
+/*
+ * keep clist in order--inserts a column in a sorted array
+ */
+static void
+insertcol(field)
+ struct field *field;
+{
+ int i;
+ for (i = 0; i < ncols; i++)
+ if (field->icol.num <= clist[i].num)
+ break;
+ if (field->icol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->icol.num;
+ ncols++;
+ }
+ if (field->tcol.num && field->tcol.num != field->icol.num) {
+ for (i = 0; i < ncols; i++)
+ if (field->tcol.num <= clist[i].num)
+ break;
+ if (field->tcol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->tcol.num;
+ ncols++;
+ }
+ }
+}
+
+/*
+ * matches fields with the appropriate columns--n^2 but who cares?
+ */
+void
+fldreset(fldtab)
+ struct field *fldtab;
+{
+ int i;
+ fldtab[0].tcol.p = clist+ncols-1;
+ for (++fldtab; fldtab->icol.num; ++fldtab) {
+ for (i = 0; fldtab->icol.num != clist[i].num; i++);
+ fldtab->icol.p = clist + i;
+ if (!fldtab->tcol.num)
+ continue;
+ for (i = 0; fldtab->tcol.num != clist[i].num; i++);
+ fldtab->tcol.p = clist + i;
+ }
+}
+
+/*
+ * interprets a column in a -k field
+ */
+char *
+setcolumn(pos, cur_fld, gflag)
+ char *pos;
+ struct field *cur_fld;
+ int gflag;
+{
+ struct column *col;
+ int tmp;
+ col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
+ pos += sscanf(pos, "%d", &(col->num));
+ while (isdigit(*pos))
+ pos++;
+ if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
+ errx(2, "field numbers must be positive");
+ if (*pos == '.') {
+ if (!col->num)
+ errx(2, "cannot indent end of line");
+ pos += sscanf(++pos, "%d", &(col->indent));
+ while (isdigit(*pos))
+ pos++;
+ if (&cur_fld->icol == col)
+ col->indent--;
+ if (col->indent < 0)
+ errx(2, "illegal offset");
+ }
+ if (optval(*pos, cur_fld->tcol.num))
+ while (tmp = optval(*pos, cur_fld->tcol.num)) {
+ cur_fld->flags |= tmp;
+ pos++;
+ }
+ if (cur_fld->icol.num == 0)
+ cur_fld->icol.num = 1;
+ return (pos);
+}
+
+int
+setfield(pos, cur_fld, gflag)
+ char *pos;
+ struct field *cur_fld;
+ int gflag;
+{
+ static int nfields = 0;
+ int tmp;
+ char *setcolumn();
+ if (++nfields == ND)
+ errx(2, "too many sort keys. (Limit is %d)", ND-1);
+ cur_fld->weights = ascii;
+ cur_fld->mask = alltable;
+ pos = setcolumn(pos, cur_fld, gflag);
+ if (*pos == '\0') /* key extends to EOL. */
+ cur_fld->tcol.num = 0;
+ else {
+ if (*pos != ',')
+ errx(2, "illegal field descriptor");
+ setcolumn((++pos), cur_fld, gflag);
+ }
+ if (!cur_fld->flags)
+ cur_fld->flags = gflag;
+ tmp = cur_fld->flags;
+
+ /*
+ * Assign appropriate mask table and weight table.
+ * If the global weights are reversed, the local field
+ * must be "re-reversed".
+ */
+ if (((tmp & R) ^ (gflag & R)) && tmp & F)
+ cur_fld->weights = RFtable;
+ else if (tmp & F)
+ cur_fld->weights = Ftable;
+ else if ((tmp & R) ^ (gflag & R))
+ cur_fld->weights = Rascii;
+ if (tmp & I)
+ cur_fld->mask = itable;
+ else if (tmp & D)
+ cur_fld->mask = dtable;
+ cur_fld->flags |= (gflag & (BI | BT));
+ if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
+ cur_fld->flags &= (D|F|I|N|R|BI);
+ if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
+ && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
+ && cur_fld->tcol.indent < cur_fld->icol.indent))
+ errx(2, "fields out of order");
+ insertcol(cur_fld);
+ return (cur_fld->tcol.num);
+}
+
+int
+optval(desc, tcolflag)
+ int desc, tcolflag;
+{
+ switch(desc) {
+ case 'b':
+ if (!tcolflag)
+ return(BI);
+ else
+ return(BT);
+ case 'd': return(D);
+ case 'f': return(F);
+ case 'i': return(I);
+ case 'n': return(N);
+ case 'r': return(R);
+ default: return(0);
+ }
+}
+
+void
+fixit(argc, argv)
+ int *argc;
+ char **argv;
+{
+ int i, j, v, w, x;
+ static char vbuf[ND*20], *vpos, *tpos;
+ vpos = vbuf;
+
+ for (i = 1; i < *argc; i++) {
+ if (argv[i][0] == '+') {
+ tpos = argv[i]+1;
+ argv[i] = vpos;
+ vpos += sprintf(vpos, "-k");
+ tpos += sscanf(tpos, "%d", &v);
+ while (isdigit(*tpos))
+ tpos++;
+ vpos += sprintf(vpos, "%d", v+1);
+ if (*tpos == '.') {
+ tpos += sscanf(++tpos, "%d", &x);
+ vpos += sprintf(vpos, ".%d", x+1);
+ }
+ while (*tpos)
+ *vpos++ = *tpos++;
+ vpos += sprintf(vpos, ",");
+ if (argv[i+1] &&
+ argv[i+1][0] == '-' && isdigit(argv[i+1][1])) {
+ tpos = argv[i+1] + 1;
+ tpos += sscanf(tpos, "%d", &w);
+ while (isdigit(*tpos))
+ tpos++;
+ x = 0;
+ if (*tpos == '.') {
+ tpos += sscanf(++tpos, "%d", &x);
+ while (isdigit(*tpos))
+ *tpos++;
+ }
+ if (x) {
+ vpos += sprintf(vpos, "%d", w+1);
+ vpos += sprintf(vpos, ".%d", x);
+ } else
+ vpos += sprintf(vpos, "%d", w);
+ while (*tpos)
+ *vpos++ = *tpos++;
+ for (j= i+1; j < *argc; j++)
+ argv[j] = argv[j+1];
+ *argc -= 1;
+ }
+ }
+ }
+}
+
+/*
+ * ascii, Rascii, Ftable, and RFtable map
+ * REC_D -> REC_D; {not REC_D} -> {not REC_D}.
+ * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
+ * Note: when sorting in forward order, to encode character zero in a key,
+ * use \001\001; character 1 becomes \001\002. In this case, character 0
+ * is reserved for the field delimiter. Analagously for -r (fld_d = 255).
+ * Note: this is only good for ASCII sorting. For different LC 's,
+ * all bets are off. See also num_init in number.c
+ */
+void
+settables(gflags)
+ int gflags;
+{
+ u_char *wts;
+ int i, incr;
+ for (i=0; i < 256; i++) {
+ ascii[i] = i;
+ if (i > REC_D && i < 255 - REC_D+1)
+ Rascii[i] = 255 - i + 1;
+ else
+ Rascii[i] = 255 - i;
+ if (islower(i)) {
+ Ftable[i] = Ftable[i- ('a' -'A')];
+ RFtable[i] = RFtable[i - ('a' - 'A')];
+ } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
+ Ftable[i] = i + 1;
+ RFtable[i] = Rascii[i] - 1;
+ } else {
+ Ftable[i] = i;
+ RFtable[i] = Rascii[i];
+ }
+ alltable[i] = 1;
+ if (i == '\n' || isprint(i))
+ itable[i] = 1;
+ else itable[i] = 0;
+ if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
+ dtable[i] = 1;
+ else dtable[i] = 0;
+ }
+ Rascii[REC_D] = RFtable[REC_D] = REC_D;
+ if (REC_D >= 'A' && REC_D < 'Z')
+ ++Ftable[REC_D + ('a' - 'A')];
+ if (gflags & R && (!(gflags & F) || !SINGL_FLD))
+ wts = Rascii;
+ else if (!(gflags & F) || !SINGL_FLD)
+ wts = ascii;
+ else if (gflags & R)
+ wts = RFtable;
+ else
+ wts = Ftable;
+ memmove(gweights, wts, sizeof(gweights));
+ incr = (gflags & R) ? -1 : 1;
+ for (i = 0; i < REC_D; i++)
+ gweights[i] += incr;
+ gweights[REC_D] = ((gflags & R) ? 255 : 0);
+ if (SINGL_FLD && gflags & F) {
+ for (i = 0; i < REC_D; i++) {
+ ascii[i] += incr;
+ Rascii[i] += incr;
+ }
+ ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
+ }
+}
View
304 usr.bin/sort/msort.c
@@ -0,0 +1,304 @@
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+#include "sort.h"
+#include "fsort.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Subroutines using comparisons: merge sort and check order */
+#define DELETE (1)
+#define LALIGN(n) ((n+3) & ~3)
+
+typedef struct mfile {
+ u_char *end;
+ short flno;
+ struct recheader rec[1];
+} MFILE;
+typedef struct tmfile {
+ u_char *end;
+ short flno;
+ struct trecheader rec[1];
+} TMFILE;
+u_char *wts, *wts1 = 0;
+struct mfile *cfilebuf;
+
+static int cmp __P((struct recheader *, struct recheader *));
+static int insert __P((struct mfile **, struct mfile **, int, int));
+
+void
+fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
+ union f_handle files;
+ int binno, nfiles;
+ int (*get)();
+ FILE *outfd;
+ void (*fput)();
+ struct field *ftbl;
+{
+ FILE *tout;
+ int i, j, last;
+ void (*put)(struct recheader *, FILE *);
+ extern int geteasy();
+ struct tempfile *l_fstack;
+
+ wts = ftbl->weights;
+ if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
+ wts1 = (ftbl->flags & R) ? Rascii : ascii;
+ if (!cfilebuf)
+ cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
+
+ i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
+ if (!buffer || i > BUFSIZE) {
+ buffer = buffer ? realloc(buffer, i) : malloc(i);
+ if (!buffer)
+ err(2, NULL);
+ if (!SINGL_FLD)
+ linebuf = malloc(MAXLLEN);
+ }
+
+ if (binno >= 0)
+ l_fstack = fstack + files.top;
+ else
+ l_fstack = fstack;
+ while (nfiles) {
+ put = putrec;
+ for (j = 0; j < nfiles; j += 16) {
+ if (nfiles <= 16) {
+ tout = outfd;
+ put = fput;
+ }
+ else
+ tout = ftmp();
+ last = min(16, nfiles - j);
+ if (binno < 0) {
+ for (i = 0; i < last; i++)
+ if (!(l_fstack[i+MAXFCT-1-16].fd =
+ fopen(files.names[j + i], "r")))
+ err(2, "%s", files.names[j+i]);
+ merge(MAXFCT-1-16, last, get, tout, put, ftbl);
+ }
+ else {
+ for (i = 0; i< last; i++)
+ rewind(l_fstack[i+j].fd);
+ merge(files.top+j, last, get, tout, put, ftbl);
+ }
+ if (nfiles > 16) l_fstack[j/16].fd = tout;
+ }
+ nfiles = (nfiles + 15) / 16;
+ if (nfiles == 1)
+ nfiles = 0;
+ if (binno < 0) {
+ binno = 0;
+ get = geteasy;
+ files.top = 0;
+ }
+ }
+}
+
+void
+merge(infl0, nfiles, get, outfd, put, ftbl)
+ int infl0, nfiles;
+ int (*get)();
+ void (*put)(struct recheader *, FILE *);
+ FILE *outfd;
+ struct field *ftbl;
+{
+ int c, i, j;
+ union f_handle dummy = {0};
+ struct mfile *flist[16], *cfile;
+ for (i = j = 0; i < nfiles; i++) {
+ cfile = (MFILE *) (buffer +
+ i * LALIGN(MAXLLEN + sizeof(TMFILE)));
+ cfile->flno = j + infl0;
+ cfile->end = cfile->rec->data + MAXLLEN;
+ for (c = 1; c == 1;) {
+ if (EOF == (c = get(j+infl0, dummy, nfiles,
+ cfile->rec, cfile->end, ftbl))) {
+ --i;
+ --nfiles;
+ break;
+ }
+ if (i)
+ c =