Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 355 lines (320 sloc) 9.43 KB
#!/bin/sh
#
# rfcstrip --
#
# Extract code from text files, like RFCs or I-Ds.
#
# This program is a slightly patched version of smistrip. In addition to
# MIB modules, it recognizes YANG modules and the markers:
# <CODE BEGINS> file "name-of-file"
# <CODE ENDS>
#
# OR:
# <CODE BEGINS> file
# "very-long-name-of-file"
# <CODE ENDS>
#
# Modified by Martin Bjorklund, Tail-f Systems.
#
# smistrip:
# Copyright (c) 1999 Frank Strauss, Technical University of Braunschweig.
#
# See the file "COPYING" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# NOTE: this script relies on awk (tested with GNU awk) and on getopts
# (either a shell builtin, as in bash, or a standalone utility).
#
# History:
# 0.6 - 2019-02-22
# handle -f option properly for YANG files without <CODE BEGINS>
# handle <CODE BEGINS> file with the filename on next line;
# needed for long file names
# 0.5 - 2018-10-08
# fixed bug when a line with a single '}' is not properly extracted
# 0.4 - 2018-08-13
# fixes #1 - liberal parsing of <CODE BEGINS>
# 0.3 - 2018-05-17
# handle -f option
# awk interpreter used by do_strip. The traditional location is preferred;
# when nothing executable lives there, fall back to a PATH lookup so the
# script still works on systems that install awk elsewhere.
AWK=/usr/bin/awk
[ -x "$AWK" ] || AWK=awk
# Command used to parse the options (shell builtin or standalone utility).
GETOPTS=getopts
# Version string reported by do_version.
VERSION=0.6
# Print the program name followed by $VERSION on standard output.
do_version () {
  printf 'rfcstrip %s\n' "$VERSION"
}
# Print the command synopsis and one line per option on standard output.
do_usage () {
  printf '%s\n' \
    "Usage: rfcstrip [-Vhn] [-i dir] [-d dir] [-f file] file1 [file2 [...]]" \
    "-V show version" \
    "-h show usage information" \
    "-n do not write files" \
    "-i dir try to read files from directory dir" \
    "-d dir write file to directory dir" \
    "-f file strip only the specified file" \
    "file1 ... input files to parse (RFCs, I-Ds, ...)"
}
#######################################
# Extract embedded code from one input document: sections framed by
# <CODE BEGINS> / <CODE ENDS>, SMI modules and YANG modules.
# Globals:
#   indir  (read) - optional directory prepended to the input name
#   dir    (read) - optional directory the extracted files are written to
#   test   (read) - "1" means report only, do not write any file
#   single (read) - if non-empty, only the file with this name is extracted
#   AWK    (read) - awk interpreter; plain "awk" is used when unset or empty
#   FILE, CMD (written) - resolved input path and the command that reads it
# Arguments:
#   $1 - input file name; "NAME.gz" is read when NAME itself does not exist
# Outputs:
#   one "<name>: <n> lines." line on stdout per extracted file, plus a
#   WARNING line for each non-blank line found right before a page break
#######################################
do_strip () {
  if [ "$indir" ] ; then
    FILE="$indir/$1"
  else
    FILE="$1"
  fi

  # Fall back to the compressed variant when only that one exists. Two
  # tests joined by && are used because the -a operator of [ is obsolescent.
  if [ ! -f "$FILE" ] && [ -f "$FILE.gz" ] ; then
    FILE="$FILE.gz"
  fi

  # Compressed input is decompressed on the fly.
  case "$FILE" in
    *.gz) CMD=zcat ;;
    *)    CMD=cat ;;
  esac

  # tr removes carriage returns so that DOS line endings do not defeat the
  # end-of-line anchors in the patterns below.
  $CMD "$FILE" |
    tr -d '\015' |
    ${AWK:-awk} -v test="$test" -v dir="$dir" -v single="$single" '
    # Smallest 1-based column that holds a non-blank character in
    # line[0..count-1]. Lines made only of white space are ignored and 99
    # is the upper bound. "nonblank" is the regular expression used to
    # locate the first non-blank character of a line.
    function min_indent(count, nonblank,    i, col, p) {
      col = 99
      for (i = 0 ; i < count ; i++) {
        if (line[i] !~ /^[ \t]*$/) {
          p = match(line[i], nonblank)
          if ((p < col) && (length(line[i]) >= p)) {
            col = p
          }
        }
      }
      return col
    }

    # Name of the output file: "file", placed below "dir" when dir is set.
    function out_path() {
      if (dir) {
        return dir "/" file
      }
      return file
    }

    # Write line[first..last] to path, cutting off the first col-1 columns.
    function write_lines(first, last, col, path,    i) {
      for (i = first ; i <= last ; i++) {
        print substr(line[i], col) > path
      }
    }

    # Enter a <CODE BEGINS> section; the quoted file name is taken from
    # the current input line.
    function start_generic() {
      match($0, "\"(.*)\"")
      file = substr($0, RSTART+1, RLENGTH-2)
      preskip = 3
      skip = 5
      skipped = -1
      n = 0
      type = 1
      delete line
    }

    BEGIN {
      # type: 0 = outside any code, 1 = <CODE BEGINS> section,
      #       2 = SMI module, 3 = YANG module
      type = 0
      # 1 after a <CODE BEGINS> line whose file name has not been seen yet
      gen_marker_header = 0
    }

    # generic start marker - we are a bit liberal and accept zero or more
    # spaces between the mandatory tokens
    type == 0 && /^[ \t]*<CODE BEGINS>[ \t]*file[ \t]*"(.*)"/ {
      start_generic()
      next
    }

    # generic start marker, with (hopefully) the file name on the next line
    type == 0 && /^[ \t]*<CODE BEGINS>[ \t]*file[ \t]*$/ {
      gen_marker_header = 1
      delete line
      next
    }

    # file name line that follows a bare start marker. Both quotes are
    # required, exactly as in the one-line form above; a line without a
    # closing quote would otherwise produce an empty file name.
    type == 0 && gen_marker_header == 1 && /^[ \t]*"(.*)"/ {
      start_generic()
      next
    }

    # start of SMI module (no "next": the line itself belongs to the module)
    type == 0 &&
    /^[ \t]*[A-Za-z0-9-]* *(PIB-)?DEFINITIONS *(::=)? *(BEGIN)? *$/ {
      file = $1
      preskip = 3
      skip = 4
      skipped = -1
      macro = 0
      n = 0
      type = 2
      delete line
    }

    # start of YANG module (no "next": the line itself belongs to the
    # module). The brace is written as a bracket expression so that it can
    # never be read as the start of an interval expression.
    type == 0 &&
    /^[ \t]*(sub)?module +([A-Za-z0-9-]*) *[{] *$/ {
      module = $2
      file = module".yang"
      # remember the indentation; the closing brace must have the same one
      modindent = match($0, "[^ \t]")
      preskip = 3
      skip = 4
      skipped = -1
      n = 0
      type = 3
      delete line
    }

    # process each line while inside some code
    type != 0 {
      # at the end of a page we go back up to preskip lines (which are
      # expected to be blank separator lines), and start the counter
      # skipped to skip the next few lines.
      if ($0 ~ /\[[pP]age [iv0-9]*\] */) {
        for (i = 0 ; i < preskip ; i++) {
          n--
          # some drafts do not use that separator line, so a line with
          # non-blank characters is kept and reported.
          if (!(line[n] ~ /^[ \t]*$/)) {
            print "WARNING: the line:: " line[n] ":: should be blank before a page break. It was kept. "
            n++
            break
          }
        }
        skipped = 0
      }

      # if we are skipping...
      if (skipped >= 0) {
        skipped++
        # if we have skipped enough lines to the top of the next page...
        if (skipped > skip) {
          skipped = -1
        } else {
          # finish skipping if we find a non-empty line, but not before
          # we have skipped three lines.
          if ((skipped > 3) && ($0 ~ /[^ \t]/)) {
            skipped = -1
          }
        }
      }

      # so, if we are not skipping, remember the line.
      if (skipped == -1) {
        line[n++] = $0
      }
    }

    # remember when we enter a macro definition
    type == 2 && /^[ \t]*[A-Za-z0-9-]* *MACRO *::=/ {
      macro = 1
    }

    # generic end marker
    type == 1 && /^[ \t]*<CODE ENDS>.*$/ {
      if ((length(single) == 0) || (single == file)) {
        # drop the last remembered line, normally the end marker itself
        n--
        strip = min_indent(n, "[^ \t]")
        f = out_path()

        # j: first line that is not empty
        j = 0
        while ((j < n) && (line[j] ~ /^[ \t]*$/)) {
          j++
        }
        # m: last line that is not empty
        m = n - 1
        while ((m >= 0) && (line[m] ~ /^[ \t]*$/)) {
          m--
        }

        if (test != "1") {
          write_lines(j, m, strip, f)
        }
        print f ": " (1 + m - j) " lines."
      }
      file = ""
      type = 0
      gen_marker_header = 0
    }

    # end of SMI module; an END that closes a MACRO only ends the macro
    type == 2 && /^[ \t]*END[ \t]*$/ {
      if (macro == 0) {
        if ((length(single) == 0) || (single == file)) {
          strip = min_indent(n, "[^ ]")
          f = out_path()
          if (test != "1") {
            write_lines(0, n - 1, strip, f)
          }
          print file ": " n " lines."
        }
        file = ""
        type = 0
      } else {
        macro = 0
      }
    }

    # end of YANG module
    type == 3 && /^[ \t]*[}].*$/ {
      indent = match($0, "[^ \t]")
      # we assume that a module is ended with a single "}" with the
      # same indentation level as the module statement.
      if (indent == modindent) {
        modindent = -1
        if ((length(single) == 0) || (single == file)) {
          strip = min_indent(n, "[^ ]")
          # computed even when nothing is written, because the summary
          # line below reports this name
          f = out_path()
          if (test != "1") {
            write_lines(0, n - 1, strip, f)
          }
          print f ": " n " lines."
        }
        module = ""
        file = ""
        type = 0
      }
    }
    '
}
# Parse the command line, then run do_strip on every input file named, or
# on "-" when no file is given.

# Start from empty option values so that same-named variables inherited
# from the environment cannot silently switch an option on.
test=
single=
indir=
dir=

while $GETOPTS Vhni:d:f: c ; do
  case "$c" in
    n) test=1 ;;            # report only, write nothing
    f) single=$OPTARG ;;    # extract only this file
    i) indir=$OPTARG ;;     # directory to read the inputs from
    d) dir=$OPTARG ;;       # directory to write the results to
    h)
      do_usage
      exit 0
      ;;
    V)
      do_version
      exit 0
      ;;
    *)
      # unknown option or missing argument: usage is a diagnostic here
      do_usage >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

if [ $# -eq 0 ] ; then
  do_strip -
else
  for f in "$@" ; do
    do_strip "$f"
  done
fi
exit 0
You can’t perform that action at this time.