/*
AdjacentUniqer is an implementation of Uniqer for adjacent lines: a line is
suppressed only when it is identical to the line streamed immediately before it.

The zero value is ready to use.
*/
type AdjacentUniqer struct {
	prev    string // most recently streamed line; meaningful only when hasPrev is true
	hasPrev bool   // false until the first line has been streamed
}

/*
NewAdjacentUniqer creates an instance of AdjacentUniqer.
*/
func NewAdjacentUniqer() *AdjacentUniqer {
	return &AdjacentUniqer{}
}

/*
StreamLine reports whether the given line should be shown.
It returns false when line equals the previously streamed line, and true
otherwise (including for the very first line, even if it is empty).
Every call records line as the new previous line.
*/
func (au *AdjacentUniqer) StreamLine(line string) bool {
	// hasPrev guards the comparison so that a leading empty line is not
	// mistaken for a duplicate of the zero-valued prev.
	if au.hasPrev && au.prev == line {
		return false
	}
	au.prev = line
	au.hasPrev = true
	return true
}
/*
printError converts err into a process exit status.
It returns 0 when err is nil. Otherwise it prints the error message followed
by a newline and returns statusCode.

The message goes to standard error so that it never mixes with the result
lines, which are written to standard output when no OUTPUT file is given.
*/
func printError(err error, statusCode int) int {
	if err == nil {
		return 0
	}
	fmt.Fprintln(os.Stderr, err)
	return statusCode
}
-*/ -type Arguments struct { - Options *Options - Input io.Reader - Output io.Writer -} - -type entry struct { - line string - duplicatedCount int - uniqCount int -} - -func closeImpl(value interface{}) { - var closer, ok = value.(io.Closer) - if ok { - closer.Close() - } -} - -/* -Close finalize the files of Arguments. -*/ -func (args *Arguments) Close() { - closeImpl(args.Input) - closeImpl(args.Output) -} - -/* -NewArguments construct an instance of Arguments with the given parameters. -*/ -func NewArguments(opts *Options, args []string) (*Arguments, error) { - var arguments = Arguments{Options: opts} - var input, output, err = parseCliArguments(args) - arguments.Input = input - arguments.Output = output - return &arguments, err -} - -func parseCliArguments(args []string) (*os.File, *os.File, error) { - switch len(args) { - case 0: - return os.Stdin, os.Stdout, nil - case 1: - var input, err = createInput(args[0]) - return input, os.Stdout, err - case 2: - var input, output *os.File - var err error - input, err = createInput(args[0]) - if err == nil { - output, err = createOutput(args[1]) - } - return input, output, err - } - return nil, nil, fmt.Errorf("too many arguments: %v", args) -} - -func createOutput(output string) (*os.File, error) { - if output == "-" { - return os.Stdout, nil - } - return os.OpenFile(output, os.O_CREATE|os.O_WRONLY, 0644) -} - -func createInput(input string) (*os.File, error) { - if input == "-" { - return os.Stdin, nil - } - return os.Open(input) -} - -/* -Perform reads files from args.Input and writes result to args.Output. 
-*/ -func (args *Arguments) Perform() error { - var scanner = bufio.NewScanner(args.Input) - var writer = bufio.NewWriter(args.Output) - - return args.runUnique(scanner, writer) -} - -func isPrint(uniqFlag bool, deleteLineFlag bool) bool { - return !uniqFlag && !deleteLineFlag || - uniqFlag && deleteLineFlag -} - -func (e *entry) increment(flag bool) { - if flag { - e.uniqCount++ - } else { - e.duplicatedCount++ - } -} - -func updateDatabase(line string, uniqFlag bool, entries []*entry) []*entry { - for i, entry := range entries { - if entry.line == line { - entries[i].increment(uniqFlag) - } - } - return entries -} - -func upsertDatabase(line string, uniqFlag bool, entries []*entry) []*entry { - if uniqFlag { - return updateDatabase(line, uniqFlag, entries) - } - var entry = &entry{line: line} - entry.increment(uniqFlag) - return append(entries, entry) -} - -func (args *Arguments) runUnique(scanner *bufio.Scanner, writer *bufio.Writer) error { - var entries = []*entry{} - for scanner.Scan() { - var line = scanner.Text() - var uniqFlag, lineToDB = args.isUniqLine(line, entries) - entries = upsertDatabase(lineToDB, uniqFlag, entries) - if isPrint(uniqFlag, args.Options.DeleteLines) { - writer.WriteString(line) - writer.WriteString("\n") - } - } - writer.Flush() - return nil -} - -func (opts *Options) match(readLine string, lineOfDB *entry) bool { - return readLine == lineOfDB.line -} - -func (opts *Options) isFoundLineInAdjacentDB(line string, list []*entry) bool { - if len(list) == 0 { - return false - } - return opts.match(line, list[len(list)-1]) -} - -func (opts *Options) isFoundLineInDB(line string, list []*entry) bool { - for _, lineInList := range list { - if line == lineInList.line { - return true - } - } - return false -} - -func (args *Arguments) isUniqLine(line string, list []*entry) (flag bool, lineToDB string) { - lineToDB = line - if args.Options.IgnoreCase { - lineToDB = strings.ToLower(line) - } - if args.Options.Adjacent { - return 
args.Options.isFoundLineInAdjacentDB(lineToDB, list), lineToDB - } - return args.Options.isFoundLineInDB(lineToDB, list), lineToDB -} diff --git a/lib/uniq2_test.go b/lib/uniq2_test.go deleted file mode 100644 index dd76f72..0000000 --- a/lib/uniq2_test.go +++ /dev/null @@ -1,134 +0,0 @@ -package lib - -import ( - "bytes" - "fmt" - "io" - "os" - "strings" - "testing" -) - -func open(fileName string) *os.File { - var file, _ = os.Open(fileName) - return file -} - -func TestSample(t *testing.T) { - var testdata = []struct { - message string - target io.Reader - fileFlag bool - }{ - {"stdin", os.Stdin, true}, - {"file", open("../testdata/test1.txt"), true}, - {"buffer", &bytes.Buffer{}, false}, - } - - for _, td := range testdata { - var _, ok = td.target.(*os.File) - if ok != td.fileFlag { - t.Errorf("conversion error: %v", td) - } - } -} - -func deleteFile(path string) { - os.Remove(path) -} - -func TestOpenFile(t *testing.T) { - var testdata = []struct { - args []string - buildError bool - inputPath string - outputPath string - }{ - {[]string{"../testdata/test1.txt", "../testdata/dest1.txt"}, false, "../testdata/test1.txt", "../testdata/dest1.txt"}, - {[]string{"../testdata/not_exist.txt"}, true, "", ""}, - } - for _, td := range testdata { - var args, err = NewArguments(&Options{}, td.args) - defer args.Close() - defer deleteFile(td.outputPath) - if (err == nil) == td.buildError { - t.Errorf("%v: unexpected build arguments error: %v", td.args, err) - } - if err == nil { - var inputFile = args.Input.(*os.File) - var outputFile = args.Output.(*os.File) - if inputFile.Name() != td.inputPath { - t.Errorf("%v: input did not match, wont: %v, got: %v", td.args, td.inputPath, args.Input) - } - if outputFile.Name() != td.outputPath { - t.Errorf("%v: output did not match, wont: %v, got: %v", td.args, td.outputPath, args.Output) - } - } - } -} - -func TestNewArguments(t *testing.T) { - var testdata = []struct { - args []string - buildError string - wontInput io.Reader - 
wontOutput io.Writer - }{ - {[]string{}, "", os.Stdin, os.Stdout}, - {[]string{"-"}, "", os.Stdin, os.Stdout}, - {[]string{"-", "-"}, "", os.Stdin, os.Stdout}, - {[]string{"-", "-", "-"}, fmt.Sprintf("too many arguments: %v", []string{"-", "-", "-"}), nil, nil}, - } - - for _, td := range testdata { - var args, err = NewArguments(&Options{}, td.args) - if err != nil && err.Error() != td.buildError { - t.Errorf("%v: build arguments error: wont: %v, got: %v", td.args, td.buildError, err) - } - if err == nil { - if args.Input != td.wontInput { - t.Errorf("%v: input did not match, wont: %v, got: %v", td.args, td.wontInput, args.Input) - } - if args.Output != td.wontOutput { - t.Errorf("%v: output did not match, wont: %v, got: %v", td.args, td.wontOutput, args.Output) - } - } - } -} - -func TestPerform(t *testing.T) { - var testdata = []struct { - opts *Options - input string - result string - }{ - {&Options{}, "../testdata/test1.txt", "a1+a2+a3+a4+A1"}, - {&Options{Adjacent: true}, "../testdata/test1.txt", "a1+a2+a3+a4+a1+A1"}, - {&Options{IgnoreCase: true}, "../testdata/test1.txt", "a1+a2+a3+a4"}, - {&Options{Adjacent: true, IgnoreCase: true}, "../testdata/test1.txt", "a1+a2+a3+a4+a1"}, - {&Options{DeleteLines: true}, "../testdata/test1.txt", "a1+a2+a1"}, - {&Options{Adjacent: true, DeleteLines: true}, "../testdata/test1.txt", "a1+a2"}, - {&Options{IgnoreCase: true, DeleteLines: true}, "../testdata/test1.txt", "a1+a2+a1+A1"}, - {&Options{Adjacent: true, IgnoreCase: true, DeleteLines: true}, "../testdata/test1.txt", "a1+a2+A1"}, - } - - for _, td := range testdata { - var inputFile, _ = os.Open(td.input) - defer inputFile.Close() - var output = bytes.Buffer{} - var args = Arguments{Options: td.opts, Input: inputFile, Output: &output} - args.Perform() - var result = convertLnToPlus(output.String()) - if result != td.result { - t.Errorf("test failed on option %v, wont: %s, got: %s", td.opts, td.result, result) - } - } -} - -func convertLnToPlus(string string) string { - 
var lines = strings.Split(strings.TrimSpace(string), "\n") - for i, line := range lines { - lines[i] = strings.TrimSpace(line) - } - return strings.Join(lines, "+") -} diff --git a/uniq2.go b/uniq2.go new file mode 100644 index 0000000..04cc732 --- /dev/null +++ b/uniq2.go @@ -0,0 +1,96 @@ +package uniq2 + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" +) + +/* +Arguments shows data source and destination. +*/ +type Arguments struct { + input io.ReadCloser + output io.WriteCloser +} + +/* +NewArguments creates an instance of Arguments from given args. +*/ +func NewArguments(args []string) (*Arguments, error) { + input, output, err := parseCliArguments(args) + if err != nil { + return nil, err + } + return &Arguments{input: input, output: output}, nil +} + +/* +Close closes data source and destination. +*/ +func (args *Arguments) Close() { + args.input.Close() + args.output.Close() +} + +/* +Perform executes Uniq2 by following the given Parameters. +*/ +func (args *Arguments) Perform(opts *Parameters) error { + uniqer := opts.BuildUniqer() + return args.performImpl(uniqer) + +} + +func (args *Arguments) performImpl(uniqer Uniqer) error { + reader := bufio.NewReader(args.input) + writer := bufio.NewWriter(args.output) + for { + line, err := reader.ReadString('\n') + if err == io.EOF { + break + } + line = strings.TrimSpace(line) + if uniqer.StreamLine(line) { + writer.WriteString(line) + writer.WriteString("\n") + } + } + writer.Flush() + return nil +} + +func createOutput(output string) (*os.File, error) { + if output == "-" { + return os.Stdout, nil + } + return os.OpenFile(output, os.O_CREATE|os.O_WRONLY, 0644) +} + +func createInput(input string) (*os.File, error) { + if input == "-" { + return os.Stdin, nil + } + return os.Open(input) +} + +func parseCliArguments(args []string) (io.ReadCloser, io.WriteCloser, error) { + switch len(args) { + case 0: + return os.Stdin, os.Stdout, nil + case 1: + var input, err = createInput(args[0]) + return input, 
os.Stdout, err + case 2: + var input, output *os.File + var err error + input, err = createInput(args[0]) + if err == nil { + output, err = createOutput(args[1]) + } + return input, output, err + } + return nil, nil, fmt.Errorf("too many arguments: %v", args) +} diff --git a/uniq2_api.go b/uniq2_api.go new file mode 100644 index 0000000..2dfa28c --- /dev/null +++ b/uniq2_api.go @@ -0,0 +1,175 @@ +package uniq2 + +import ( + "strings" +) + +/* +Uniqer removes duplicated lines of given files by certain algorithm. +*/ +type Uniqer interface { + /* + StreamLine pours lines from reader and returns the given line should show or not show. + */ + StreamLine(line string) (uniqFlag bool) +} + +/* +InverseUniqer negates the result from the other Uniqer. +*/ +type InverseUniqer struct { + uniqer Uniqer +} + +/* +StreamLine tries to remove the duplicated line. +*/ +func (iu *InverseUniqer) StreamLine(line string) bool { + return !iu.uniqer.StreamLine(line) +} + +/* +FilterUniqer composes Filter and Uniqer. +*/ +type FilterUniqer interface { + StreamLine(line string) (uniqFlag bool) + Filter(line string) string +} + +/* +BasicFilterUniqer is the default implementation of FilterUniqer. +*/ +type BasicFilterUniqer struct { + filter Filter + uniqer Uniqer +} + +/* +Filter filters given string. +*/ +func (bfu *BasicFilterUniqer) Filter(line string) string { + return bfu.filter.Filter(line) +} + +/* +StreamLine tries to remove the duplicated line. +*/ +func (bfu *BasicFilterUniqer) StreamLine(line string) (uniqFlag bool) { + return bfu.uniqer.StreamLine(bfu.Filter(line)) +} + +/* +Filter is an interface for filtering given line. +*/ +type Filter interface { + Filter(line string) string +} + +/* +Parameters represents option parameter values. 
+*/ +type Parameters struct { + Adjacent bool + ShowCounts bool + DeleteLines bool + IgnoreCase bool +} + +func (params *Parameters) String() string { + types := []string{} + if params.Adjacent { + types = append(types, "adjacent") + } + if params.ShowCounts { + types = append(types, "show-counts") + } + if params.DeleteLines { + types = append(types, "delete-lines") + } + if params.IgnoreCase { + types = append(types, "ignore-case") + } + return strings.Join(types, ",") +} + +/* +BuildUniqer creates suitable Uniqer following params, the receiver. +*/ +func (params *Parameters) BuildUniqer() Uniqer { + filter := params.buildFilter() + uniqer := createUniqer(params.Adjacent) + if params.DeleteLines { + uniqer = &InverseUniqer{uniqer: uniqer} + } + return &BasicFilterUniqer{filter: filter, uniqer: uniqer} +} + +func createUniqer(adjacent bool) Uniqer { + if adjacent { + return NewAdjacentUniqer() + } + return NewWholeLineUniqer() +} + +func (params *Parameters) buildFilter() Filter { + filters := []Filter{} + if params.IgnoreCase { + filters = append(filters, &IgnoreCaseFilter{}) + } + if params.ShowCounts { + filters = append(filters, &CountLineFilter{counts: map[string]int{}}) + } + return &MultipleFilter{filters: filters} +} + +/* +MultipleFilter contains multiple filters, and apply filters by the order. +*/ +type MultipleFilter struct { + filters []Filter +} + +/* +Filter filters given string. +*/ +func (mf *MultipleFilter) Filter(line string) string { + for _, filter := range mf.filters { + line = filter.Filter(line) + } + return line +} + +/* +CountLineFilter counts lines. +*/ +type CountLineFilter struct { + counts map[string]int +} + +/* +Filter filters given string. +*/ +func (clf *CountLineFilter) Filter(line string) string { + clf.counts[line] = clf.counts[line] + 1 + return line +} + +/* +Counts returns count of the line. 
+*/ +func (clf *CountLineFilter) Counts(line string) int { + return clf.counts[line] +} + +/* +IgnoreCaseFilter shows an filter of ignoring case. +*/ +type IgnoreCaseFilter struct { +} + +/* +Filter filters given string. +*/ +func (icf *IgnoreCaseFilter) Filter(line string) string { + return strings.ToLower(line) +} diff --git a/uniq2_api_test.go b/uniq2_api_test.go new file mode 100644 index 0000000..c1463b1 --- /dev/null +++ b/uniq2_api_test.go @@ -0,0 +1,63 @@ +package uniq2 + +import "testing" + +func TestParametersString(t *testing.T) { + testdata := []struct { + giveParams *Parameters + wontString string + }{ + {&Parameters{Adjacent: false, ShowCounts: false, DeleteLines: false, IgnoreCase: false}, ""}, + {&Parameters{Adjacent: true, ShowCounts: false, DeleteLines: false, IgnoreCase: false}, "adjacent"}, + {&Parameters{Adjacent: false, ShowCounts: true, DeleteLines: false, IgnoreCase: false}, "show-counts"}, + {&Parameters{Adjacent: false, ShowCounts: false, DeleteLines: true, IgnoreCase: false}, "delete-lines"}, + {&Parameters{Adjacent: false, ShowCounts: false, DeleteLines: false, IgnoreCase: true}, "ignore-case"}, + {&Parameters{Adjacent: true, ShowCounts: true, DeleteLines: false, IgnoreCase: false}, "adjacent,show-counts"}, + {&Parameters{Adjacent: true, ShowCounts: true, DeleteLines: true, IgnoreCase: false}, "adjacent,show-counts,delete-lines"}, + {&Parameters{Adjacent: true, ShowCounts: true, DeleteLines: true, IgnoreCase: true}, "adjacent,show-counts,delete-lines,ignore-case"}, + {&Parameters{Adjacent: false, ShowCounts: true, DeleteLines: true, IgnoreCase: false}, "show-counts,delete-lines"}, + {&Parameters{Adjacent: false, ShowCounts: true, DeleteLines: true, IgnoreCase: true}, "show-counts,delete-lines,ignore-case"}, + {&Parameters{Adjacent: false, ShowCounts: false, DeleteLines: true, IgnoreCase: true}, "delete-lines,ignore-case"}, + } + for _, td := range testdata { + gotString := td.giveParams.String() + if td.wontString != gotString { + 
t.Errorf("Parameters(%s).String() did not match, wont %s, got %s", td.giveParams.String(), td.wontString, gotString) + } + } +} + +func TestUniq2BuildUniqer(t *testing.T) { + testdata := []struct { + params *Parameters + filterSize int + inverseUniqer bool + adjacentUniqer bool + wholeUniqer bool + }{ + {&Parameters{}, 0, false, false, true}, + {&Parameters{Adjacent: true}, 0, false, true, false}, + {&Parameters{DeleteLines: true}, 0, true, false, false}, + {&Parameters{ShowCounts: true, Adjacent: true}, 1, false, true, false}, + {&Parameters{IgnoreCase: true, ShowCounts: true, Adjacent: true}, 2, false, true, false}, + } + for _, td := range testdata { + uniqer, _ := td.params.BuildUniqer().(*BasicFilterUniqer) + filter, _ := uniqer.filter.(*MultipleFilter) + if len(filter.filters) != td.filterSize { + t.Errorf("filter size of buildUniqer({ %s }) did not match, wont %d, got %d", td.params.String(), td.filterSize, len(filter.filters)) + } + _, inverseFlag := uniqer.uniqer.(*InverseUniqer) + _, adjacentFlag := uniqer.uniqer.(*AdjacentUniqer) + _, wholeLineFlag := uniqer.uniqer.(*WholeLineUniqer) + if inverseFlag != td.inverseUniqer { + t.Errorf("type error InverseUniqer by buildUniqer({ %s })", td.params.String()) + } + if adjacentFlag != td.adjacentUniqer { + t.Errorf("type error AdjacentUniqer by buildUniqer({ %s })", td.params.String()) + } + if wholeLineFlag != td.wholeUniqer { + t.Errorf("type error InverseUniqer by buildUniqer({ %s })", td.params.String()) + } + } +} diff --git a/wholeline_uniq.go b/wholeline_uniq.go new file mode 100644 index 0000000..057e470 --- /dev/null +++ b/wholeline_uniq.go @@ -0,0 +1,26 @@ +package uniq2 + +/* +WholeLineUniqer is an implementation of uniqer for whole lines. +*/ +type WholeLineUniqer struct { + lines map[string]bool +} + +/* +NewWholeLineUniqer creates an instance of WholeLineUniqer. 
+*/ +func NewWholeLineUniqer() *WholeLineUniqer { + return &WholeLineUniqer{lines: map[string]bool{}} +} + +/* +StreamLine tries to remove the duplicated line. +*/ +func (wlu *WholeLineUniqer) StreamLine(line string) (isUniq bool) { + _, ok := wlu.lines[line] + if !ok { + wlu.lines[line] = true + } + return !ok +}