Permalink
Browse files

MB-7758: implemented faster misc:dir_size

On my box it appears to be a massive improvement.

(n_0@10.17.37.209)9> timer:tc(dir_size, get, [BD]).
{734,3357296}
(n_0@10.17.37.209)10> timer:tc(dir_size, get, [BD]).
{692,3357296}
(n_0@10.17.37.209)11> timer:tc(dir_size, get, [BD]).
{962,3357296}
(n_0@10.17.37.209)12> timer:tc(dir_size, get_slow, [BD]).
{13695,3357296}
(n_0@10.17.37.209)13> timer:tc(dir_size, get_slow, [BD]).
{14616,3357296}
(n_0@10.17.37.209)14> timer:tc(dir_size, get_slow, [BD]).
{13059,3357296}

Unfortunately, while go is easy to cross-compile and run almost
anywhere, most of the cost is initialization of the go executable. I
think 3 milliseconds is still a bit on the higher side. So we have to
run it as a port.

We send the port a netstring of the path, and receive LF-terminated json.

Change-Id: I2f153bbd700809b9afe23141b6fe6e8bc958ad83
Reviewed-on: http://review.couchbase.org/24617
Tested-by: Aliaksey Kandratsenka <alkondratenko@gmail.com>
Reviewed-by: Aliaksey Artamonau <aliaksiej.artamonau@gmail.com>
  • Loading branch information...
1 parent 3a5aeba commit 6f7569a7915d37fd94397bb579a60c3543dbd8ec Aliaksey Kandratsenka committed with alk Feb 15, 2013
View
@@ -120,6 +120,7 @@ do-install:
cp -r ebin $(NS_SERVER_LIBDIR)/
mkdir -p $(NS_SERVER_LIBDIR)/priv
cp -r priv/public $(NS_SERVER_LIBDIR)/priv/
+ cp priv/i386-linux-godu priv/i386-win32-godu.exe $(NS_SERVER_LIBDIR)/priv/
mkdir -p $(ERLWSH_LIBDIR)
cp -r deps/erlwsh/ebin $(ERLWSH_LIBDIR)/
cp -r deps/erlwsh/priv $(ERLWSH_LIBDIR)/
View
@@ -0,0 +1,19 @@
+
+Simple golang implementation of du
+==================================
+
+Erlang is too slow at walking directories and computing their
+aggregate size. This is a simple implementation that speeds it up.
+
+You normally don't need to rebuild anything here. I expect this
+program to change very infrequently. So we're keeping the produced
+executables (and go produces easily usable static executables) in
+git. Simple cross-compilation and static executables are the reason
+why I've chosen golang for this.
+
+If you need to build anything, use the build-stuff.sh script. But
+you'll need to set up your go toolchain for cross-compilation.
+
+I've followed the instructions at
+http://dave.cheney.net/2012/09/08/an-introduction-to-cross-compilation-with-go
+to set up the go toolchain for cross-compilation.
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+echo "see README in build-stuff.sh directory for go cross compilation setup instructions"
+
+cd `dirname $0` || exit $?
+
+export CGO_ENABLED=0
+
+set -x
+
+GOOS=windows GOARCH=386 go build -o ../../priv/i386-win32-godu.exe || exit $?
+
+GOOS=linux GOARCH=386 go build -o ../../priv/i386-linux-godu || exit $?
View
@@ -0,0 +1,153 @@
+package main
+
+import (
+ "bufio"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// readdir lists the entries of the directory at path.
+//
+// The listing and the error are returned exactly as os.File.Readdir reports
+// them, so a non-nil error may still be accompanied by a partial listing.
+func readdir(path string) (infos []os.FileInfo, err error) {
+	// Open the directory the caller asked for. The parameter used to be
+	// ignored in favour of a hard-coded ".", which only worked because the
+	// sole caller always passes ".".
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	// n <= 0 asks Readdir for every entry in a single slice.
+	infos, err = f.Readdir(0)
+	return
+}
+
+var errorCount int = 0
+var lastError error
+
+// traverse returns the total size in bytes of all regular files found under
+// entry, descending into sub-directories recursively.
+//
+// The walk changes the process working directory into entry and lists ".",
+// so nested directories are always opened by their short relative name; the
+// previous working directory is restored on return.
+//
+// Failures do not abort the walk. Each failure is recorded exactly once in
+// the package-level errorCount / lastError and the affected directory
+// contributes whatever could still be read (possibly 0).
+func traverse(entry string) uint64 {
+	old, err := os.Getwd()
+	if err != nil {
+		errorCount += 1
+		lastError = err
+		return 0
+	}
+	defer os.Chdir(old)
+
+	if err = os.Chdir(entry); err != nil {
+		errorCount += 1
+		lastError = err
+		return 0
+	}
+
+	infos, err := readdir(".")
+	if err != nil {
+		// Record the failure once. Previously a nil listing fell through to
+		// the shared error exit and was counted a second time. Any partial
+		// listing is still summed below; a nil listing simply adds nothing.
+		errorCount += 1
+		lastError = err
+	}
+
+	var rv uint64 = 0
+	for _, info := range infos {
+		mode := info.Mode()
+		if (mode & os.ModeType) == 0 {
+			// No type bits set: a regular file.
+			rv += (uint64)(info.Size())
+		} else if (mode & os.ModeDir) != 0 {
+			rv += traverse(info.Name())
+		}
+		// Every other file type is deliberately not counted.
+	}
+	return rv
+}
+
+// doRun computes the aggregate size of path and returns the JSON reply.
+//
+// The reply is an object with three keys: "size" (total bytes), "errorCount"
+// (number of failures hit during the walk) and "lastError" (message of the
+// most recent failure, or null). When the GODU_TIMING environment variable
+// is non-empty, start time, end time and duration are printed to stderr.
+func doRun(path string) []byte {
+	before := time.Now()
+
+	// The error bookkeeping lives in package-level variables and doRun is
+	// called once per request in port mode. Reset it here so that a failure
+	// in one request is not reported again in every later reply.
+	errorCount = 0
+	lastError = nil
+
+	size := traverse(path)
+
+	outputMap := map[string]interface{}{
+		"size":       size,
+		"errorCount": errorCount,
+		"lastError":  nil,
+	}
+	if lastError != nil {
+		outputMap["lastError"] = lastError.Error()
+	}
+	output, err := json.Marshal(outputMap)
+	if err != nil {
+		panic(err)
+	}
+
+	if os.Getenv("GODU_TIMING") != "" {
+		after := time.Now()
+		duration := after.Sub(before)
+		fmt.Fprintf(os.Stderr, "\n%f %f %v\n", (float64)(before.UnixNano())*1E-9, (float64)(after.UnixNano())*1E-9, duration)
+	}
+
+	return output
+}
+
+// readNetString reads one netstring ("<length>:<payload>,") from rd and
+// returns the payload.
+//
+// Whitespace around the decimal length is tolerated. The length must fit in
+// 16 bits. Any read or parse error is returned as-is; a missing trailing
+// comma yields a descriptive error.
+func readNetString(rd *bufio.Reader) (rv string, err error) {
+	prefix, err := rd.ReadString(':')
+	if err != nil {
+		return "", err
+	}
+
+	// Drop the ':' delimiter, then any surrounding whitespace.
+	digits := strings.TrimSpace(prefix[:len(prefix)-1])
+	size, err := strconv.ParseUint(digits, 10, 16)
+	if err != nil {
+		return "", err
+	}
+
+	// Read the payload and its terminating comma in one go.
+	buf := make([]byte, size+1)
+	if _, err = io.ReadFull(rd, buf); err != nil {
+		return "", err
+	}
+
+	payload, terminator := buf[:size], buf[size]
+	if terminator != ',' {
+		return "", fmt.Errorf("Expected , got %c", terminator)
+	}
+
+	return string(payload), nil
+}
+
+// maybePanic panics with err unless err is nil.
+func maybePanic(err error) {
+	if err == nil {
+		return
+	}
+	panic(err)
+}
+
+// runPort serves requests over stdin/stdout until stdin reaches EOF.
+//
+// Each request is a netstring holding a path; each reply is the JSON
+// produced by doRun followed by a single line feed, flushed immediately.
+// Any error other than a clean EOF between requests panics.
+func runPort() {
+	in := bufio.NewReader(os.Stdin)
+	out := bufio.NewWriter(os.Stdout)
+
+	for {
+		path, err := readNetString(in)
+		if err == io.EOF {
+			return
+		}
+		maybePanic(err)
+
+		reply := doRun(path)
+
+		_, err = fmt.Fprintf(out, "%s\n", reply)
+		maybePanic(err)
+		maybePanic(out.Flush())
+	}
+}
+
+// main runs a single measurement when a path is given as the first
+// argument, printing the JSON reply to stdout; with no arguments it
+// announces port mode on stderr and serves requests from stdin.
+func main() {
+	if len(os.Args) >= 2 {
+		os.Stdout.Write(doRun(os.Args[1]))
+		return
+	}
+
+	fmt.Fprintf(os.Stderr, "working as port\n")
+	runPort()
+}
View
Binary file not shown.
Binary file not shown.
@@ -189,8 +189,8 @@ grab_couch_stats(Bucket, Config, MinFileSize) ->
{ok, CouchDir} = ns_storage_conf:this_node_dbdir(),
{ok, ViewRoot} = ns_storage_conf:this_node_ixdir(),
- DocsActualDiskSize = misc:dir_size(filename:join([CouchDir, Bucket])),
- ViewsActualDiskSize = misc:dir_size(couch_set_view:set_index_dir(ViewRoot, BinBucket)),
+ DocsActualDiskSize = dir_size:get(filename:join([CouchDir, Bucket])),
+ ViewsActualDiskSize = dir_size:get(couch_set_view:set_index_dir(ViewRoot, BinBucket)),
#ns_server_couch_stats{couch_docs_actual_disk_size = DocsActualDiskSize,
couch_views_actual_disk_size = ViewsActualDiskSize,
View
@@ -0,0 +1,132 @@
+%% @author Couchbase <info@couchbase.com>
+%% @copyright 2012 Couchbase, Inc.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+-module(dir_size).
+
+-include("ns_common.hrl").
+
+-export([get/1, get_slow/1, start_link/0]).
+
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
+ code_change/3]).
+
+
+%% @doc Returns the file name of the bundled godu executable that matches
+%% this node's system architecture, or `undefined' when none is shipped
+%% for it.
+godu_name() ->
+    godu_name(erlang:system_info(system_architecture)).
+
+%% Maps an architecture string to an executable name. The third clause
+%% matches "i?86-pc-linux-gnu" for any single character in place of `?'.
+godu_name("win32") ->
+    "i386-win32-godu.exe";
+godu_name("x86_64-pc-linux-gnu") ->
+    "i386-linux-godu";
+godu_name([$i, _ | "86-pc-linux-gnu"]) ->
+    "i386-linux-godu";
+godu_name(_Other) ->
+    undefined.
+
+%% @doc Starts the locally registered dir_size server, or returns `ignore'
+%% when no godu executable is available for this architecture.
+start_link() ->
+    maybe_start(godu_name()).
+
+%% Starts the server only when an executable name is known.
+maybe_start(undefined) ->
+    ignore;
+maybe_start(Name) ->
+    ?log_info("Starting quick version of dir_size with program name: ~s", [Name]),
+    gen_server:start_link({local, ?MODULE}, ?MODULE, Name, []).
+
+%% @doc Returns the aggregate size of the files under Dir.
+%%
+%% Uses the registered dir_size server when it is running and falls back
+%% to get_slow/1 when the server is not registered or answers `undefined'.
+get(Dir) ->
+    case erlang:whereis(?MODULE) of
+        undefined ->
+            get_slow(Dir);
+        _Pid ->
+            get_via_server(Dir)
+    end.
+
+%% Asks the server for the size; `undefined' means "use the slow path".
+get_via_server(Dir) ->
+    case gen_server:call(?MODULE, {dir_size, Dir}) of
+        undefined ->
+            get_slow(Dir);
+        Size ->
+            Size
+    end.
+
+%% @doc Pure-Erlang fallback: recursively folds over every file under Dir
+%% and sums the values reported by filelib:file_size/1.
+get_slow(Dir) ->
+    AddSize = fun (File, Total) -> Total + filelib:file_size(File) end,
+    filelib:fold_files(Dir, ".*", true, AddSize, 0).
+
+%% @doc gen_server callback. Opens a port to the godu executable found
+%% under priv/ and keeps that port as the server state.
+init(ProgramName) ->
+    Executable = menelaus_deps:local_path(["priv", ProgramName], ?MODULE),
+    PortOptions = [stream, {args, []}, binary, eof, use_stdio],
+    {ok, erlang:open_port({spawn_executable, Executable}, PortOptions)}.
+
+%% @doc Decodes one JSON reply from godu and returns its "size" field.
+%% A non-zero "errorCount" is logged together with the whole decoded reply;
+%% Dir is only used in that log message.
+decode_reply(Dir, IOList) ->
+    {struct, Decoded} = mochijson2:decode(erlang:iolist_to_binary(IOList)),
+    case proplists:get_value(<<"errorCount">>, Decoded) of
+        0 ->
+            ok;
+        _ ->
+            ?log_info("Has some errors on trying to grab aggregate size of ~s:~n~p", [Dir, Decoded])
+    end,
+    proplists:get_value(<<"size">>, Decoded).
+
+%% @doc Collects data chunks from Port until one ends with a line feed.
+%%
+%% Returns `{reply, Size}' for a complete reply, or `{stop, Size}' when the
+%% port signals eof first (whatever was collected so far is decoded). Any
+%% other message from the port raises `unexpected_message'.
+get_reply(Dir, Port, Acc) ->
+    receive
+        {Port, {data, Chunk}} ->
+            Collected = [Acc | Chunk],
+            case binary:last(Chunk) =:= $\n of
+                true ->
+                    {reply, decode_reply(Dir, Collected)};
+                false ->
+                    get_reply(Dir, Port, Collected)
+            end;
+        {Port, eof} ->
+            {stop, decode_reply(Dir, Acc)};
+        {Port, _} = Unknown ->
+            erlang:error({unexpected_message, Unknown})
+    end.
+
+%% @doc Sends Dir to the godu port as a netstring and waits for the reply.
+%%
+%% Returns a gen_server reply tuple carrying the size, or a stop tuple with
+%% reason `port_died' when the port reached eof while answering.
+handle_dir_size(Dir, Port) ->
+    %% The netstring length prefix is a byte count. iolist_size/1 gives that
+    %% for strings, binaries and nested iolists alike, whereas length/1 only
+    %% counts list elements and crashes on a binary path.
+    Size = integer_to_list(erlang:iolist_size(Dir)),
+    port_command(Port, [Size, $:, Dir, $,]),
+    case get_reply(Dir, Port, []) of
+        {reply, RV} ->
+            {reply, RV, Port};
+        {stop, RV} ->
+            {stop, port_died, RV, Port}
+    end.
+
+%% @doc gen_server callback for `{dir_size, Dir}' requests.
+%%
+%% A missing directory is an expected, frequent case. It is answered with
+%% `undefined' without involving godu, so that the error does not spam the
+%% logs.
+handle_call({dir_size, Dir}, _From, Port) ->
+    Probe = file:read_file_info(Dir),
+    case Probe of
+        {error, _Reason} ->
+            {reply, undefined, Port};
+        _Found ->
+            handle_dir_size(Dir, Port)
+    end.
+
+%% @doc gen_server callback. This server defines no casts, so any cast
+%% raises `unexpected'.
+handle_cast(_Msg, _Port) ->
+    erlang:error(unexpected).
+
+%% @doc gen_server callback for out-of-band messages.
+%%
+%% `{Port, eof}' arriving here means the godu port closed its output while
+%% no request was in flight. Stop with `port_died', the same reason used
+%% when eof is seen while waiting for a reply, instead of silently keeping
+%% a dead port as state. All other messages are ignored.
+handle_info({Port, eof}, Port) ->
+    {stop, port_died, Port};
+handle_info(_Info, State) ->
+    {noreply, State}.
+
+%% @doc gen_server callback. Performs no explicit cleanup.
+terminate(_Reason, _Port) ->
+    ok.
+
+%% @doc gen_server callback. The state is carried over unchanged across
+%% code upgrades.
+code_change(_OldVsn, Port, _Extra) ->
+    {ok, Port}.
View
@@ -1281,15 +1281,6 @@ parse_base_version(BaseVersionStr) ->
{lists:map(fun list_to_integer/1,
string:tokens(NumericVersion, ".")), Type}.
-%% Returns the size of directory's content (du -s).
-dir_size(Dir) ->
- Fn =
- fun (File, Acc) ->
- Size = filelib:file_size(File),
- Acc + Size
- end,
- filelib:fold_files(Dir, ".*", true, Fn, 0).
-
this_node_rest_port() ->
node_rest_port(node()).
@@ -50,7 +50,10 @@ pre_start() ->
misc:ping_jointo().
child_specs() ->
- [%% ns_log starts after ns_config because it needs the config to
+ [{dir_size, {dir_size, start_link, []},
+ permanent, 1000, worker, [dir_size]},
+
+ %% ns_log starts after ns_config because it needs the config to
%% find where to persist the logs
{ns_log, {ns_log, start_link, []},
permanent, 1000, worker, [ns_log]},

0 comments on commit 6f7569a

Please sign in to comment.