Skip to content

Commit

Permalink
[soil] Improve wedge size report
Browse files Browse the repository at this point in the history
Also add a demo of 'tail -f' and the 'multitail' tool, which seems
promising for us to use.
  • Loading branch information
Andy C committed Jan 19, 2024
1 parent 356a420 commit 76019c8
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 0 deletions.
56 changes: 56 additions & 0 deletions build/deps.sh
Expand Up @@ -443,6 +443,17 @@ container-wedges() {

}

commas() {
  # Filter: read text on stdin and write it to stdout with thousands
  # separators added to each number that stands between word boundaries,
  # e.g. 1234567 -> 1,234,567.  Numbers of three digits or fewer are untouched.
  #
  # A single substitution can only place one comma (in front of the last
  # three digits), so the sed program loops: ':a' defines a label, and 'ta'
  # branches back to it whenever the substitution changed the line.  This
  # can't be expressed as one pure regex.
  #
  # https://shallowsky.com/blog/linux/cmdline/sed-improve-comma-insertion.html
  sed -e ':a' \
      -e 's/\b\([0-9]\+\)\([0-9]\{3\}\)\b/\1,\2/' \
      -e 'ta'
}

show-wedge-tree() {
# 4 levels deep shows the package
if command -v tree > /dev/null; then
Expand All @@ -452,6 +463,51 @@ show-wedge-tree() {

# Sizes
du --si -s /wedge/*/*/* ~/wedge/*/*/*
echo

local tmp=_tmp/wedge-tree.txt

# Show the biggest files
find /wedge ~/wedge -type f -a -printf '%10s %P\n' > $tmp
sort -n $tmp | tail -n 20 | commas
echo

# Show the most common file extensions
#
# I feel like we should be able to get rid of .a files? That's 92 MB, second
# most common

cat $tmp | python3 -c '
import os, sys, collections
bytes = collections.Counter()
files = collections.Counter()
for line in sys.stdin:
size, path = line.split(None, 1)
path = path.strip() # remove newline
_, ext = os.path.splitext(path)
size = int(size)
bytes[ext] += size
files[ext] += 1
#print(bytes)
#print(files)
n = 20
print("Most common file types")
for ext, count in files.most_common()[:n]:
print("%10d %s" % (count, ext))
print()
print("Total bytes by file type")
for ext, total_bytes in bytes.most_common()[:n]:
print("%10d %s" % (total_bytes, ext))
' | commas

}

# Hand all command-line arguments to run-task.  run-task is defined outside
# this excerpt; presumably it runs the function named by the first argument
# -- TODO confirm against its definition.
run-task "$@"
44 changes: 44 additions & 0 deletions demo/tail-multiplex.sh
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Strict mode:
#   errexit  - exit as soon as an unchecked command fails
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails if any of its stages fails
set -o errexit -o nounset -o pipefail

print-output() {
  # Print the integers from BEGIN through END (inclusive), one per line,
  # sleeping 0.1 seconds after each so the output arrives as a slow stream.
  #
  # Arguments:
  #   $1 - first number to print (default 1)
  #   $2 - last number to print (default 10)
  #
  # Prints nothing when BEGIN is greater than END.
  local begin=${1:-1}
  local end=${2:-10}
  local i  # keep the loop counter out of the caller's scope

  # Arithmetic loop instead of $(seq ...): no extra process, no word splitting.
  for ((i = begin; i <= end; i++)); do
    echo "$i"
    sleep 0.1
  done
}

parallel() {
  # Run two print-output jobs concurrently (1-10 and 11-20), both writing to
  # this function's stdout, then print "done" once both have finished.
  #
  # Returns: 0 if both jobs succeeded, otherwise the exit status of a failed
  # job (the second job's status wins if both fail).
  local pid1 pid2 status=0

  print-output 1 10 &
  pid1=$!
  print-output 11 20 &
  pid2=$!

  # Wait on each PID explicitly: a bare 'wait' discards the jobs' exit
  # statuses, so a failing background job would go unnoticed.
  wait "$pid1" || status=$?
  wait "$pid2" || status=$?

  echo done
  return "$status"
}

parallel2() {
  # Run two print-output jobs concurrently, each writing to its own file
  # (_tmp/d1 and _tmp/d2, relative to the current directory), and follow both
  # files with the multitail tool.  After multitail exits, wait for both jobs
  # and print "done".
  #
  # Returns: 0 if both jobs succeeded, otherwise the exit status of a failed
  # job (the second job's status wins if both fail).
  local pid1 pid2 status=0

  mkdir -p _tmp

  # Create the files up front.  The '>' redirections below are performed by
  # the background children, so without this the viewer could start before
  # the files exist.
  : > _tmp/d1
  : > _tmp/d2

  print-output 1 10 > _tmp/d1 &
  pid1=$!
  print-output 11 20 > _tmp/d2 &
  pid2=$!

  # Alternative with plain tail: the output is not good because it prints too
  # much; also --pid would be nice for stopping.
  #tail -q -f _tmp/d1 _tmp/d2

  multitail _tmp/d1 _tmp/d2

  # Wait on each PID explicitly: a bare 'wait' discards the jobs' exit
  # statuses, so a failing background job would go unnoticed.
  wait "$pid1" || status=$?
  wait "$pid2" || status=$?

  echo done
  return "$status"
}

# TODO: try this
# https://www.vanheusden.com/multitail/

# Dispatch: treat the first command-line argument as the name of a function
# (or command) to run and pass the remaining arguments to it, e.g.
#   demo/tail-multiplex.sh parallel2
"$@"

0 comments on commit 76019c8

Please sign in to comment.