diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f63c2af..7be9d30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,13 +12,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - run: shellcheck -o all -e SC2250,SC2292 diff-logs - - run: pip install flake8 - - run: flake8 --max-line-length=120 *.py + - run: sudo apt-get install man-db- libperl-critic-perl + - run: perlcritic diff-logs - run: echo '2000-01-01T00:00:00' > file.log - run: time ./diff-logs < file.log - run: time ./diff-logs file.log file.log - run: time tests/test.sh + - run: podman run -v.:/src ubuntu:latest /src/tests/test.sh # Runs on base container workflow-keepalive: if: github.event_name == 'schedule' diff --git a/README.md b/README.md index 76018f4..129aa5d 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,23 @@ `diff-logs` =========== [![Build Status](https://img.shields.io/github/actions/workflow/status/kernc/diff-logs/ci.yml?branch=master&style=for-the-badge)](https://github.com/kernc/diff-logs/actions) -[![Language: shell](https://img.shields.io/badge/lang-Shell-peachpuff?style=for-the-badge)](https://github.com/kernc/diff-logs) -[![Language: Python](https://img.shields.io/badge/lang-Python-skyblue?style=for-the-badge)](https://github.com/kernc/diff-logs) -[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs.py,diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs) +[![Language: Perl](https://img.shields.io/badge/lang-Perl-056?style=for-the-badge)](https://github.com/kernc/diff-logs) +[![Source lines of code](https://img.shields.io/endpoint?url=https://ghloc.vercel.app/api/kernc/diff-logs/badge?filter=diff-logs$&style=for-the-badge&color=greenyellow&label=SLOC)](https://github.com/kernc/diff-logs) [![Script 
size](https://img.shields.io/github/languages/code-size/kernc/diff-logs?style=for-the-badge&color=greenyellow)](https://github.com/kernc/diff-logs) -[![](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues) +[![Bug tracker](https://img.shields.io/github/issues/kernc/diff-logs?style=for-the-badge)](https://github.com/kernc/diff-logs/issues) A command-line utility for diff'ing log files. Quickly find **difference lines** in **all kinds of logs**, namely build/CI logs, server/container logs, or any similar such. -Figure out quickly **what changed** and _why exactly_ the shit is failing. +Figure out quickly **what changed** and _why exactly_ your shit is failing. The script works by simply replacing common stochastic string [patterns], -such as datetime timestamps, download speeds, temporary files, -HTTP header values, UUIDs, hash digests etc. with known fixed +such as datetime timestamps, download speeds, temporary filenames, +HTTP header values, UUIDs, hash digests etc. etc. with known fixed values that a tool such as `diff` can then easily skip. -[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs.py +[patterns]: https://github.com/kernc/diff-logs/blob/master/diff-logs Installation @@ -27,13 +26,12 @@ First, check if your OS distro already provides an installable `diff-logs` packa Otherwise: 1. Star, [download](https://github.com/kernc/diff-logs/archive/refs/heads/master.zip) - or clone repo. -2. (Optional) Create a symlink in your bin-dir pointing to `diff-logs` shell script: + or clone repo. 🫶 +2. 
Put `diff-logs` script into your bin-dir or elsewhere on `$PATH`: ```shell - mkdir -p ~/.local/bin - export PATH="~/.local/bin:$PATH" # Also put in .bashrc or similar - # Link script into your bin - ln -s ~/path/to/diff-logs/diff-logs ~/.local/bin/diff-logs + curl -vL https://github.com/kernc/diff-logs/raw/refs/heads/master/diff-logs | + sudo tee /usr/local/bin/diff-logs + sudo chmod +x /usr/local/bin/diff-logs ``` @@ -63,7 +61,11 @@ diff-logs FILE1 FILE2 # Invokes `meld` diff-logs < FILE1 > FILE1.clean ``` +Notes +----- +This once was Python, but Perl is even more ubiquitous. + ----- Finally, we can diff our logs with ease! 🥳 -Improvements welcome! +Improvements and additions welcome! diff --git a/diff-logs b/diff-logs index 0984e5e..ea1e4ea 100755 --- a/diff-logs +++ b/diff-logs @@ -1,24 +1,87 @@ -#!/bin/bash -# -# This is the diff-logs utility for diff'ing log files. -# -# See usage instructions below. -# -set -eu -set -o pipefail +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use File::Temp (); -lib="$(dirname "$(command -v "$0" || true)")" -pyscript="$lib/diff-logs.py" -difftool="${DIFFTOOL:-diff}" +my @PATTERNS = ( + # Date/time + [ qr/\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?/, '2111-11-11 11:11:11' ], + [ qr/\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}/, 'Thu, 11 Nov 2111 11:11:11 GMT' ], + [ qr/\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+/, '11-11-2111 11:11:11.111111' ], + [ qr/[A-Z][a-z]{2} [ \d]\d \d{2}:\d{2}/, 'Nov 11 11:11' ], # `ls -l` format + # Other timestamp + [ qr/\b\d+(?:\.\d+)?s(?:ec)?\b/, '1.1s' ], + [ qr/\b(in|since) \d+\.\d+/, 'in 1.1' ], + # File/download sizes + [ qr/\d+(?:\.\d+)?(?:\/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b/i, q["1 " .
$+{suffix}] ], + # TCP / HTTP + [ qr/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, '11.1.1.1' ], # IPv4 + [ qr/:\d{5,5}\b/, ':11111' ], # Remote port + [ qr/\bport \d{5,5}\b/, 'port 11111' ], # Remote port + [ qr{\bW/(?<quote>\\?")[^"]*\k<quote>}, 'W/"ETag"' ], # ETag header + # Common files + [ qr{/tmp/[^\s/:"']{6,}(?:/[^\s/:"']+)*/?}, '/tmp/d1ff1065' ], + # Common tools + [ qr/(?<step_no>(?:\s|\A)#\d+) \d+\.\d+/, q[$+{step_no} . " 1"] ], # Docker build steps + # strace process PIDs + [ qr/(?<prefix>(?:\b|_?)pid[ =])\d{4,}\b/, q[$+{prefix} . "11111"] ], + [ qr/(?<prefix>strace: Process )\d+/, q[$+{prefix} . "111111"] ], + + [ qr/(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}/, 'd1ff1065-d1ff-1065-1007-d1ff1065' ], + [ qr/[a-zA-Z0-9]{18,}/, 'AAAAAAAAAAAAAAAAAA' ], # Long payload + [ qr/[a-fA-F0-9]{7,}/, 'd1ff1065' ], # Hash digest + # Progress bar, e.g. in pip, tqdm + [ qr{(?<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\x{2500}-\x{259F}=.\-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*}, q[$+{indent} .
"......."] ], +); + +# Self-test to ensure idempotence for simple replacements +for my $pair (@PATTERNS) { + my ($pattern, $replacement) = @$pair; + if (index($replacement, '$+') == -1) { + if ($replacement !~ m/\A(?:$pattern)\z/s) { + die "Assertion failed: Pattern-replacement pair '$pattern' => '$replacement' not idempotent!"; + } + } +} + +# Subroutine to read from an input handle, apply all normalizations, +# and write the result to an output handle +sub normalize { + my ($in_fh, $out_fh) = @_; + while (my $line = <$in_fh>) { + for my $rule (@PATTERNS) { + my ($pattern, $replacement) = @$rule; + # Use /ee (evaluate 2x) for replacements with named backreferences + if (index($replacement, '$+') != -1) { + $line =~ s/$pattern/$replacement/gee; + } else { + $line =~ s/$pattern/$replacement/g; + } + } + print $out_fh $line; + } +} + +# Main +my $argc = @ARGV; +if ($argc == 0) { + binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); ## no critic + normalize(\*STDIN, \*STDOUT); +} elsif ($argc == 2) { + my ($file1, $file2) = @ARGV; + my $temp1 = File::Temp->new(UNLINK => 1); + my $temp2 = File::Temp->new(UNLINK => 1); + binmode($temp1, ':utf8'); binmode($temp2, ':utf8'); ## no critic + open my $fh1_in, '<:utf8', $file1 or die "Error: Cannot read '$file1': $!"; ## no critic + open my $fh2_in, '<:utf8', $file2 or die "Error: Cannot read '$file2': $!"; ## no critic + normalize($fh1_in, $temp1); + normalize($fh2_in, $temp2); + close $fh1_in; close $fh2_in; + my $difftool = $ENV{DIFFTOOL} || 'diff'; + exec $difftool, ($difftool eq 'diff' ? '--color=auto' : ()), $temp1->filename, $temp2->filename; +} else { + print STDERR "Usage: $0 < FILE # Print log file diff-friendly\n"; + print STDERR " $0 FILE1 FILE2 # Invoke \$DIFFTOOL (e.g. diff)\n"; + exit 1; +} diff --git a/diff-logs.py b/diff-logs.py deleted file mode 100755 index 18ff97c..0000000 --- a/diff-logs.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 -""" -Normalize logs so that they can be diffed effectively. 
- -This script is part of the diff-logs CLI tool. -""" -import re -import sys - - -PATTERNS = { - # Date/time - r'\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?Z?': '2111-11-11 11:11:11', - r'\w{3,}, \d{1,2} \w{3,} \d{4,4} \d{1,2}(?::\d{1,2}){2} [A-Z]{3}': 'Thu, 11 Nov 2111 11:11:11 GMT', - r'\d{2}-\d{2}-\d{4} \d{2}(?::\d{2}){2}\.\d+': '11-11-2111 11:11:11.111111', - r'[A-Z][a-z]{2} [ \d]\d \d{2}:\d{2}': 'Nov 11 11:11', # `ls -l` format - # Other timestamp - r'\b\d+(?:\.\d+)?s(?:ec)?\b': '1.1s', - r'\b(in|since) \d+\.\d+': 'in 1.1', - # File/download sizes - r'(?i)\d+(?:\.\d+)?(?:/\d)? ?(?P<suffix>[kmg](?:i?b)?)\b': r'1 \g<suffix>', - # TCP / HTTP - r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b': '11.1.1.1', # IPv4 - r':\d{5,5}\b': ':11111', # Remote port - r'\bport \d{5,5}\b': 'port 11111', # Remote port - r'\bW/(?P<quote>\\?")[^"]*(?P=quote)': 'W/"ETag"', # ETag header - # Common files - r'/tmp/[^/:"\'\s]{6,}(?:/[^/:"\'\s]+)*/?': '/tmp/d1ff1065', - # Common tools - r'(?P<step_no>(?:\s|\A)#\d+) \d+\.\d+': r'\g<step_no> 1', # Docker build steps - # strace process PIDs - r'(?P<prefix>(?:\b|_?)pid[ =])\d{4,}\b': r'\g<prefix>11111', - r'(?P<prefix>strace: Process )\d+': r'\g<prefix>111111', - - r'(?:[\da-fA-F]{4,}-){4,}[\da-fA-F]{4,}': 'd1ff1065-d1ff-1065-1007-d1ff1065', # UUID - r'[a-zA-Z0-9]{18,}': 'AAAAAAAAAAAAAAAAAA', # Long payload - r'[a-fA-F0-9]{7,}': 'd1ff1065', # Hash digest - # Progress bar, e.g. in pip, tqdm - r'(?P<indent>[ \t]*)(?: *(?:\[ *)?\d+%(?:])? *)?[[|]?[\u2500-\u259f=.-]{5,} *[\]|]?(?: *(?:\[ *)?\d+%(?:])? *)?[(]?[\d.KMGB ]+/.*': r'\g<indent>.......', # noqa: E501 -} - -assert all(re.match(fr'\A(?:{p})\Z', v) - for p, v in PATTERNS.items() - if r'\g<' not in v), \ - next(f'Pattern-replacement pair {p!r} not idempotent!'
- for p, v in PATTERNS.items() - if r'\g<' not in v and not re.match(fr'\A(?:{p})\Z', v)) - -if len(sys.argv) > 1: - raise RuntimeError('Pass file to stdin') - -for line in sys.stdin: - for pattern, replacement in PATTERNS.items(): - line = re.sub(pattern, replacement, line) - try: - print(line, end='') - except BrokenPipeError: - break