# Finding common commands from my bash history

aliases and functions are shortcuts for doing common or cumbersome tasks.
Looking through your command history can help identify good candidates
(or use your own annoyance at typing things often).

First, read my bash history file

In [1]:
import os
history_file = os.path.expanduser('~/.bash_history')
with open(history_file, errors='replace') as f:
    history = f.read().splitlines()

In [2]:
history[:10]

['echo $PATH',
 'brew cask install vscode',
 'mate ~/.bashrc',
 'which bash',
 'bash',
 'brew install git',
 'j',
 'cd',
 'pwd',
 'j']

In [3]:
len(history)

45521

Find the most common commands:

In [4]:
from collections import Counter
all_commands = Counter(history)
all_commands.most_common(10)


[('gitx', 1830),
 ('ls', 1734),
 ('git diff', 1551),
 ('mp', 831),
 ('kubectl get pod', 594),
 ('git log', 536),
 ('pr', 475),
 ('ip', 376),
 ('git push mine -f', 319),
 ('git push mine', 316)]

Partial commands

In [5]:
import shlex

def get_ngrams(history, n):
    """Get all the n-word sequences, wherever they occur in a line"""
    ngrams = []
    for line in history:
        try:
            words = tuple(shlex.split(line))
        except ValueError:
            # not a valid command, use simple splitting
            continue

        chunks = 1 + len(words) - n
        if chunks < 1:
            continue
        for i in range(chunks):
            ngrams.append(' '.join(words[i:i+n]))
    return ngrams


In [6]:
ngrams = get_ngrams(history, 2)
Counter(ngrams).most_common(10)

[('git diff', 1750),
 ('kubectl get', 1675),
 ('git push', 1117),
 ('get pod', 1100),
 ('git log', 723),
 ('push mine', 661),
 ('| grep', 660),
 ('pip install', 648),
 ('python -m', 485),
 ('kubectl logs', 411)]

In [7]:
def get_all_subcommands(history):
    """Get all subsets of commands starting from the beginning
    
    The most popular long ones of these are good candidates for aliases
    """
    subcommands = []
    for line in history:
        try:
            words = tuple(shlex.split(line))
        except ValueError:
            # not a valid command, skip it
            continue
        commands = [words]
        
        while words:
            indices = [
                words.index(c)
                for c in ('|', ';')
                if c in words 
            ]
            if indices:
                idx = min(indices)
                # add next snippet
                commands.append(words[:idx])
                words = words[idx+1:]
                # and the full right-hand side,
                # even if it's multiple commands
                commands.append(words)
            else:
                words = []
        for command in commands:
            for n in range(1, len(command)):
                subcommands.append(' '.join(command[:n]))
    return subcommands

In [8]:
all_subcommands = get_all_subcommands(history)

In [9]:
len(all_subcommands)

77699

In [10]:
counts = Counter(all_subcommands)
counts.most_common(100)

[('git', 6486),
 ('mate', 4979),
 ('kubectl', 3544),
 ('j', 2218),
 ('kubectl get', 1822),
 ('cd', 1460),
 ('docker', 1442),
 ('ls', 1201),
 ('conda', 1078),
 ('python', 1029),
 ('grep', 880),
 ('git push', 859),
 ('pip', 769),
 ('pip install', 622),
 ('make', 594),
 ('b', 556),
 ('helm', 543),
 ('ag', 516),
 ('pytest', 475),
 ('python -m', 473),
 ('npm', 449),
 ('kubectl logs', 425),
 ('cat', 413),
 ('kubectl delete', 396),
 ('kubectl get pod', 378),
 ('kubectl describe', 377),
 ('gcloud', 371),
 ('brew', 367),
 ('rm', 365),
 ('ssh', 356),
 ('kubectl delete pod', 348),
 ('git push mine', 342),
 ('git commit', 328),
 ('docker run', 326),
 ('conda install', 314),
 ('python3', 291),
 ('python3 .circleci/integration-test.py', 287),
 ('setup', 263),
 ('c', 245),
 ('pip install -e', 235),
 ('git reset', 232),
 ('git rebase', 229),
 ('conda build', 228),
 ('jupyter', 223),
 ('open', 217),
 ('git add', 206),
 ('git diff', 201),
 ('rm -rf', 193),
 ('kubectl get pod |', 193),
 ('export', 190),


Some of these most used commands are aliases:

In [12]:
alias_list = !bash -l -c alias
alias_list[10:20]

["alias c-='git-last-branch'",
 "alias checkout='git fuzzy-checkout'",
 "alias cherry='git cherry-pick'",
 "alias ci='git commit'",
 "alias cleardnscache='dscacheutil -flushcache'",
 "alias clone='git clone --recursive'",
 "alias co='git fuzzy-checkout'",
 "alias commit='git commit'",
 "alias da='deactivate'",
 "alias develop='pip install -e .'"]

parse the bash aliases into a dict

In [15]:
aliases = {}
for line in alias_list:
    alias, key_value = line.split(' ', 1)
    key, value = key_value.split('=', 1)
    aliases[key] = value


Which are my most commonly used aliases?

In [16]:
seen = set()
for cmd, count in counts.most_common(200):
    key = cmd.split()[0]
    if key in aliases and key not in seen:
        seen.add(key)
        print(f"alias {key}={aliases[key]} {count}")

alias git='hub' 6486
alias mate='openmate' 4979
alias ls='ls -G' 1201
alias b='git checkout -b' 556
alias rm='rm -i' 365
alias setup='python setup.py' 263
alias c='git fuzzy-checkout' 245
alias psg='psgrep' 135
alias nb='jupyter notebook' 129
alias clone='git clone --recursive' 102
alias pskill='psgrepkillall' 56
alias inplace='python setup.py build_ext --inplace' 45
alias cherry='git cherry-pick' 42
