For the distribution induced by a count vector $\mathbf{c} \in \mathbb{N}^k$ with $n=\sum_{i=1}^k c_i$ we can write the entropy function as
\begin{align*}
\mathrm{H}(\mathbf{c}) &= - \sum_{i=1}^k \frac{c_i}{n} \log \frac{c_i}{n}\\
&=\log n - \frac{1}{n} \sum_{i=1}^k c_i \log c_i
\end{align*}
where as usual we define $0 \log 0 = 0$.
By expressing the sum over counts as
\begin{equation*}
\sum_{i=1}^k c_i \log c_i = n(\log n - \mathrm{H}(\mathbf{c}))
\end{equation*}
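we can derive the following incremental formula for the entropy of the count vector $\mathbf{c}+\delta\mathbf{e}_i$, i.e., the vector obtained from $\mathbf{c}$ by changing its $i$-th count by $\delta$ (here $\mathbf{e}_i$ denotes the $i$-th standard basis vector):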
\begin{equation*}
\mathrm{H}(\mathbf{c} + \delta\mathbf{e}_i) =
\begin{cases}
0 &, \text{ if } n = 0 \text{ or } n = -\delta\\
\log(n + \delta) - ((c_i+\delta)\log (c_i+\delta) + n(\log n - \mathrm{H}(\mathbf{c})) - c_i \log c_i )/(n+\delta) &, \text{ otherwise}\\
\end{cases}
\end{equation*}

In [1]:
import numpy as np

def entropy_from_counts(counts):
    """Return the Shannon entropy (in nats) of the distribution induced by counts.

    Computes ``log(n) - sum(c_i * log(c_i)) / n`` with ``n = sum(counts)``,
    using the convention ``0 * log(0) = 0`` so that empty bins contribute
    nothing instead of producing ``nan``.

    Parameters
    ----------
    counts : array_like
        Non-negative counts, one per bin.

    Returns
    -------
    float
        The entropy of the normalised histogram. An empty histogram
        (all counts zero) yields ``0.0``, matching ``incremental_entropy``.
    """
    counts = np.asarray(counts, dtype=float)
    n = counts.sum()
    if n == 0:
        # Empty histogram: avoid log(0) and 0/0.
        return 0.0
    # Drop empty bins so that 0*log(0) is treated as 0 rather than nan.
    occupied = counts[counts > 0]
    return np.log(n) - np.sum(occupied * np.log(occupied)) / n

def incremental_entropy(h_old, n, c_old, c_new):
    """Update a histogram's entropy after a single bin count changes.

    Parameters
    ----------
    h_old : entropy (natural log) of the histogram before the change
    n     : total count of the histogram before the change
    c_old : count of the affected bin before the change
    c_new : count of the affected bin after the change

    Returns the entropy of the updated histogram; 0.0 when either the
    old or the new histogram is empty.
    """
    delta = c_new - c_old

    # Guard: old or new histogram empty.
    if n == 0 or n == -delta:
        return 0.0

    def _c_log_c(c):
        # Uses the convention 0*log(0) = 0.
        return c*np.log(c) if c > 0 else 0

    n_new = n + delta
    # Recover sum_j c_j*log(c_j) of the old histogram from its entropy,
    # then swap the affected bin's old term for its new one.
    weighted_logs = _c_log_c(c_new) + n*(np.log(n) - h_old) - _c_log_c(c_old)
    return np.log(n_new) - weighted_logs/n_new


In [2]:
from scipy.stats import entropy 

def test(counts, delta, i):
    """Compare the incremental entropy update with scipy's direct computation.

    Adds ``delta`` to bin ``i`` of a copy of ``counts``, then prints the
    entropy before the change, the directly computed entropy after it,
    the incrementally computed entropy after it, and whether the latter
    two agree according to ``np.isclose``.
    """
    updated = counts.copy()
    updated[i] += delta
    print('testing:', counts, '->', updated)

    total = sum(counts)
    h_before = entropy(counts, base=np.e)
    h_after = entropy(updated, base=np.e)
    h_incremental = incremental_entropy(h_before, total, counts[i], updated[i])

    for shown in (h_before, h_after, h_incremental,
                  np.isclose(h_after, h_incremental)):
        print(shown)
    print()

# A uniform histogram, and the same histogram with an empty first bin.
t1 = np.array([1, 1, 1, 1])
t2 = np.array([0, 1, 1, 1])

# Increment a bin, empty a bin, and fill a previously empty bin.
test(counts=t1, delta=1, i=0)
test(counts=t1, delta=-1, i=0)
test(counts=t2, delta=1, i=0)

testing: [1 1 1 1] -> [2 1 1 1]
1.3862943611198906
1.3321790402101223
1.3321790402101221
True

testing: [1 1 1 1] -> [0 1 1 1]
1.3862943611198906
1.0986122886681096
1.0986122886681098
True

testing: [0 1 1 1] -> [1 1 1 1]
1.0986122886681096
1.3862943611198906
1.3862943611198904
True



In [3]:
# Histogram with a single occupied bin.
t3 = np.array([1, 0, 0, 0])
test(counts=t3, delta=1, i=0)   # grow the only occupied bin
test(counts=t3, delta=1, i=1)   # occupy a second bin
test(counts=t3, delta=-1, i=0)  # remove the last observation (empty histogram)

testing: [1 0 0 0] -> [2 0 0 0]
0.0
0.0
0.0
True

testing: [1 0 0 0] -> [1 1 0 0]
0.0
0.6931471805599453
0.6931471805599453
True

testing: [1 0 0 0] -> [0 0 0 0]
0.0
nan
0.0
False



  pk = 1.0*pk / np.sum(pk, axis=axis, keepdims=True)


In [4]:
# Single occupied bin that is not the first one; grow that bin.
t4 = np.array([0, 1, 0, 0, 0])
test(counts=t4, delta=1, i=1)

testing: [0 1 0 0 0] -> [0 2 0 0 0]
0.0
0.0
0.0
True



In [5]:
# One bin with count 2; add an observation to a different, empty bin.
t5 = np.array([0, 2, 0, 0, 0])
test(counts=t5, delta=1, i=3)

testing: [0 2 0 0 0] -> [0 2 0 1 0]
0.0
0.6365141682948128
0.636514168294813
True



In [6]:
# No-op update (delta = 0): the entropy must stay unchanged.
t6 = np.array([1, 2, 1, 0, 1])
test(counts=t6, delta=0, i=0)

testing: [1 2 1 0 1] -> [1 2 1 0 1]
1.3321790402101223
1.3321790402101223
1.3321790402101223
True

