In [3]:
import numpy as np

def calculate_entropy(sample, verbose=False):
    """
    Calculate the Shannon entropy (in bits) of a binary sample (0s and 1s).

    Entropy is computed using the formula:
    Entropy = -p(0) * log2(p(0)) - p(1) * log2(p(1)),
    where p(0) and p(1) are the fractions of 0s and 1s in the sample.
    A class that does not occur (probability 0) contributes nothing to the
    sum, so its term is skipped instead of evaluating log2(0).

    Parameters:
    sample (list of int): A binary sample containing only 0s and 1s. It must
        support ``count()`` and ``len()`` (e.g. a list or tuple). Values other
        than 0 and 1 are not counted as either class but still count toward
        the total, so the two probabilities then no longer sum to 1.
    verbose (bool, optional): If True, prints the calculation process. Only
        classes that actually occur are shown in the printed formula.
        Defaults to False.

    Returns:
    float or None: The calculated entropy of the sample, or None if the sample is empty.

    Example:
    >>> float(calculate_entropy([0, 0, 1, 1]))
    1.0
    >>> float(calculate_entropy([1, 1, 1]))
    0.0
    >>> calculate_entropy([]) is None
    True
    """
    zero_count = sample.count(0)
    one_count = sample.count(1)
    total = len(sample)

    # Entropy is undefined for an empty sample; signal that with None.
    if total == 0:
        return None

    prob_zero = zero_count / total
    prob_one = one_count / total

    if verbose:
        print(f"Total_zero_in_sample = {zero_count}, Total_one_in_sample = {one_count}, Total = {total}")
        print(f"prob_zero = {prob_zero}, prob_one = {prob_one}")

    # Keep (count, probability, log2(probability)) only for classes that occur.
    # Filtering here guarantees log2 is never evaluated at 0, which would yield
    # -inf plus a NumPy RuntimeWarning (previously triggered by verbose output).
    terms = [
        (count, prob, np.log2(prob))
        for count, prob in ((zero_count, prob_zero), (one_count, prob_one))
        if prob > 0
    ]

    entropy = 0
    for _, prob, log_prob in terms:
        entropy -= prob * log_prob

    if verbose:
        # Symbolic form with the raw counts, e.g. " -(3/4)*log(3/4) -(1/4)*log(1/4)".
        print("Entropy = " + "".join(
            f" -({count}/{total})*log({count}/{total})" for count, _, _ in terms
        ))
        if terms:
            # Same expression with the fractions evaluated ...
            print("Entropy = -" + " - ".join(
                f"{prob}*log({prob})" for _, prob, _ in terms
            ))
            # ... and with the logarithms evaluated as well.
            print("Entropy = -" + " - ".join(
                f"{prob}*{log_prob}" for _, prob, log_prob in terms
            ))

    return entropy

# Worked example: three 0s and one 1, with the intermediate steps printed.
sample = [0, 0, 0, 1]
entropy = calculate_entropy(sample=sample, verbose=True)
print("Entropy = {}".format(entropy))

Total_zero_in_sample = 3, Total_one_in_sample = 1, Total = 4
prob_zero = 0.75, prob_one = 0.25
Entropy =  -(3/4)*log(3/4) -(1/4)*log(1/4)
Entropy = -0.75*log(0.75) - 0.25*log(0.25)
Entropy = -0.75*-0.4150374992788438 - 0.25*-2.0
Entropy = 0.8112781244591328


In [4]:
import scipy.stats
# Cross-check against SciPy using the class counts [3, 1]; base=2 gives the result in bits.
print(scipy.stats.entropy([3, 1], base=2))

0.8112781244591328
