In [8]:
def flatten(arr):
    flat_list = []
    for sublist in arr:
        for item in sublist:
            flat_list.append(item)
    return np.array(flat_list)

In [5]:
import numpy as np

def arithmetic_encode(image, N, symb=None):
    # Convert image to 1D array
    image = flatten(image)
    print("Image shape after converting into a Linear array",image.shape)
    # Calculate the frequency of each symbol
    if symb is None:
        symb, freq = np.unique(image, return_counts=True)
    else:
        freq = np.zeros(len(symb))
        for i, s in enumerate(symb):
            freq[i] = np.count_nonzero(image == s)

    print("symb :\n", symb)
    print('Frequencies :\n', freq)
    # Normalize the frequency so that they add up to 1
    freq = freq / np.sum(freq)
    print('Normaliized Frequencies :\n', freq)

    # Calculate the cumulative probability of each symbol
    cum_probs = np.cumsum(freq)
    print("Cumalative Probablity of each sumbols : ", cum_probs)

    # Initialize lower and upper limits
    L = 0
    U = 1
    # Initialize the result array
    result = np.zeros(len(image) // N + 1)
    for i in range(len(image) // N):
        # Get the next N symb
        sub_img = image[i*N:(i+1)*N]
        # Calculate the probability of the sub-image
        prob = 1
        for s in sub_img:
            idx = np.where(symb == s)[0][0]
            prob *= freq[idx]
        # Update the limits
        new_L = L + (U - L) * cum_probs[np.where(symb == sub_img[0])[0]]
        new_U = L + (U - L) * cum_probs[np.where(symb == sub_img[0])[0] + 1]
        for s in sub_img[1:]:
          try:
            new_L = L + (U - L) * cum_probs[np.where(symb == s)[0]] + (U - L) * prob
            new_U = L + (U - L) * cum_probs[np.where(symb == s)[0] + 1] + (U - L) * prob
          except:
            continue
        L = new_L
        U = new_U
        # Save the result
        result[i] = L
    return result, freq

The code implements the Arithmetic Encoding algorithm to compress an image. The main steps of the algorithm are:

1. Convert the image to a 1D array
2. Calculate the frequency of each symbol in the array
3. Normalize the frequency so that they add up to 1
4. Calculate the cumulative probability of each symbol
5. Initialize the lower and upper limits
6. Loop over the array in blocks of size N
7. For each block, calculate the probability of the block by multiplying the   probabilities of the symbols in the block
8. Update the lower and upper limits based on the probabilities and cumulative probabilities of the symbols in the block
9. Save the lower limit of each block as the result array

The function takes three arguments:

1. image: The image to be compressed
2. N: The block size for encoding
3. symb: The symbols to be used in the encoding. If not provided, the function will determine the symbols automatically based on the image.
The function returns two values:

1. result: The lower limit of each block of the encoded image
2. freq: The frequency of each symbol used in the encoding.

In [9]:
import numpy as np
import cv2
import scipy.io as sio

# Load the sample image
img = cv2.imread('image2.png', cv2.IMREAD_GRAYSCALE)
# image = image.reshape((28,28))
# Apply arithmetic encoding with N = 4
result, freq = arithmetic_encode(img, 3)

# Save the result as a .mat file
sio.savemat('arithmetic_coded.mat', {'result': result, 'freq': freq})

Image shape after converting into a Linear array (87300,)
symb :
 [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 219 220 223 224 228 229 231]
Frequencies :
 [   2

In [10]:
result, freq

(array([0.14698847, 0.14836872, 0.14838282, ..., 0.14838295, 0.14838295,
        0.        ]),
 array([2.29095074e-05, 4.58190149e-05, 1.37457045e-04, 1.37457045e-04,
        2.40549828e-04, 4.69644903e-04, 5.04009164e-04, 1.17983963e-03,
        1.15693013e-03, 1.34020619e-03, 1.42038946e-03, 1.66093929e-03,
        1.55784651e-03, 1.62657503e-03, 1.71821306e-03, 1.76403207e-03,
        1.82130584e-03, 1.84421535e-03, 2.09621993e-03, 2.00458190e-03,
        2.02749141e-03, 1.98167239e-03, 2.19931271e-03, 1.94730813e-03,
        2.31386025e-03, 2.42840779e-03, 2.88659794e-03, 2.76059565e-03,
        2.71477663e-03, 2.60022910e-03, 2.63459336e-03, 3.67697595e-03,
        3.55097365e-03, 4.95990836e-03, 6.15120275e-03, 6.91867125e-03,
        9.62199313e-03, 1.13860252e-02, 1.27605956e-02, 1.51431844e-02,
        1.33562428e-02, 1.02176403e-02, 1.00916380e-02, 8.43069874e-03,
        9.45017182e-03, 9.86254296e-03, 9.63344788e-03, 9.03780069e-03,
        8.08705613e-03, 8.23596793e-03, 8

In [11]:
import scipy.io
mat = scipy.io.loadmat('arithmetic_coded.mat')

In [12]:
mat

{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Tue Apr 18 17:40:34 2023',
 '__version__': '1.0',
 '__globals__': [],
 'result': array([[0.14698847, 0.14836872, 0.14838282, ..., 0.14838295, 0.14838295,
         0.        ]]),
 'freq': array([[2.29095074e-05, 4.58190149e-05, 1.37457045e-04, 1.37457045e-04,
         2.40549828e-04, 4.69644903e-04, 5.04009164e-04, 1.17983963e-03,
         1.15693013e-03, 1.34020619e-03, 1.42038946e-03, 1.66093929e-03,
         1.55784651e-03, 1.62657503e-03, 1.71821306e-03, 1.76403207e-03,
         1.82130584e-03, 1.84421535e-03, 2.09621993e-03, 2.00458190e-03,
         2.02749141e-03, 1.98167239e-03, 2.19931271e-03, 1.94730813e-03,
         2.31386025e-03, 2.42840779e-03, 2.88659794e-03, 2.76059565e-03,
         2.71477663e-03, 2.60022910e-03, 2.63459336e-03, 3.67697595e-03,
         3.55097365e-03, 4.95990836e-03, 6.15120275e-03, 6.91867125e-03,
         9.62199313e-03, 1.13860252e-02, 1.27605956e-02, 1.51431844e-02,
         1.3356242