In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import tqdm
import cv2

Images are among the most common and popular representations of data. Digital images are used both professionally and personally, in everything from official documents to social media. Consequently, organizations and individuals alike need to store and share large numbers of images. One of the most common issues with images is their potentially large file size: advances in image-acquisition technology and the growing popularity of digital content mean that images now have very high resolution and quality, which inevitably increases their size. For this reason, image compression has become one of the most important parts of image processing. The goal is to make an image as small as possible without compromising its quality, striking the right balance between the two.

#### Understanding the Algorithm
Let's take the input string "wwwwbbbwwwwwbbbbb". At each step we identify the next character, count its consecutive occurrences, and append the character followed by that count to the code. For this string the resulting code is "w4b3w5b5".


#### Pseudocode
1. Start with an empty string for the code, and start traversing the string.
2. Pick the first character and append it to the code.
3. Now count how many times the chosen character occurs consecutively (including the first occurrence) and append that count to the code.
4. When you encounter a different character, repeat the above steps.
5. In a similar manner, traverse the entire string and generate the code.

#### Decoding
Decoding the output is just as straightforward. Whenever you encounter a number, append the character preceding it that many times to the final string.
For example, A3D5G7 gets decoded to AAADDDDDGGGGGGG.

#### Decoding Masks for this competition

In [None]:
# Training annotations CSV; the cells below read its 'id', 'annotation',
# 'height' and 'width' columns.
TRAIN_CSV = '../input/sartorius-cell-instance-segmentation/train.csv'
data = pd.read_csv(TRAIN_CSV)

In [None]:
# Preview the first five rows to check the columns loaded as expected.
data.head(n=5)

In [None]:
# Distinct image ids, in order of first appearance; each id may span many rows.
file_names = pd.unique(data['id'])

In [None]:
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length-encoded mask string into a 2-D array.

    mask_rle: run-length string formatted as "start length start length ...",
              where each start is a 1-based index into the flattened
              (row-major) image
    shape: (height, width) of the array to return
    color: value written into the masked pixels (default 1)
    Returns a float32 numpy array of the given shape: color - mask, 0 - background
    Raises ValueError if mask_rle is not made of complete (start, length) pairs
    '''
    tokens = mask_rle.split()
    if len(tokens) % 2:
        # An odd token count means a start without a length. Without this check
        # numpy broadcasting silently pairs a lone length with every start
        # (3 tokens) or drops the run entirely (1 token), producing a wrong mask.
        raise ValueError(
            'mask_rle must contain (start, length) pairs, got %d tokens' % len(tokens)
        )
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color
    return img.reshape(shape)

In [None]:
# Merge every instance annotation of an image into one mask and save it as
# '<id>.png' in `location`.
location = './'

# Number of annotations that could not be decoded or merged. Named
# `failed_count` rather than `sum` so the builtin `sum` is not shadowed
# for the rest of the session.
failed_count = 0
failures = []  # (image id, error description) for each failure

# Group once instead of filtering the whole frame for every image id.
annotations_by_id = data.groupby('id', sort=False)

for file in tqdm.tqdm(file_names):
    get_all_codes = annotations_by_id.get_group(file)

    # Size the canvas from the data rather than hard-coding 520x704, so it
    # matches the shape each annotation of this image is decoded with.
    height = int(get_all_codes['height'].iloc[0])
    width = int(get_all_codes['width'].iloc[0])
    main_image = np.zeros((height, width), dtype=np.float32)

    for row in get_all_codes.itertuples(index=False):
        try:
            im = rle_decode(row.annotation, (row.height, row.width))
            # Every pixel is either 0.0 or the same mask value (1.0), so a
            # bitwise OR of the float32 buffers acts as a union of the masks.
            main_image = cv2.bitwise_or(main_image, im)
        except Exception as e:
            # Best effort: skip the bad annotation but keep a record of it.
            failed_count += 1
            failures.append((file, repr(e)))

    # save masks as png image; cv2.imwrite reports failure via its return value
    if not cv2.imwrite(location + file + '.png', main_image):
        failures.append((file, 'cv2.imwrite failed'))

print(failed_count)
if failures:
    print('first failures:', failures[:5])

In [None]:
# Read one of the saved masks back as a single-channel image and display it.
# The mask is stored with values 0/1; imshow's default normalisation stretches
# that range so the cells are visible with the gray colormap.
image = cv2.imread('./0a6ecc5fe78a.png', cv2.IMREAD_GRAYSCALE)
plt.imshow(image, cmap='gray')