In [1]:
# This block just sets up the input
from PIL import Image
import numpy as np
from j2k_compress import compress
# Load the test pattern and turn its pixel data into a 2-D int32 array.
image = Image.open("../../test/test_data/tv-test-pattern-mono-large.jpg")
# PIL reports size as (width, height), so the row count is size[1].
array = (
    np.array(image.getdata())
    .reshape((image.size[1], image.size[0]))
    .astype("int32")
)
# The same array object is supplied four times as the compressor input.
array_list = [array] * 4
num_rows = 2
# NOTE(review): num_cols is defined here but never passed to compress() below.
num_cols = 2

# Run the compression; the result is unpacked into the codestream and a second
# value (j2klra) that is not used in the cells visible here.
codestream, j2klra = compress(array_list, num_rows, profile="NPJE", compression_type="NL")

In [2]:
# Parameters handed to the packet-reordering helpers below.
# NOTE(review): these presumably have to match how the codestream above was
# encoded (profile "NPJE") — confirm against the encoder settings.
# The helpers iterate range(levels + 1), so levels = 5 yields 6 resolution indices.
levels = 5
# Number of quality layers the helpers expect per tile.
layers = 20
# Number of image components the helpers expect.
components = 1

In [3]:
# set up imports
from util.codestream_parser import *
from markers.const import *
from markers.factory import *
from markers.find import *
from markers.segment import *

In [4]:
# lets us save packet info without saving packet data 
class Packet:
    """Bookkeeping record for one packet: where it is and what it belongs to.

    Only positions, the length and the progression coordinates are stored;
    the packet bytes themselves are not kept.
    """

    def __init__(self, codestream_index, tile_index, length, tile, quality_layer, resolution_level, component):
        # Start offset of the packet measured from the beginning of the full codestream.
        self.codestream_index = codestream_index
        # Start offset of the packet measured from the tile's own start (its SOT marker).
        self.tile_index = tile_index
        # Packet size.
        self.length = length
        # Progression coordinates; note that resolution_level is stored as `resolution`.
        self.tile = tile
        self.quality_layer = quality_layer
        self.resolution = resolution_level
        self.component = component

    def print_all_attributes(self):
        """Print every stored field on one line."""
        summary = f"codestream_index: {self.codestream_index}, tile_index: {self.tile_index}, length: {self.length}, tile: {self.tile}, quality layer: {self.quality_layer}, resolution: {self.resolution}, component: {self.component}"
        print(summary)

In [5]:
import bitstring
from bitstring import BitArray
def get_packet_lengths(Iplt):
    """Decode the packet lengths packed into the Iplt field of a PLT marker segment.

    Every length is stored as a run of bytes, most significant first. Each byte
    contributes its low 7 bits to the value; the top bit is a continuation flag
    (1 = more bytes follow for this length, 0 = this is the last byte).

    Args:
        Iplt: bytes-like object (bytes, bytearray, memoryview) holding only the
            Iplt payload, i.e. without the marker, Lplt and Zplt fields.

    Returns:
        list of ints, one per packet, in the order the packets appear.
        Trailing bytes that never reach a terminating byte (top bit clear) are
        ignored, so an empty or truncated input yields fewer (possibly zero)
        lengths rather than an error.
    """
    packet_lengths = []
    current_length = 0
    for byte in Iplt:
        # Shift in the 7 payload bits of this byte.
        current_length = (current_length << 7) | (byte & 0x7F)
        if not byte & 0x80:
            # Continuation flag clear: this byte completes the current length.
            packet_lengths.append(current_length)
            current_length = 0
    return packet_lengths

In [6]:
def sort_packets_by_rlcp(sot_indexes, sod_indexes, packet_lengths, levels, layers, components):
    """Build Packet records for every tile and file them by resolution first.

    Packets are read from each tile in the nesting order quality layer ->
    resolution -> component, with offsets accumulated from the per-tile
    length lists.

    Args:
        sot_indexes: list of ints, codestream offset of each tile's SOT marker,
            in tile order.
        sod_indexes: list of ints, codestream offset of each tile's SOD marker,
            in tile order; its length determines the number of tiles.
        packet_lengths: list (one entry per tile) of lists of ints, the length
            of each packet in the order the packets appear in that tile.
        levels: the function produces levels + 1 resolution indices.
        layers: number of quality layers.
        components: number of components.

    Returns:
        Nested list indexed as
        reordered_packets[resolution][tile][quality layer][component],
        each leaf being a Packet.
        NOTE (from the original author): levels are ordered R5, R4, R3, R2, R1, R0.
    """
    tile_count = len(sod_indexes)
    # Pre-size the result so each packet can be dropped straight into its slot;
    # every slot starts as 0 and is overwritten below.
    grid = [
        [[[0] * components for _ in range(layers)] for _ in range(tile_count)]
        for _ in range(levels + 1)
    ]

    for tile in range(tile_count):
        # Packet data begins right after the 2-byte SOD marker.
        data_start = sod_indexes[tile] + 2
        # Same position, but measured from the tile's SOT marker.
        data_start_in_tile = sod_indexes[tile] - sot_indexes[tile] + 2
        offset = 0
        packet_number = 0
        for layer in range(layers):
            for level in range(levels + 1):
                for component in range(components):
                    size = packet_lengths[tile][packet_number]
                    grid[level][tile][layer][component] = Packet(
                        data_start + offset,
                        data_start_in_tile + offset,
                        size,
                        tile,
                        layer,
                        level,
                        component,
                    )
                    offset += size
                    packet_number += 1

    return grid

In [7]:
# to "sparkify" this, all we need to do is to remove references to "codestream" or "codestream_object",
# since these will not be available 
# however, we do have the main header and tile headers, separate from the data
# could simply pass these in, and change get_sot_indexes and get_all_Iplts to work with these inputs

In [8]:
def get_sot_indexes_from_main_header(main_header):
    """Compute the codestream offset of each SOT marker from the TLM marker.

    The first SOT sits directly after the main header. Every later one is the
    previous offset plus the corresponding Ptlm value:

        sot_index[m] = len(main_header) + sum(Ptlm(i) for i in range(m))

    Args:
        main_header (bytes): the main header of the original compressed codestream.

    Returns:
        list of ints, one per tile part reported by the TLM marker
        (tlm.num_tile_parts), in the order they appear.
        NOTE(review): Ptlm(i) is presumably the byte length of tile part i — confirm.
    """
    parsed_header = CodestreamParser(main_header)
    tlm = find_first_marker(parsed_header, TLM)
    # The first tile starts where the main header ends.
    offsets = [len(main_header)]
    for part in range(1, tlm.num_tile_parts):
        # Advance from the previous SOT by the previous tile part's Ptlm value.
        offsets.append(offsets[-1] + tlm.Ptlm(part - 1))
    return offsets

In [9]:
def get_sod_indexes_from_tile_headers(sot_indexes, tile_headers):
    """Locate each tile's SOD marker within the full codestream.

    Args:
        sot_indexes: list of ints, codestream offset of each tile's SOT marker.
        tile_headers: list of bytes objects, one tile header per tile, in the
            same order as sot_indexes.

    Returns:
        list of ints: for every tile header, its SOT offset plus the `offset`
        reported by the SOD marker found in that header.
        NOTE(review): this assumes marker.offset is relative to the start of
        the tile header passed to the parser — confirm.
    """
    result = []
    for position, header in enumerate(tile_headers):
        marker = find_first_marker(CodestreamParser(header), SOD)
        result.append(sot_indexes[position] + marker.offset)
    return result

In [10]:
def get_all_Iplts_from_tile_headers(tile_headers):
    """Extract the Iplt payload from the first PLT marker of each tile header.

    Args:
        tile_headers (list): bytes objects containing the tile headers, in the
            order they appear within the codestream.

    Returns:
        list of bytes objects, one per tile header and in the same order, each
        holding everything in the PLT marker segment after its first 5 bytes.
        Only the first PLT marker of each header is looked at.
    """
    # Iplt starts at the 6th byte of the PLT marker segment and runs to its end.
    iplt_start = 5
    return [
        find_first_marker(CodestreamParser(header), PLT).to_bytes()[iplt_start:]
        for header in tile_headers
    ]

In [11]:
def reorder_packets_from_npje_to_epje_using_headers(main_header, tile_headers, levels, layers, components):
    """Locate every packet using only the headers and group them resolution-first.

    Works from the main header and the tile headers alone; the packet data
    itself is not needed.

    Args:
        main_header (bytes): the main header of the original compressed codestream.
        tile_headers (list): bytes objects containing the tile headers, in the
            order they appear within the codestream.
        levels (int): forwarded to sort_packets_by_rlcp (resolution levels).
        layers (int): number of quality layers in the compressed image.
        components (int): number of components in the compressed image.

    Returns:
        The nested list produced by sort_packets_by_rlcp, indexed as
        reordered_packets[resolution][tile][quality layer][component].
        NOTE (from the original author): levels are ordered R5, R4, R3, R2, R1, R0.
    """
    # Where each tile starts (SOT marker) in the codestream.
    tile_starts = get_sot_indexes_from_main_header(main_header)
    # One Iplt payload per tile, decoded into that tile's individual packet lengths.
    lengths_per_tile = list(map(get_packet_lengths, get_all_Iplts_from_tile_headers(tile_headers)))
    # Where each tile's packet data starts (SOD marker); needed for packet offsets.
    data_starts = get_sod_indexes_from_tile_headers(tile_starts, tile_headers)
    # Combine offsets and lengths into Packet records arranged for RLCP output.
    return sort_packets_by_rlcp(tile_starts, data_starts, lengths_per_tile, levels, layers, components)

In [12]:
# NOTE(review): hard-coded byte offsets, presumably specific to the codestream
# produced above — TODO derive them from the markers instead.
MAIN_HEADER_LENGTH = 187
TILE_HEADER_LENGTH = 203
# Start offset of each tile slice within the codestream.
TILE_STARTS = [187, 835912, 1671637, 2507362]

main_header = codestream[:MAIN_HEADER_LENGTH]
tile_headers = [codestream[start:start + TILE_HEADER_LENGTH] for start in TILE_STARTS]
# Each tile runs up to the next tile's start; the last one runs to the end of the codestream.
tiles = [codestream[start:stop] for start, stop in zip(TILE_STARTS, TILE_STARTS[1:] + [None])]
reordered_packets_spark_version = reorder_packets_from_npje_to_epje_using_headers(
    main_header, tile_headers, levels, layers, components
)

In [17]:
# The structure is indexed [resolution][tile][quality layer][component]:
# walk every quality layer of resolution index 2 in tile 0 and print each
# component's packet.
for packet_list in reordered_packets_spark_version[2][0]:
    for packet in packet_list:
        packet.print_all_attributes()

codestream_index: 7541, tile_index: 7354, length: 8253, tile: 0, quality layer: 0, resolution: 2, component: 0
codestream_index: 28125, tile_index: 27938, length: 4081, tile: 0, quality layer: 1, resolution: 2, component: 0
codestream_index: 51845, tile_index: 51658, length: 5013, tile: 0, quality layer: 2, resolution: 2, component: 0
codestream_index: 101083, tile_index: 100896, length: 11422, tile: 0, quality layer: 3, resolution: 2, component: 0
codestream_index: 196450, tile_index: 196263, length: 2662, tile: 0, quality layer: 4, resolution: 2, component: 0
codestream_index: 392595, tile_index: 392408, length: 26, tile: 0, quality layer: 5, resolution: 2, component: 0
codestream_index: 471016, tile_index: 470829, length: 4, tile: 0, quality layer: 6, resolution: 2, component: 0
codestream_index: 549476, tile_index: 549289, length: 4, tile: 0, quality layer: 7, resolution: 2, component: 0
codestream_index: 628074, tile_index: 627887, length: 4, tile: 0, quality layer: 8, resolution:

In [None]:
"""
spark design: 
-need function that creates all tile parts for 1 given tile
    -can apply this function to every tile (map to rdd)
    -function will take in tile, reordered_packets, levels, layers, components
    -function will output list of tile parts (includes tile part header and data)
        -len(list) = levels+1
-will need separate function to create new main header

"""

In [None]:
# should we assume that any resolution reduction has been done by this point?
# if resolution reduction truly is as simple as deleting certain resolution levels, this
# operation will be as trivial as removing items from a list
# decision: input "levels" into function below should reflect desired levels of thumbnail, not the original input levels

In [46]:
def create_tile_parts(compressed_tile, reordered_packets, levels, layers, components, tile_index):
    """Collect, for one tile, a flat packet list per resolution index.

    Args:
        compressed_tile: not used by the current implementation.
        reordered_packets: nested list indexed as
            [resolution][tile][quality layer][component].
            NOTE (from the original author): levels are ordered R5, R4, R3, R2, R1, R0.
        levels: levels + 1 lists are produced, one per resolution index.
        layers: not used by the current implementation.
        components: not used by the current implementation.
        tile_index: which tile of reordered_packets to read.

    Returns:
        list of length levels + 1; entry r holds every packet of resolution
        index r for the chosen tile, with the per-layer component lists
        concatenated in layer order.
    """
    return [
        [
            packet
            for component_packets in reordered_packets[level][tile_index]
            for packet in component_packets
        ]
        for level in range(levels + 1)
    ]

In [47]:
# Build the per-resolution packet lists for a single tile (tile 0 here).
tile_index = 0
tile_part_list = create_tile_parts(tiles[tile_index], reordered_packets_spark_version, levels, layers, components, tile_index)

In [56]:
# Print every packet collected under resolution index 3 of tile_part_list.
for packet in tile_part_list[3]:
    packet.print_all_attributes()

codestream_index: 15794, tile_index: 15607, length: 6601, tile: 0, quality layer: 0, resolution: 3, component: 0
codestream_index: 32206, tile_index: 32019, length: 7909, tile: 0, quality layer: 1, resolution: 3, component: 0
codestream_index: 56858, tile_index: 56671, length: 13441, tile: 0, quality layer: 2, resolution: 3, component: 0
codestream_index: 112505, tile_index: 112318, length: 22418, tile: 0, quality layer: 3, resolution: 3, component: 0
codestream_index: 199112, tile_index: 198925, length: 26529, tile: 0, quality layer: 4, resolution: 3, component: 0
codestream_index: 392621, tile_index: 392434, length: 7586, tile: 0, quality layer: 5, resolution: 3, component: 0
codestream_index: 471020, tile_index: 470833, length: 55, tile: 0, quality layer: 6, resolution: 3, component: 0
codestream_index: 549480, tile_index: 549293, length: 597, tile: 0, quality layer: 7, resolution: 3, component: 0
codestream_index: 628078, tile_index: 627891, length: 158, tile: 0, quality layer: 8, 

In [None]:
def create_tile_part_header(reordered_packets):
    """Placeholder for building a tile-part header; not implemented yet.

    The argument is currently ignored and the function always returns None.
    The comments below record the planned header layout.
    """
    # According to BP_J2K_01.10, the following tile-part header segments are needed:
    # SOT, PLT, SOD

    # SOT = b"\xFF\x90"
    # Lsot = b"\x00\x0A"
    # Isot: tile index
    # Psot: length (in bytes) from the first byte of the SOT marker to the end of this tile part
    # TPsot: tile-part index for this tile
    # TNsot: total number of tile parts in the codestream for this tile

    # PLT = b"\xFF\x58"
    # Lplt: length of the PLT segment, not including the initial marker (b"\xFF\x58")
    # Zplt: index of this marker segment relative to all other PLT marker segments in this tile-part header
    # Iplt-i: length of each packet i (needs a helper to build these; max_Iplt_length = 65532 bytes,
    #         so more than one PLT segment might be needed)

    # SOD = b"\xFF\x93"

    return None