In [161]:
import os
from library.pyMagicBytes import *
from PIL import Image, ImagePalette


currently_handled = {
    0:"Global Palette",
    1:"GIF Animated",
    2:"Hidden Frames"
}

# Ending data sub-block + trailer + should search for 
end_of_format = "003B"
# Hidden frames sublock + trailer + new block
hidden_frames = "003B21f9"

zip_format = "504B0304"


file_path = "../data/raw/hiddenZip.gif"
output_dir = "../data/processed/"

In [162]:
def count_byte_sequence_in_image(img: Image.Image, search_bytes: str):
    with open(img.filename, 'rb') as f:
        img_data = f.read()

    hex_data = img_data.hex().upper()

    count = hex_data.count(search_bytes.upper())
    return count

In [163]:
def extract_zip(img: Image.Image):
    with open(img.filename, 'rb') as f:
        img_data = f.read()

    hex_data = img_data.hex().upper()

    count = hex_data.count((end_of_format+zip_format).upper())
    
    search_pattern = (end_of_format + zip_format).upper()
    
    start_idx = hex_data.find(search_pattern)
    if start_idx == -1:
        return 0
    
    # Convert the starting index from hexadecimal string index to byte index
    byte_start_idx = start_idx // 2
    
    # Extract data from the start index to the end
    extracted_data = img_data[byte_start_idx+2:]
    
    # Save the extracted data to the output file
    with open(os.path.join(output_dir, "hidden_magic_byte.zip"), 'wb') as output_file:
        output_file.write(extracted_data)
        
    return count

In [None]:
def hidden_magic_byte(img: Image.Image):
    with open(img.filename, 'rb') as f:
        img_data = f.read()

    hex_data = img_data.hex().upper()
    
    start_idx = hex_data.find(end_of_format.upper())

    if start_idx == -1:
        return 0
    
    byte_start_idx = start_idx // 2
    
    extracted_data = img_data[byte_start_idx:]
    print(extracted_data)

    obj = HexStringObject(extracted_data).getPossibleTypes()

    i=0
    for value in obj:
        if i==0:
            bytes_offset = int(value[0][1])
            #This bytes offset sucks, TOFIX
            extracted_data = extracted_data[bytes_offset:]
            print(f'Extracting hidden data found using magic bytes to "{value[3][1]}.{value[2][1]}"')

            # Save the extracted data to the output
            with open(os.path.join(output_dir, f"{value[3][1]}.{value[2][1]}"), 'wb') as output_file:
                output_file.write(extracted_data)  
        
        i+=1
    
    return obj

In [165]:
def get_image_info(img:Image.Image):
    can_fix=[]

    obj = FileObject(file_path)
    
    metadata = {
        "format": img.format,
        "size": img.size,
        "mode": img.mode,
        "n_frames": getattr(img, "n_frames", 1),
        "is_animated": getattr(img, "is_animated", False),
        "has_global_palette": bool(img.info.get("palette")),
        "sub_block_trailer": count_byte_sequence_in_image(img, end_of_format),
        "hidden_gif_frames": count_byte_sequence_in_image(img, hidden_frames),
        "hidden_magic_byte": hidden_magic_byte(img),
        "is_it_really_gif_tho?": obj.getPossibleTypes(ReturnArray=True), 
    }

    extract_zip(img)
    
    print("GIF Metadata:")
    for key, value in metadata.items():
        print(f"  {key.replace('_', ' ').title()}: {value}")

    # Identify issues
    if not metadata["is_animated"]:
        print("This GIF is not animated.")
        can_fix.append(1)
    else:
        print("This GIF is animated.")

    if not metadata["has_global_palette"]:
        print("Global Palette: Not Present")
        can_fix.append(0)
    else:
        print("Global Palette: Present")
        
    if metadata["hidden_gif_frames"] != 0:
        can_fix.append(2)
    else:
        print("No hidden gif frames detected")
    
    return {
        "metadata": metadata,
        "fixable_issues": can_fix
    }

In [166]:
def fix_hidden_frames(img: Image.Image, dest: str):
    with open(img.filename, 'rb') as f:
        img_data = f.read()

    hex_data = img_data.hex().upper()

    # Remove trailing 3B
    img_fixed_hex = hex_data.replace(hidden_frames.upper(), "0021f9")
    
    img_fixed_bytes = bytes.fromhex(img_fixed_hex)

    with open(f"{dest}_hidden_frames.gif", 'wb') as f:
        f.write(img_fixed_bytes)

In [167]:
def fix_global_palette(img: Image.Image, dest: str):
    rd_palette = ImagePalette.random()
    bw_palette = [i for i in range(256) for _ in range(3)]
    bw_palette[0:3] = [255, 255, 255]
    bw_palette = ImagePalette.ImagePalette(mode="RGB", palette=bw_palette)
    
    if img.is_animated:
        frames_rd = []
        frames_bw = []
        
        total_frames = img.n_frames
        
        # Iterate through all frames using seek and tell as written in PILLOW doc
        # https://pillow.readthedocs.io/en/latest/handbook/image-file-formats.html#gif
        for frame_index in range(total_frames):
            img.seek(frame_index)
            frame_rd = img.copy()
            frame_bw = img.copy()
            
            if frame_rd.mode != "P":
                frame_rd = frame_rd.convert("P")
                frame_bw = frame_bw.convert("P")

            # Random color palette
            frame_rd.putpalette(rd_palette.palette) 
            frames_rd.append(frame_rd)
            
            # Black color palette
            frame_bw.putpalette(bw_palette.palette)
            frames_bw.append(frame_bw)


        img_rd = frames_rd[0]
        img_rd.save(f"{dest}_animated_random.gif", save_all=True, append_images=frames_rd[1:], loop=0, duration=img.info['duration'])
        
        img_bw = frames_bw[0]
        img_bw.save(f"{dest}_animated_blackwhite.gif", save_all=True, append_images=frames_bw[1:], loop=0, duration=img.info['duration'])
    
    else:
        if img.mode != "P":
            img = img.convert("P")
            
        img.putpalette(rd_palette.palette)
        img.save(f"{dest}_random.gif")
        
        img.putpalette(bw_palette.palette)
        img.save(f"{dest}_blackwhite.gif")

    return img

In [168]:
def analyze_gif_with_pillow(file_path):
    with Image.open(file_path) as img:
        if img.format != "GIF":
            return -1
        
        result = get_image_info(img)
        fixable_issues = result["fixable_issues"]
        
        if fixable_issues == None:
            return 0

        for issue in fixable_issues:
            if issue == 0:
                print(f"Fixing: {currently_handled[issue]}")
                fix_global_palette(img, os.path.join(output_dir, f"FIXED_{currently_handled[0]}"))
    
                
            if issue == 1:
                print(f"Fixing: {currently_handled[issue]}")
                #fixed_img = fix_global_palette(img)
                #fixed_img.save(os.path.join(output_dir, f"FIXED_{currently_handled[0]}_{original_name}.gif"))
            
            if issue == 2:
                print(f"Fixing: {currently_handled[issue]}")
                fix_hidden_frames(img, os.path.join(output_dir, f"FIXED_{currently_handled[0]}"))
            

analyze_gif_with_pillow(file_path)


b'\x00\x03\xb4.\x10\x80\xad\x0b\xe0\xaakF\xbc^\xe2kA\xc0N*,\x10\xce6\xb7\x11&\xb8\x92\xbaLk\x82\xa1\xbb\xa02t\xcc\x97\xaa\x19\xbb\xb4\xe4\x8d\xbd\x0c\x94wQ\xa5\xd3\x00D\x88\xb8\xe3\xa2\x10\xc5Q\xe9\xcc\x07l\x86\x06\xce Ik\xaf]\x8b\x8b\x7f\xe3\xf8\xd7!4e\\Ajb\x07\xef;\xa3\xc6\x01L\xd6dn\xc02\xf0GKx\xa1`I\xb8\x80\x1a\x1c\xee\x8f=,\xfc\x18>\rA\x8c\x84\xc4\xfeQ\x9c\x85\xc5)\x86\x0b\xf0\xc6\xfbv,\xdb\xc7\xe1\x8a\x1ckF%\xab\x82@\xae\xc0\xa6\xac3\x8c\t\xc3\x103\xc3\xdf4h\xcd\'\xdd\x84\xb6JO\xee\x1a1\x1e\xaa:k\xcc\xf3\xbeg(\xd0\xa4\x02\x9c\x1d\x14vF\x81}J\x82%\x89\xcdk0\xc7\xbb\xec\x80\xf5\xa20i\x91\xc8\xd1#)\xb2\n\xde\xdd\xb0\xebJ\xbf\x19\x7fMc~0\xfe\x01\xb0`\'\xf0\xe1\rv\x8c^(\xf7-n\xd5-\xea\xdd\xb7\xb0\xb2\xb7\xcc~_\xf2M\xff\xb8\xb8\xdd7x\x03d#e\xb8\xc6\x88\x9b\xa0\xf8(\x8c\xc3\xe78\x0ev5\xd7\x9e\x16W\xe7\xed\xc7\xde\xb6\xa0\xdb\x1a\xd7\x81W6\xe3\xe7\x19\xb6cc\xaadF=/\x00Vn0\xc6\xe2;S\xaa1\xeb\x90\xa7\xf8\xba\xa8=\xd1L;\xe4D\x8a\xc3\x91\x80\xfb\x0e\xeey\xe8\xb5\x95\'T\xf5\xa8*K<\x9e\x18\xd0