# File signatures

The [file signatures](https://en.wikipedia.org/wiki/List_of_file_signatures) are portion of data used to identify or verify the content of a file. Such signatures are also known as magic numbers or Magic Bytes


| Filename extension | Hex signature           |
|--------------------|-------------------------|
|                png | 89 50 4E 47 0D 0A 1A 0A |
|                jpg | FF D8 FF                |
|               jpeg | FF D8 FF                |
|                bmp | 42 4D                   |
|                gif | 47 49 46 38             |

In [8]:
import base64
import binascii

HEADER_SIZE = 8 # bytes
test_images = ['logistic_function.png' 
    , 'pink_primose.jpg'         
    , 'pippo_txt.jpg'
    , 'pink_primose_copy.jpeg'     
    , 'sunflower.bmp'              
    , 'test.gif'     
    ]

dict_extension_signature = {'png' : '89504e470d0a'
    , 'jpg' : 'ffd8ff'
    , 'jpeg' : 'ffd8ff'
    , 'bmp' : '424d'
    , 'gif' : '47494638'
    }

def check_extension_match_signature(filepath, header_bytes = 8):
    '''
    Check if the image extension matches the file signature

    Arguments:
        filepath (str): filename 

    Returns:
        match (boolean): True if extension and signature mathces and False otherwise
    '''
    filename, extension = filepath.rsplit('.', 1)
    with open(filepath, 'rb') as image_file:
        image_read = image_file.read()
        header_byte = image_read[0:header_bytes]
        header_hex = header_byte.hex()
        header_base64 = base64.b64encode(header_byte).decode()
        signature_hex = dict_extension_signature[extension]
        signature_byte = bytearray.fromhex(signature_hex)
        signature_base64 = base64.b64encode(signature_byte).decode()
        
        print(header_byte)
        print(header_hex)
        print(header_base64)
        print(signature_byte)
        print(signature_hex)
        print(signature_base64)

        if signature_hex == header_byte.hex()[0:len(signature_hex)]:
            return True
        else:
            return False


for image_name in test_images:
    filepath = 'images/' + image_name
    with open(filepath, 'rb') as image_file:
        image_read = image_file.read()
        header_byte = image_read[0:HEADER_SIZE]
        header_hex = header_byte.hex()
        header_base64 = base64.b64encode(header_byte)
        print('images/' + image_name)
        print('Extension match signature: {}'.format(check_extension_match_signature(filepath)))
    print()

images/logistic_function.png
b'\x89PNG\r\n\x1a\n'
89504e470d0a1a0a
iVBORw0KGgo=
AAAAAAAAAAA 89504e470d0a1a0a
bytearray(b'\x89PNG\r\n')
89504e470d0a
iVBORw0K
Extension match signature: True

images/pink_primose.jpg
b'\xff\xd8\xff\xe0\x00\x10JF'
ffd8ffe000104a46
/9j/4AAQSkY=
AAAAAAAAAAA ffd8ffe000104a46
bytearray(b'\xff\xd8\xff')
ffd8ff
/9j/
Extension match signature: True

images/pippo_txt.jpg
b'joniubvp'
6a6f6e6975627670
am9uaXVidnA=
AAAAAAAAAAA 6a6f6e6975627670
bytearray(b'\xff\xd8\xff')
ffd8ff
/9j/
Extension match signature: False

images/pink_primose_copy.jpeg
b'\xff\xd8\xff\xe0\x00\x10JF'
ffd8ffe000104a46
/9j/4AAQSkY=
AAAAAAAAAAA ffd8ffe000104a46
bytearray(b'\xff\xd8\xff')
ffd8ff
/9j/
Extension match signature: True

images/sunflower.bmp
b'BM\xf6z\x10\x00\x00\x00'
424df67a10000000
Qk32ehAAAAA=
AAAAAAAAAAA 424df67a10000000
bytearray(b'BM')
424d
Qk0=
Extension match signature: True

images/test.gif
b'GIF89a\xb5\x02'
474946383961b502
R0lGODlhtQI=
AAAAAAAAAAA 474946383961b502
bytearray

'pippo_txt.jpg' was a .txt then I have changed the extension in .jpg: the check on the signature does reveal that the file is not a .jpg as expected