# Getting the PNG images into numpy arrays

In [24]:
import os
import numpy as np
import tensorflow as tf

from PIL import Image

In [25]:
IMG_SIZE = 128

def get_label(file_path):
    parts = tf.strings.split(file_path, os.path.sep)
    if parts[-2] == "goodware":
        return [0]
    else:
        return [1]

def get_image(path_img):
    image = np.asarray(Image.open(path_img))
    image = tf.convert_to_tensor(image, dtype_hint=None, name=None)
    return image


def get_shape(image):
    return image.shape[0]


def decode_img(path_img):
    image = tf.numpy_function(get_image, [path_img], tf.uint8)
    shape = tf.numpy_function(get_shape, [image], tf.int64)
    image = tf.reshape(image, [shape, 1, 1])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [IMG_SIZE * IMG_SIZE, 1])
    return tf.reshape(image, [IMG_SIZE * IMG_SIZE, 1])


def process_path(file_path):
    label = get_label(file_path)
    img = decode_img(file_path)
    return img, label

In [27]:
# Conversion of 1 image with DexRay code
dex_image1 = get_image("/scratch/users/mbenali/download_apk/images/malware/7F87D3C14086DAD9ECEBD36EBD7D99640942F6386323E99143259C2E83E5DA79.png")
dex_image1_shape = tf.numpy_function(get_shape, [dex_image1], tf.int64)

image = tf.reshape(dex_image1, [dex_image1_shape, 1, 1])
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize(image, [IMG_SIZE * IMG_SIZE, 1])
resized_dex_image1 = tf.reshape(image, [IMG_SIZE * IMG_SIZE, 1])

print(resized_dex_image1.shape)

(16384, 1)


A random goodware hash: 8219FD7332A38E465CFCD3500FB1DC1EBBA00E7BA7C9616AD0704A5817DA52FD

GPT function to do the resizing on numpy arrays manually since the pytorch function doesn't exist: 

In [None]:
from scipy.ndimage import zoom

IMG_SIZE = 128  # Replace with your desired image size

def resize_image(image, target_height, target_width):
    """Resize the image to the target dimensions using interpolation."""
    current_height, current_width = image.shape[:2]
    zoom_factors = (target_height / current_height, target_width / current_width)
    resized_image = zoom(image, zoom_factors + (1,) if image.ndim == 3 else zoom_factors, order=1)  # Bilinear interpolation
    return resized_image

def get_image(path_img):
    """Reads an image from the given path and converts it to a NumPy array."""
    image = np.asarray(Image.open(path_img))
    return image

def get_shape(image):
    """Returns the height (first dimension) of the image."""
    return image.shape[0]

def decode_img(path_img):
    """Processes an image: loads, normalizes, resizes, and reshapes it."""
    image = get_image(path_img)  # Load the image as a NumPy array
    shape = get_shape(image)  # Get the height of the image
    
    # Normalize to [0, 1]
    image = image.astype(np.float32) / 255.0
    
    # Resize the image to (IMG_SIZE, IMG_SIZE)
    image = resize_image(image, IMG_SIZE, IMG_SIZE)
    
    # Reshape to final format
    return image.reshape(IMG_SIZE * IMG_SIZE, 1)

In [None]:
npy_image = decode_img("/scratch/users/mbenali/download_apk/images/goodware/8219FD7332A38E465CFCD3500FB1DC1EBBA00E7BA7C9616AD0704A5817DA52FD.png")

print(npy_image.shape, npy_image[0])

(16384, 1) [0.39215687]


This resized image should now be the exact same as one that's obtained by DexRay code.

In [None]:
dex_image1_npy = resized_dex_image1.numpy()
print(dex_image1_npy.shape)
print(dex_image1_npy == npy_image)

(16384, 1)
[[False]
 [False]
 [False]
 ...
 [False]
 [ True]
 [ True]]


In [None]:
print(dex_image1_npy, npy_image)

[[0.00804036]
 [0.22706372]
 [0.38615534]
 ...
 [0.06666667]
 [0.        ]
 [0.        ]] [[0.39215687]
 [0.39215687]
 [0.39215687]
 ...
 [0.        ]
 [0.        ]
 [0.        ]]


## Compare generated images with DexRay

A random malware hash: 7F87D3C14086DAD9ECEBD36EBD7D99640942F6386323E99143259C2E83E5DA79

In [32]:
from_npy_files = np.load("/scratch/users/mbenali/download_apk/npy2/malware/7F87D3C14086DAD9ECEBD36EBD7D99640942F6386323E99143259C2E83E5DA79.npy")

from_npy_files

array([[0.38309494],
       [0.33188623],
       [0.71744466],
       ...,
       [0.        ],
       [0.        ],
       [0.        ]], dtype=float32)

In [34]:
resized_dex_image1.numpy() == from_npy_files

array([[ True],
       [ True],
       [ True],
       ...,
       [ True],
       [ True],
       [ True]])