In [33]:
import os
import numpy as np
import pandas as pd
import cv2
import mahotas
from skimage.feature import local_binary_pattern
from tqdm.notebook import tqdm


In [34]:
def bytes_to_image(file_path, width=256):
    """Read a hex-dump text file and lay its bytes out as a 2-D uint8 image.

    Each line is split on whitespace; the first token (the address) is
    dropped and every remaining token is parsed as a hexadecimal byte.
    Tokens equal to '??' are skipped.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.
    width : int, default 256
        Number of columns in the resulting image.

    Returns
    -------
    numpy.ndarray
        Array of dtype uint8 and shape (ceil(n_bytes / width), width). The
        last row is zero-padded on the right when the byte count is not a
        multiple of ``width``. The result is rectangular, not necessarily
        square.
    """
    with open(file_path, 'r') as handle:
        values = [
            int(token, 16)
            for row in handle
            for token in row.strip().split()[1:]  # [1:] drops the address column
            if token != '??'
        ]

    flat = np.array(values, dtype=np.uint8)
    n_rows = int(np.ceil(len(flat) / width))
    # Zero-fill the tail so the flat buffer fills n_rows complete rows.
    shortfall = n_rows * width - len(flat)
    filled = np.pad(flat, (0, shortfall), 'constant', constant_values=0)
    return filled.reshape((n_rows, width))


In [35]:
def extract_haralick_features(img):
    """Compute Haralick texture features for ``img``, averaged over axis 0.

    The image is handed to ``mahotas.features.haralick`` unchanged; no dtype
    or grayscale conversion is performed here, so the caller must supply an
    array mahotas accepts.

    Returns the mean over the first axis of the mahotas result (one row per
    direction). Callers in this notebook treat the result as a 13-element
    vector.
    """
    per_direction = mahotas.features.haralick(img)
    return per_direction.mean(axis=0)


In [36]:
def extract_lbp_features(img, P=8, R=1):
    """Return a normalized histogram of 'uniform' local binary pattern codes.

    Parameters
    ----------
    img : array-like
        2-D image passed straight to ``skimage.feature.local_binary_pattern``.
    P : int, default 8
        Number of neighbour sample points, forwarded to skimage.
    R : float, default 1
        Sampling radius, forwarded to skimage.

    Returns
    -------
    numpy.ndarray
        Float histogram with ``P + 2`` bins (10 for the default ``P=8``),
        scaled so that it sums to approximately 1.
    """
    codes = local_binary_pattern(img, P, R, method='uniform')

    # P + 3 integer edges -> P + 2 unit-width bins covering codes 0 .. P + 1.
    edges = np.arange(0, P + 3)
    counts = np.histogram(codes.ravel(), bins=edges, range=(0, P + 2))[0]

    freq = counts.astype("float")
    # The small epsilon keeps the division defined when the histogram is empty.
    total = freq.sum() + 1e-6
    return freq / total


In [37]:
# Build one Haralick vector and one LBP histogram per '.bytes' file found in
# the current working directory. The three result lists stay index-aligned:
# entry i of each list belongs to file_ids[i].
haralick_features = []
lbp_features = []
file_ids = []
failed_files = []  # (file_id, repr(error)) for every file that was zero-filled

BYTES_SUFFIX = '.bytes'

# Filter before iterating so the progress bar counts only the files that are
# actually processed, and sort because os.listdir() returns entries in
# arbitrary order, which would make the row order of the outputs unstable.
bytes_files = sorted(
    name for name in os.listdir('.') if name.endswith(BYTES_SUFFIX)
)

for filename in tqdm(bytes_files):
    # Strip only the trailing suffix; str.replace would also remove any
    # earlier '.bytes' occurrence inside the name.
    file_id = filename[:-len(BYTES_SUFFIX)]
    file_path = os.path.join('.', filename)
    try:
        img = bytes_to_image(file_path)
        h_feat = extract_haralick_features(img)
        l_feat = extract_lbp_features(img)
    except Exception as e:
        # Best-effort: keep the row (zero-filled: 13 Haralick values, 10 LBP
        # bins) so the outputs stay aligned, but record the failure so it is
        # not silently mistaken for real features.
        failed_files.append((file_id, repr(e)))
        h_feat = np.zeros(13)
        l_feat = np.zeros(10)
    haralick_features.append(h_feat)
    lbp_features.append(l_feat)
    file_ids.append(file_id)

if failed_files:
    print(f'{len(failed_files)} file(s) failed feature extraction and were zero-filled:')
    for failed_id, reason in failed_files:
        print(f'  {failed_id}: {reason}')


  0%|          | 0/11 [00:00<?, ?it/s]

In [38]:
# Persist both feature sets as CSV, one row per file, with the file id in the
# leading 'Id' column.

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs('features', exist_ok=True)

# Haralick (IMG1): 13 texture features per file.
df_img1 = pd.DataFrame(haralick_features, columns=[f'haralick_{i}' for i in range(13)])
df_img1.insert(0, 'Id', file_ids)
df_img1.to_csv('features/img1_haralick.csv', index=False)

# LBP (IMG2): 10-bin normalized histogram per file.
df_img2 = pd.DataFrame(lbp_features, columns=[f'lbp_{i}' for i in range(10)])
df_img2.insert(0, 'Id', file_ids)
df_img2.to_csv('features/img2_lbp.csv', index=False)
