In [None]:

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight


In [None]:

# Load the data and decode the 'Bytes' column into a fixed-width feature matrix
def _to_byte_array(value):
    """Convert one 'Bytes' cell into a ``bytes`` object.

    Args:
        value: A cell taken from the 'Bytes' column. An ``int`` is treated as
            a single byte value, a ``str`` as hexadecimal text, and anything
            else is handed to ``bytes()`` unchanged.

    Returns:
        The decoded ``bytes``.

    Raises:
        ValueError: If an ``int`` is outside 0..255 or a ``str`` is not valid
            hexadecimal (both raised by the underlying ``bytes`` calls).
    """
    if isinstance(value, int):
        return bytes([value])
    if isinstance(value, str):
        return bytes.fromhex(value)
    return bytes(value)


data = pd.read_csv('shuffled_file_paths.csv')

# Decode each row and scale the byte values from 0..255 into [0, 1].
scaled_rows = [
    np.frombuffer(_to_byte_array(value), dtype=np.uint8) / 255.0
    for value in data['Bytes']
]

# Rows may decode to different lengths. Stacking ragged rows with np.array()
# either raises (NumPy >= 1.24) or yields a 1-D object array that has no
# feature axis, so copy every row into a zero-initialised rectangular matrix
# instead. Shorter rows end up right-padded with 0.0; when every row already
# has the same length the result equals np.array(scaled_rows).
n_features = max((len(row) for row in scaled_rows), default=0)
X = np.zeros((len(scaled_rows), n_features))
for row_index, row in enumerate(scaled_rows):
    X[row_index, :len(row)] = row

y = data['Target'].values  # Extract target labels

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

# Build the classifier: three L2-regularised ReLU layers of shrinking width,
# each followed by dropout, ending in a single sigmoid unit
from tensorflow.keras.regularizers import l2

input_shape = X_train.shape[1]
hidden_units = (512, 256, 128)

model = Sequential()
for depth, units in enumerate(hidden_units):
    # Only the first layer declares the input width.
    first_layer_kwargs = {'input_shape': (input_shape,)} if depth == 0 else {}
    model.add(Dense(units, activation='relu', kernel_regularizer=l2(0.01), **first_layer_kwargs))
    model.add(Dropout(0.3))

# Output layer for binary classification
model.add(Dense(1, activation='sigmoid'))


In [None]:

# Configure training: Adam with a reduced learning rate, binary cross-entropy
# loss, and accuracy / precision / recall as reported metrics
optimizer = Adam(learning_rate=0.0005)
tracked_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

# Hand-tuned per-class weights: class 0 is weighted three times as heavily as class 1
class_weights = {
    0: 3,
    1: 1,
}


In [None]:

# Define learning rate scheduler and early stopping
def lr_scheduler(epoch, lr):
    """Halve the learning rate on every fifth epoch, except epoch 0.

    Args:
        epoch: Index of the epoch about to run.
        lr: Learning rate currently in effect.

    Returns:
        ``lr * 0.5`` when ``epoch`` is a non-zero multiple of 5, otherwise
        ``lr`` unchanged.
    """
    if epoch % 5 != 0 or not epoch:
        return lr
    return lr * 0.5

# Stop when validation loss has not improved for 3 epochs and roll back to
# the best weights seen; apply lr_scheduler at the start of each epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
lr_schedule = LearningRateScheduler(lr_scheduler)
callbacks = [early_stopping, lr_schedule]


In [None]:

# Training hyperparameters
batch_size = 64
epochs = 15

# Fit on the training split, evaluating on the validation split after every
# epoch, with the class weights and callbacks configured above.
fit_options = dict(
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    class_weight=class_weights,
    callbacks=callbacks,
)
history = model.fit(X_train, y_train, **fit_options)
