## Import libraries

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

## Read dataset

In [None]:
# Load the header dataset from the CSV file next to the notebook and show it.
csv_path = "./Ransomware_headers.csv"
dataset = pd.read_csv(csv_path)
display(dataset)

## Filter dataset and assign to rans_df and benign_df in a single statement

In [None]:
# Rows with GR > 0 go to rans_df, rows with GR == 0 go to benign_df.
gr_label = dataset["GR"]
rans_df, benign_df = dataset.loc[gr_label > 0], dataset.loc[gr_label == 0]

# Row/column counts of both subsets, as a quick sanity check
rans_df.shape, benign_df.shape

In [None]:
# Lookup table from numeric class id (1..25) to ransomware family name.
# The ids are consecutive, so the table is built by enumerating the names.
class_index = dict(enumerate(
    (
        'Avaddon',
        'Babuk',
        'Blackmatter',
        'Conti',
        'Darkside',
        'Dharma',
        'Doppelpaymer',
        'Exorcist',
        'Gandcrab',
        'Lockbit',
        'Makop',
        'Maze',
        'Mountlocker',
        'Nefilim',
        'Netwalker',
        'Phobos',
        'Pysa',
        'Ragnarok',
        'RansomeXX',
        'Revil',
        'Ryuk',
        'Stop',
        'Thanos',
        'Wastedlocker',
        'Zeppelin',
    ),
    start=1,
))

# Distinct values of the "family" column, and how many of them there are
unique_families = dataset["family"].unique()
family_count = len(unique_families)
print(
    f"Unique families: {unique_families}",
    f"Total number of unique families: {family_count}",
    sep="\n",
)

## Use columns 0 to 1024 as features

In [None]:
# Everything from the fifth column onward is treated as a feature column;
# the first four columns are skipped.
rans_features, benign_features = (
    frame.iloc[:, 4:] for frame in (rans_df, benign_df)
)

rans_features.head(), benign_features.head()

## Normalize features to the range [0, 1]

In [None]:
# Divide every feature value by 255.0 (element-wise on the whole frame).
# NOTE(review): this lands in [0, 1] only if the raw values are 0..255 — confirm.
rans_norm = rans_features / 255.0
benign_norm = benign_features / 255.0

print(rans_norm, benign_norm)

## Normalize features to the range [0, 1] using MinMaxScaler

In [None]:
# Fit ONE scaler on the ransomware and benign rows together, then transform
# each subset with it. Fitting separately per subset (calling fit_transform
# twice) would scale the two classes with different per-column min/max values,
# making them incomparable, and would leave `scaler` fitted to the last subset only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(pd.concat([rans_features, benign_features]))

rans_normalized = scaler.transform(rans_features)
benign_normalized = scaler.transform(benign_features)

# transform() returns plain arrays; wrap them back into frames with the
# original feature column names (the row index restarts at 0).
rans_features_normalized = pd.DataFrame(rans_normalized, columns=rans_features.columns)
benign_features_normalized = pd.DataFrame(benign_normalized, columns=benign_features.columns)

print(rans_features_normalized, benign_features_normalized)

## Convert each row into a 2D array of 32x32

In [None]:
def _as_grid(row):
    """Reshape the values of one feature row into a 32x32 array."""
    return row.values.reshape(32, 32)


# One 32x32 array per row, for the /255-scaled features ...
rans_feature_2d = rans_norm.apply(_as_grid, axis=1)
benign_feature_2d = benign_norm.apply(_as_grid, axis=1)

# ... and for the MinMaxScaler-scaled features.
rans_feature_normalized_2d = rans_features_normalized.apply(_as_grid, axis=1)
benign_feature_normalized_2d = benign_features_normalized.apply(_as_grid, axis=1)

(
    rans_feature_2d.head(),
    benign_feature_2d.head(),
    rans_feature_normalized_2d.head(),
    benign_feature_normalized_2d.head(),
)

In [None]:
def plot_heatmap(feature_2d, title):
    """Show a single 2D feature array as an 8x8 inch heatmap.

    Args:
        feature_2d: 2D array of values; colours are clipped to the 0..1 range.
        title: Text placed above the heatmap.
    """
    _, ax = plt.subplots(figsize=(8, 8))
    sns.heatmap(feature_2d, cmap="Spectral_r", vmin=0, vmax=1, cbar=True, ax=ax)
    ax.set_title(title)
    plt.show()


first_rans_grid = rans_feature_2d.iloc[0]
plot_heatmap(first_rans_grid, title="Ransomware sample heatmap")

In [None]:
# plot_heatmap is already defined in the previous plotting cell; the identical
# second definition that used to live here only shadowed it, so this cell now
# just calls the function on the MinMaxScaler-scaled version of the first sample.
plot_heatmap(rans_feature_normalized_2d.iloc[0], title="Ransomware sample heatmap")

## Function to plot and save a heatmap for a single 2D array

In [None]:
# Metadata columns kept alongside the generated images. An explicit .copy() is
# taken because a new 'images_path' column is assigned to these frames later;
# assigning to a slice of a filtered frame raises SettingWithCopyWarning.
rans_processed = rans_df[['filename', 'GR', 'family']].copy()
benign_processed = benign_df[['filename', 'GR', 'family']].copy()

def plot_and_save_heatmap(feature_2d, filename, save_dir, img_size=(333, 333), dpi=100):
    """Render a 2D array as a bare heatmap and save it as ``<save_dir>/<filename>.png``.

    The heatmap is drawn without colour bar or tick labels, with square cells
    and colours clipped to the 0..1 range.

    Args:
        feature_2d: 2D array of values to draw.
        filename: File name (without extension) of the image to write.
        save_dir: Directory the image is written to; created if missing.
        img_size: Figure size in pixels as (width, height) at the given dpi.
            Because the figure is saved with ``bbox_inches='tight'``, the
            written image is cropped to the drawn content and may be smaller.
        dpi: Resolution used both to size the figure and to save it.

    Returns:
        The path of the written PNG file.
    """
    # Create directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)

    # Figure size in inches derived from the pixel size and dpi
    figsize = (img_size[0] / dpi, img_size[1] / dpi)

    filepath = os.path.join(save_dir, f"{filename}.png")
    fig, ax = plt.subplots(figsize=figsize)
    try:
        sns.heatmap(
            feature_2d,
            cmap="Spectral_r",
            vmin=0,
            vmax=1,
            cbar=False,
            square=True,
            xticklabels=False,
            yticklabels=False,
            ax=ax,
        )
        # Save with the same dpi that was used to compute the figure size
        fig.savefig(filepath, dpi=dpi, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving failed, so a
        # long loop over many samples does not accumulate open figures.
        plt.close(fig)

    print(f"Saved heatmap as {filepath}")
    return filepath

# Write one heatmap image per ransomware sample, named after the sample's
# 'filename' value, and collect the resulting image paths in row order.
rans_images_path = [
    plot_and_save_heatmap(grid, filename=sample_name, save_dir="ransomware_images")
    for grid, sample_name in zip(rans_feature_2d, rans_df["filename"])
]

# Same for the benign samples.
benign_images_path = [
    plot_and_save_heatmap(grid, filename=sample_name, save_dir="benign_images")
    for grid, sample_name in zip(benign_feature_2d, benign_df["filename"])
]

# Attach the image paths to the metadata frames.
rans_processed['images_path'] = rans_images_path
benign_processed['images_path'] = benign_images_path

In [None]:
# Number of missing values per column of the ransomware metadata
rans_processed.isna().sum()

## Create training and validation folders to split the data

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 train/validation split with a fixed seed for both classes.
split_options = {"test_size": 0.2, "random_state": 42}

# The ransomware split is stratified by family; the benign split is not.
train_rans, val_rans = train_test_split(
    rans_processed,
    stratify=rans_processed['family'],
    **split_options,
)
train_benign, val_benign = train_test_split(benign_processed, **split_options)

print(*(part.shape for part in (train_rans, val_rans, train_benign, val_benign)))

In [14]:
import numpy as np

# Root folders of the training and validation image trees
base_train_dir = 'data/train'
base_val_dir = 'data/validation'

# Every family value that occurs in either training split
benign_families = train_benign['family'].unique()
rans_families = train_rans['family'].unique()
unique_families = np.unique(np.concatenate((benign_families, rans_families)))

# One sub-folder per family under both roots
for family in unique_families:
    for split_dir in (base_train_dir, base_val_dir):
        os.makedirs(os.path.join(split_dir, str(family)), exist_ok=True)

In [None]:
import shutil

def move_files(dataframe, base_dir):
    """Move every image listed in ``dataframe`` into ``base_dir/<family>/``.

    Args:
        dataframe: Frame with an 'images_path' column (path to the image
            file) and a 'family' column (used as the sub-folder name).
        base_dir: Root directory that holds one sub-folder per family.

    The file keeps its base name. The family sub-folder is created when it
    does not exist yet, so the function does not depend on the folders having
    been prepared beforehand. Note that the files are moved, not copied: the
    paths stored in 'images_path' no longer exist afterwards.
    """
    # Only two columns are needed, so iterate over them directly instead of
    # building a full row object per sample.
    for source_path, family in zip(dataframe['images_path'], dataframe['family']):
        dest_dir = os.path.join(base_dir, str(family))
        os.makedirs(dest_dir, exist_ok=True)

        # Construct the destination path and move the file there
        dest_path = os.path.join(dest_dir, os.path.basename(source_path))
        shutil.move(source_path, dest_path)
        
# Distribute the generated images: benign splits first, then ransomware splits.
for split_frame, target_root in (
    (train_benign, base_train_dir),
    (val_benign, base_val_dir),
    (train_rans, base_train_dir),
    (val_rans, base_val_dir),
):
    move_files(split_frame, target_root)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Small CNN built layer by layer: input normalisation, two conv/pool stages,
# then a dense head with a softmax output.
model = Sequential()

# Input: 256x256 RGB images, batch-normalised before the first convolution
model.add(BatchNormalization(input_shape=(256, 256, 3)))

# First convolution stage
model.add(Conv2D(64, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.3))

# Second convolution stage
model.add(Conv2D(128, (3, 3), activation='relu', padding='valid'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
# 26 output classes — presumably the 25 ransomware families plus one benign
# class; TODO confirm against the number of class folders.
model.add(Dense(26, activation='softmax'))

compile_options = {
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metrics": ['accuracy'],
}
model.compile(**compile_options)

model.summary()

In [None]:
# Random augmentations applied to training images only
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators rescale pixel values by 1/255; validation images are not augmented.
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation)
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Settings shared by both directory readers: images are resized to 256x256,
# served in batches of 32 (adjust as needed), with one-hot ('categorical')
# labels for multi-class classification.
flow_options = dict(
    target_size=(256, 256),
    batch_size=32,
    class_mode='categorical',
)

# Load training data from directory
train_generator = train_datagen.flow_from_directory('data/train', **flow_options)

# Load validation data from directory
validation_generator = validation_datagen.flow_from_directory('data/validation', **flow_options)

In [None]:
# Train the model. steps_per_epoch / validation_steps are deliberately not
# passed: computing them as `samples // batch_size` rounds down, which skips
# the final partial batch every epoch and yields 0 validation steps when the
# validation set is smaller than one batch. Without them Keras uses the
# generators' own length, which covers every batch including the partial one.
history = model.fit(
    train_generator,
    epochs=10,  # Adjust epochs based on performance
    validation_data=validation_generator,
)