In [3]:
import os
import json
import numpy as np
from PIL import Image

# Function to extract features from JSON
def extract_features(json_file):
    """Read an analysis report and return its nine numeric features.

    Args:
        json_file: Path to a JSON report whose top-level value is an object.

    Returns:
        A list of nine values, in this order:
            0. ``virustotal.positives``
            1. sum of ``severity`` over all entries in ``signatures``
            2. number of entries in ``signatures``
            3. 1 if any signature description contains
               "Application Asks For Dangerous Permissions (Static)", else 0
            4. 1 if any signature description contains the ">10 AntiVirus"
               OSINT text, else 0
            5. 1 if any signature description contains
               "Hidden Payload Found (Static)", else 0
            6. number of manifest permissions whose ``severity`` is "dangerous"
            7. number of entries in ``apkinfo.hidden_payload``
            8. total number of entries across all ``apkinfo.files_flaged`` lists

        Sections that are missing *or* explicitly ``null`` in the report
        contribute 0 rather than raising.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform
    # default encoding (cp1252 on many Windows setups).
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # `dict.get(key, default)` only falls back when the key is absent, not when
    # it is present with a null value, so normalise with `or` instead.
    virustotal = data.get("virustotal") or {}
    signatures = data.get("signatures") or []
    apkinfo = data.get("apkinfo") or {}
    manifest = apkinfo.get("manifest") or {}
    permissions = manifest.get("permissions") or []
    hidden_payloads = apkinfo.get("hidden_payload") or []
    flagged_files = apkinfo.get("files_flaged") or {}

    positives = virustotal.get("positives", 0)
    if positives is None:
        positives = 0

    # Collect the descriptions once; the three flag features all scan them.
    descriptions = [sig.get("description") or "" for sig in signatures]

    def has_signature(text):
        """Return 1 if any signature description contains *text*, else 0."""
        return 1 if any(text in description for description in descriptions) else 0

    features = [
        positives,  # No_of_VirusTotal_Positives
        sum(sig.get("severity") or 0 for sig in signatures),  # Total_Signature_Severity
        len(signatures),  # No_of_Triggered_Signatures
        has_signature("Application Asks For Dangerous Permissions (Static)"),
        has_signature("File has been identified by more the 10 AntiVirus on VirusTotal as malicious (Osint)"),
        has_signature("Hidden Payload Found (Static)"),
        sum(1 for perm in permissions if perm.get("severity") == "dangerous"),  # No_of_dangerous_permissions
        len(hidden_payloads),  # No_of_hidden_payloads
        sum(len(paths or []) for paths in flagged_files.values()),  # No_of_flagged_files
    ]
    return features

# Convert features to 369x369 RGB image
def features_to_rgb_image(features, output_path):
    """Render nine feature values as a 369x369 RGB image and save it.

    The values are min-max scaled to 0-255 using the minimum and maximum of
    this one feature vector (so scaling is per sample), then truncated to
    uint8. If every value is equal, all channels become 0.

    A 3x3 tile is built in which all three rows are identical and column
    ``c`` carries:
        red   = scaled feature ``c``      (indices 0, 1, 2)
        green = scaled feature ``6 + c``  (indices 6, 7, 8)
        blue  = scaled feature ``3 + c``  (indices 3, 4, 5)

    Args:
        features: Sequence of at least nine numeric values.
        output_path: Destination path passed to ``Image.save``.
    """
    values = np.array(features)
    hi, lo = values.max(), values.min()

    # Min-max scale into the 0-255 range; a constant vector maps to zeros.
    if hi > lo:
        scaled = (values - lo) / (hi - lo) * 255
    else:
        scaled = values * 0
    scaled = scaled.astype(np.uint8)

    # Feature indices feeding the R, G and B planes, in that order.
    plane_indices = (
        [0, 1, 2],  # R: Most critical features
        [6, 7, 8],  # G: Moderately important features
        [3, 4, 5],  # B: Binary/contextual features
    )

    small_image = np.zeros((3, 3, 3), dtype=np.uint8)
    for plane, indices in enumerate(plane_indices):
        row = scaled[indices].reshape((1, 3))
        # Repeat the single row three times to fill the 3x3 plane.
        small_image[:, :, plane] = np.tile(row, (3, 1))

    # Upscale the tile to the final size with Lanczos resampling and save.
    upscaled = Image.fromarray(small_image, 'RGB').resize((369, 369), Image.LANCZOS)
    upscaled.save(output_path)

# Directory paths
base_dir = r"D:\\Data Ransomware\\analysis"
output_dir = r"C:\\Users\\satya\\Desktop\\ransom\\cnn2\\MORE\\imageLast2"
# NOTE(review): these are raw strings, so every "\\" above is two literal
# backslashes in the resulting path - presumably unintended; confirm.

# One output folder per class, created up front
benign_output_dir = os.path.join(output_dir, "benign")
malicious_output_dir = os.path.join(output_dir, "malicious")
for class_dir in (benign_output_dir, malicious_output_dir):
    os.makedirs(class_dir, exist_ok=True)

# (label, input folder under base_dir, output folder) for each class
class_specs = (
    (0, "benign", benign_output_dir),
    (1, "malicious", malicious_output_dir),
)

# Walk <base_dir>/<class>/<sample>/reports/report.json and render each report
for label, folder_name, output_subdir in class_specs:
    folder_path = os.path.join(base_dir, folder_name)
    for subfolder in os.listdir(folder_path):
        report_path = os.path.join(folder_path, subfolder, "reports", "report.json")
        if not os.path.exists(report_path):
            # Sample folders without a report are silently skipped.
            continue

        features = extract_features(report_path)

        # Drop malicious samples whose only non-zero features are the
        # signature severity total and the signature count.
        if label == 1:
            if features[1] > 0 and features[2] > 0:
                remaining = features[:1] + features[3:]
                if all(value == 0 for value in remaining):
                    continue

        # The image is named after the sample folder.
        image_path = os.path.join(output_subdir, f"{subfolder}.png")
        features_to_rgb_image(features, image_path)

print("RGB image generation completed!")


RGB image generation completed!
