In [12]:
import os
import json
import csv

# Function to extract the required features from a single JSON file
def extract_features_from_file(json_file, label):
    """Build one feature row from a single analysis report.

    Args:
        json_file: Path to the JSON report to read.
        label: Class label, stored unchanged under the ``"label"`` key.

    Returns:
        A dict with nine numeric features followed by ``"label"``. The key
        order is fixed because callers use it as the CSV column order.

    Raises:
        OSError: If the report cannot be opened.
        json.JSONDecodeError: If the report is not valid JSON.
    """
    # JSON is UTF-8 by specification; relying on the platform default
    # encoding breaks on reports that contain non-ASCII text.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # ``dict.get(key, default)`` only covers *missing* keys. A section that is
    # present but null must be treated the same way, hence the ``or`` fallbacks.
    virustotal = data.get("virustotal") or {}
    signatures = data.get("signatures") or []
    apkinfo = data.get("apkinfo") or {}
    manifest = apkinfo.get("manifest") or {}
    permissions = manifest.get("permissions") or []
    hidden_payloads = apkinfo.get("hidden_payload") or []
    flagged_files = apkinfo.get("files_flaged") or {}

    # Collect the descriptions once instead of re-walking the signatures for
    # every flag below.
    descriptions = [sig.get("description") or "" for sig in signatures]

    def triggered(text):
        """Return 1 if any signature description contains *text*, else 0."""
        return int(any(text in description for description in descriptions))

    return {
        # 1. Number of VirusTotal engines that flagged the sample
        "No_of_VirusTotal_Positives": virustotal.get("positives") or 0,
        # 2. Sum of the severity of every triggered signature
        "Total_Signature_Severity": sum(
            sig.get("severity") or 0 for sig in signatures
        ),
        # 3. Number of triggered signatures
        "No_of_Triggered_Signatures": len(signatures),
        # 4-6. Presence flags (0/1) for specific signature descriptions.
        # The texts are matched verbatim against the report, so they are kept
        # exactly as they are.
        "Asks_For_Dangerous_Permissions": triggered(
            "Application Asks For Dangerous Permissions (Static)"
        ),
        "Identified_By_More_than_10_AV": triggered(
            "File has been identified by more the 10 AntiVirus on VirusTotal as malicious (Osint)"
        ),
        "Hidden_Payload_Found": triggered("Hidden Payload Found (Static)"),
        # 7. Manifest permissions whose severity is "dangerous"
        "No_of_dangerous_permissions": sum(
            1 for perm in permissions if perm.get("severity") == "dangerous"
        ),
        # 8. Number of hidden payload entries
        "No_of_hidden_payloads": len(hidden_payloads),
        # 9. Total number of flagged files across all categories
        "No_of_flagged_files": sum(
            len(files or ()) for files in flagged_files.values()
        ),
        "label": label,
    }

# Features that must all be zero for a row to count as "signature-only".
_NON_SIGNATURE_FEATURES = (
    "No_of_VirusTotal_Positives",
    "Asks_For_Dangerous_Permissions",
    "Identified_By_More_than_10_AV",
    "Hidden_Payload_Found",
    "No_of_dangerous_permissions",
    "No_of_hidden_payloads",
    "No_of_flagged_files",
)


def _find_reports(class_dir):
    """Return existing ``<class_dir>/<sample>/reports/report.json`` paths.

    Sample folders are visited in sorted order: ``os.listdir`` makes no
    ordering guarantee, and sorting keeps the CSV rows reproducible.
    """
    candidates = (
        os.path.join(class_dir, folder, "reports", "report.json")
        for folder in sorted(os.listdir(class_dir))
    )
    return [path for path in candidates if os.path.exists(path)]


def _is_signature_only(row):
    """Return True when the signature totals are the only non-zero features."""
    return (
        row["Total_Signature_Severity"] > 0
        and row["No_of_Triggered_Signatures"] > 0
        and all(row[name] == 0 for name in _NON_SIGNATURE_FEATURES)
    )


# Function to process all JSON files and save to CSV
def process_json_files(base_dir, csv_file):
    """Extract features from every report under *base_dir* into a CSV file.

    Reports are looked up at ``<base_dir>/benign/<sample>/reports/report.json``
    (label 0) and ``<base_dir>/malicious/<sample>/reports/report.json``
    (label 1). Sample folders without a report are skipped.

    Malicious rows whose only non-zero features are the signature severity
    and signature count are left out of the output.

    Args:
        base_dir: Directory containing the ``benign`` and ``malicious`` folders.
        csv_file: Destination CSV path. Nothing is written when no row was
            collected.

    Raises:
        FileNotFoundError: If ``benign`` or ``malicious`` does not exist.
    """
    # Process benign files
    rows = [
        extract_features_from_file(report_path, label=0)
        for report_path in _find_reports(os.path.join(base_dir, "benign"))
    ]

    # Process malicious files, dropping the signature-only rows
    for report_path in _find_reports(os.path.join(base_dir, "malicious")):
        row = extract_features_from_file(report_path, label=1)
        if not _is_signature_only(row):
            rows.append(row)

    if not rows:
        return

    # Write all rows to CSV; the first row's key order defines the columns.
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)

# File paths
# Raw strings keep backslashes literal, so the separators must not be doubled:
# r"D:\\Data" contains two backslashes, not one.
base_dir = r"D:\Data Ransomware\analysis"  # Replace with your base directory path
csv_file = r"C:\Users\satya\Desktop\ransom\cnn\total3.csv"  # Replace with your desired CSV file path

# Process files and save to CSV
process_json_files(base_dir, csv_file)

# NOTE: this message is printed unconditionally; process_json_files writes no
# file when it collects no rows.
print(f"Features extracted and saved to {csv_file}.")

Features extracted and saved to C:\\Users\\satya\\Desktop\\ransom\\cnn\\total3.csv.
