# PKL File Reader

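This notebook loads a pickle (.pkl) file and reports its type, its structure, a sample of its contents, and a JSON preview. Only open pickle files from sources you trust: unpickling can execute arbitrary code.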

In [2]:
import pickle
import json
import os
from pprint import pprint
from typing import Any

## Specify the PKL file path

In [None]:
# Location of the pickle file to inspect (edit this to point at your own file)
pkl_file_path = "/home/ANONYMOUS/projects/FALCON/data/generation/snort/snort3-community-rules_v1.pkl"

# Report up front whether the path exists, so a wrong path is noticed before loading
if not os.path.exists(pkl_file_path):
    print(f"✗ File not found: {pkl_file_path}")
else:
    print(f"✓ File found: {pkl_file_path}")
    print(f"  File size: {os.path.getsize(pkl_file_path)} bytes")

## Load and inspect the PKL file

In [4]:
# Deserialize the file into `data`. Any failure is reported and `data` is set
# to None, which is the value the `data is not None` guards in later cells test.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
try:
    with open(pkl_file_path, 'rb') as pkl_handle:
        data = pickle.load(pkl_handle)
    print("✓ Successfully loaded pickle file")
except Exception as load_error:
    print(f"✗ Error loading file: {load_error}")
    data = None

✓ Successfully loaded pickle file


## Data Type and Structure

In [5]:
if data is not None:
    data_type = type(data)
    print(f"Data type: {data_type}")
    print(f"Data type name: {data_type.__name__}")

    # Anything that defines __len__ can report a size
    if hasattr(data, '__len__'):
        print(f"Length/Size: {len(data)}")

    if isinstance(data, dict):
        # Dictionaries: list at most the first 10 keys, then summarise the rest
        total_keys = len(data)
        print(f"\nDictionary keys ({total_keys} total):")
        for shown_key in list(data.keys())[:10]:
            print(f"  - {shown_key}")
        if total_keys > 10:
            print(f"  ... and {total_keys - 10} more keys")

    elif isinstance(data, (list, tuple)):
        # Lists/tuples: name the container and the type of its first element
        print(f"\nCollection type: {data_type.__name__}")
        if len(data) > 0:
            print(f"First element type: {type(data[0])}")

    elif hasattr(data, '__dict__'):
        # Other objects: list at most the first 10 instance attributes
        print("\nObject attributes:")
        for attr_name in list(vars(data))[:10]:
            print(f"  - {attr_name}")

Data type: <class 'dict'>
Data type name: dict
Length/Size: 4017

Dictionary keys (4017 total):
  - alert tcp $HOME_NET 2589 -> $EXTERNAL_NET any ( msg:"MALWARE-BACKDOOR - Dagger_1.4.0"; flow:to_client,established; content:"2|00 00 00 06 00 00 00|Drives|24 00|",depth 16; metadata:ruleset community; classtype:misc-activity; sid:105; rev:14; )
  - alert tcp $EXTERNAL_NET any -> $HOME_NET 7597 ( msg:"MALWARE-BACKDOOR QAZ Worm Client Login access"; flow:to_server,established; content:"qazwsx.hsq"; metadata:ruleset community; classtype:misc-activity; sid:108; rev:12; )
  - alert tcp $EXTERNAL_NET any -> $HOME_NET 12345:12346 ( msg:"MALWARE-BACKDOOR netbus getinfo"; flow:to_server,established; content:"GetInfo|0D|"; metadata:ruleset community; classtype:trojan-activity; sid:110; rev:10; )
  - alert tcp $HOME_NET 20034 -> $EXTERNAL_NET any ( msg:"MALWARE-BACKDOOR NetBus Pro 2.0 connection established"; flow:to_client,established; flowbits:isset,backdoor.netbus_2.connect; content:"BN|10 00 02 

## Display Contents

In [None]:
if data is not None:
    # Framed heading, then the whole object pretty-printed with nesting capped at 3 levels
    banner = "=" * 80
    print(banner)
    print("FULL CONTENTS:")
    print(banner)
    pprint(data, depth=3, width=100)

## Sample Data (for large datasets)

In [7]:
if data is not None:
    print("Sample of data (first few items):\n")
    divider = "-" * 60

    if isinstance(data, dict):
        # Dictionaries: the first 3 key/value pairs, with each value's type
        for entry_key, entry_value in list(data.items())[:3]:
            print(f"Key: {entry_key}")
            print(f"Value type: {type(entry_value)}")
            print(f"Value: {entry_value}")
            print(divider)

    elif isinstance(data, (list, tuple)):
        # Lists/tuples: the first 3 elements, pretty-printed to a shallow depth
        for position, element in enumerate(data[:3]):
            print(f"Item {position}:")
            print(f"  Type: {type(element)}")
            pprint(element, depth=2, width=80)
            print(divider)

    else:
        # Anything else is pretty-printed as a whole
        pprint(data, depth=2, width=80)

Sample of data (first few items):

Key: alert tcp $HOME_NET 2589 -> $EXTERNAL_NET any ( msg:"MALWARE-BACKDOOR - Dagger_1.4.0"; flow:to_client,established; content:"2|00 00 00 06 00 00 00|Drives|24 00|",depth 16; metadata:ruleset community; classtype:misc-activity; sid:105; rev:14; )
Value type: <class 'list'>
Value: ['alert tcp $HOME_NET any -> $EXTERNAL_NET any (msg:"UNAUTHORIZED remote access"; flow:to_client,established; content:"Drives"; classtype:trojan-activity;)', 'alert tcp $HOME_NET 2500:2600 -> $EXTERNAL_NET any (msg:"SUSPICIOUS activity on uncommon source ports"; flow:to_client,established; content:"00 00 00"; byte_test:1,=,2,0; classtype:bad-unknown;)', 'alert tcp $HOME_NET any -> $EXTERNAL_NET any (msg:"MALWARE communication attempt detected"; flow:to_client,established; byte_test:1,=,2,0; classtype:misc-activity;)']
------------------------------------------------------------
Key: alert tcp $EXTERNAL_NET any -> $HOME_NET 7597 ( msg:"MALWARE-BACKDOOR QAZ Worm Client Login 

## Export to JSON (if possible)

In [8]:
if data is not None:
    try:
        # default=str is the fallback json uses for objects it cannot encode itself
        json_str = json.dumps(data, indent=2, default=str)
        print("✓ Data is JSON-serializable")

        # Show only the head of the document and report how much was left out
        preview_chars = 1000
        print("\nJSON Preview (first 1000 characters):")
        print(json_str[:preview_chars])
        hidden_chars = len(json_str) - preview_chars
        if hidden_chars > 0:
            print(f"\n... ({hidden_chars} more characters)")

        # Optionally save to file
        # json_output_path = pkl_file_path.replace('.pkl', '.json')
        # with open(json_output_path, 'w') as f:
        #     f.write(json_str)
        # print(f"\n✓ Saved to: {json_output_path}")

    except (TypeError, ValueError) as json_error:
        print(f"✗ Data contains non-JSON-serializable objects: {json_error}")
        print("  (This is common for custom objects, numpy arrays, etc.)")

✓ Data is JSON-serializable

JSON Preview (first 1000 characters):
{
  "alert tcp $HOME_NET 2589 -> $EXTERNAL_NET any ( msg:\"MALWARE-BACKDOOR - Dagger_1.4.0\"; flow:to_client,established; content:\"2|00 00 00 06 00 00 00|Drives|24 00|\",depth 16; metadata:ruleset community; classtype:misc-activity; sid:105; rev:14; )": [
    "alert tcp $HOME_NET any -> $EXTERNAL_NET any (msg:\"UNAUTHORIZED remote access\"; flow:to_client,established; content:\"Drives\"; classtype:trojan-activity;)",
    "alert tcp $HOME_NET 2500:2600 -> $EXTERNAL_NET any (msg:\"SUSPICIOUS activity on uncommon source ports\"; flow:to_client,established; content:\"00 00 00\"; byte_test:1,=,2,0; classtype:bad-unknown;)",
    "alert tcp $HOME_NET any -> $EXTERNAL_NET any (msg:\"MALWARE communication attempt detected\"; flow:to_client,established; byte_test:1,=,2,0; classtype:misc-activity;)"
  ],
  "alert tcp $EXTERNAL_NET any -> $HOME_NET 7597 ( msg:\"MALWARE-BACKDOOR QAZ Worm Client Login access\"; flow:to_server,establ