In [50]:
import pandas as pd
import json
from urllib.parse import urlparse, parse_qs


import hashlib
import base64
import urllib.parse
import zlib
import gzip
import html

from Crypto.Hash import MD4, RIPEMD160  # pycryptodome
import mmh3  # MurmurHash3
import base58  # Base58 encoding
from lzstring import LZString  # LZString compression

In [48]:
import json
import hashlib
import base64
import urllib.parse

def generate_variants(data):
    """Build the set of representations of ``data`` to search for in traffic.

    The result contains the raw string, its hex digest under a range of hash
    functions, its Base16/Base32/Base64 and URL-quoted encodings, hex dumps of
    its zlib, gzip and raw-DEFLATE compressed forms, and its LZString
    compression.

    Args:
        data: The plain-text value (``str``) to derive variants from.

    Returns:
        set[str]: Every variant converted with ``str()``; the MurmurHash3
        results are integers and therefore appear in decimal.
    """
    import warnings  # local import: only needed for the whirlpool fallback

    raw = data.encode()  # encode once instead of once per variant
    variants = {data}

    # Hashes
    for algorithm in (
        "md5", "sha1", "sha256", "sha224", "sha384", "sha512",
        "sha3_224", "sha3_256", "sha3_384", "sha3_512",
    ):
        variants.add(hashlib.new(algorithm, raw).hexdigest())

    # hashlib only offers MD4 when the linked OpenSSL build exposes it, so use
    # the pycryptodome implementation the notebook already imports.
    variants.add(MD4.new(raw).hexdigest())
    variants.add(RIPEMD160.new(raw).hexdigest())

    # Whirlpool is likewise OpenSSL-dependent and has no imported fallback;
    # skip it (loudly) rather than aborting the whole scan.
    try:
        variants.add(hashlib.new("whirlpool", raw).hexdigest())
    except ValueError:
        warnings.warn(
            "hashlib has no 'whirlpool' on this platform; variant skipped",
            RuntimeWarning,
            stacklevel=2,
        )

    variants.add(mmh3.hash(raw))
    variants.add(mmh3.hash128(raw))

    # Encodings
    variants.add(base64.b16encode(raw).decode())
    variants.add(base64.b32encode(raw).decode())
    variants.add(base64.b64encode(raw).decode())
    variants.add(urllib.parse.quote(data))

    # Compression
    variants.add(zlib.compress(raw).hex())
    # NOTE: gzip.compress writes the current time into the stream header, so
    # this variant is not stable from one run to the next.
    variants.add(gzip.compress(raw).hex())
    variants.add(LZString().compress(data))
    # Raw DEFLATE (no zlib header/trailer): a negative wbits selects it. The
    # original repeated the zlib line here, so this form was never generated.
    deflater = zlib.compressobj(wbits=-zlib.MAX_WBITS)
    variants.add((deflater.compress(raw) + deflater.flush()).hex())

    return {str(variant) for variant in variants}


In [57]:
file_path = '/Users/pouneh/Downloads/latimes.har'
# HAR files are JSON; decode explicitly as UTF-8 (tolerating a BOM) instead of
# relying on the platform's default text encoding.
with open(file_path, 'r', encoding='utf-8-sig') as file:
    har_data = json.load(file)

# Values whose presence in outgoing requests is being audited.
# NOTE(review): these look like real personal details and a card number --
# consider loading them from an untracked file before sharing the notebook.
input_data = {
    "first_name": "pouneh",
    "last_name": "bahrami",
    "email": "pouneh.nb@gmail.com",
    "card_number": "4645992371716565",
    "zip_code": "96787"
}

all_variants = set()
for value in input_data.values():
    all_variants.update(generate_variants(value))


def _matching_variants(text):
    """Return the entries of ``all_variants`` that occur as substrings of ``text``.

    Anything that is not a string (a missing or ``null`` HAR field) yields no
    matches instead of raising ``TypeError`` on the ``in`` test.
    """
    if not isinstance(text, str):
        return []
    return [variant for variant in all_variants if variant in text]


# One record per (request location, matched variant). Every record carries
# "location", "data" and "url"; query-parameter hits also carry "parameter".
exfiltrated_data = []
for entry in har_data.get("log", {}).get("entries", []):
    request = entry.get("request", {})
    url = request.get("url", "")

    # Check in request URL
    exfiltrated_data.extend(
        {"location": "URL", "data": variant, "url": url}
        for variant in _matching_variants(url)
    )

    # Check query parameters in the URL (parse_qs percent-decodes the values)
    for param, values in parse_qs(urlparse(url).query).items():
        for value in values:
            exfiltrated_data.extend(
                {
                    "location": "Query Parameter",
                    "data": variant,
                    "url": url,
                    "parameter": param
                }
                for variant in _matching_variants(value)
            )

    # Check in request headers
    for header in request.get("headers", []):
        exfiltrated_data.extend(
            {"location": "Header", "data": variant, "url": url}
            for variant in _matching_variants(header.get("value", ""))
        )

    # Check in request payloads; "postData" or its "text" may be absent or null
    post_data = (request.get("postData") or {}).get("text", "")
    exfiltrated_data.extend(
        {"location": "Payload", "data": variant, "url": url}
        for variant in _matching_variants(post_data)
    )

    # Check in cookies
    for cookie in request.get("cookies", []):
        exfiltrated_data.extend(
            {"location": "Cookie", "data": variant, "url": url}
            for variant in _matching_variants(cookie.get("value", ""))
        )


exfiltrated_df = pd.DataFrame(exfiltrated_data)

exfiltrated_df

Unnamed: 0,location,data,url,parameter
0,URL,96787,https://billing.platform.latimes.com/v1/lookup...,
1,Query Parameter,96787,https://billing.platform.latimes.com/v1/lookup...,zipCode
2,Header,96787,https://billing.platform.latimes.com/v1/lookup...,
3,Payload,4645992371716565,https://api.recurly.com/js/v1/token,
4,Payload,pouneh,https://api.recurly.com/js/v1/token,
5,Payload,bahrami,https://api.recurly.com/js/v1/token,
6,Payload,96787,https://api.recurly.com/js/v1/token,
7,Payload,pouneh,https://billing.platform.latimes.com/v1/purchase,
8,Payload,bahrami,https://billing.platform.latimes.com/v1/purchase,


In [33]:
def compute_hashes(text):
    """Hash ``text`` with every supported algorithm.

    Args:
        text: The string to hash; it is encoded with ``str.encode()`` first.

    Returns:
        dict: Algorithm label -> digest. Values are hex strings, except the
        two MurmurHash3 entries, which are the integers returned by ``mmh3``.
        ``"whirlpool"`` is ``None`` when hashlib cannot provide that
        algorithm on this platform.
    """
    raw = text.encode()  # encode once instead of once per algorithm

    # hashlib only offers MD4 when the linked OpenSSL build exposes it, so use
    # the pycryptodome implementation the notebook already imports.
    hashes = {"MD4": MD4.new(raw).hexdigest()}

    # Label -> hashlib name, in the order the result table lists them.
    for label, algorithm in (
        ("MD5", "md5"),
        ("SHA1", "sha1"),
        ("SHA256", "sha256"),
        ("SHA224", "sha224"),
        ("SHA384", "sha384"),
        ("SHA512", "sha512"),
        ("SHA3-224", "sha3_224"),
        ("SHA3-256", "sha3_256"),
        ("SHA3-384", "sha3_384"),
        ("SHA3-512", "sha3_512"),
    ):
        hashes[label] = hashlib.new(algorithm, raw).hexdigest()

    # Whirlpool is OpenSSL-dependent as well; keep the key but record None
    # rather than failing the whole function when it is unavailable.
    try:
        hashes["whirlpool"] = hashlib.new("whirlpool", raw).hexdigest()
    except ValueError:
        hashes["whirlpool"] = None

    hashes["mmh3-32"] = mmh3.hash(raw)
    hashes["mmh3-128"] = mmh3.hash128(raw)
    hashes["RIPEMD160"] = RIPEMD160.new(raw).hexdigest()

    return hashes

def compute_encodings(text):
    """Encode ``text`` under each supported textual encoding.

    Args:
        text: The string to encode.

    Returns:
        dict[str, str]: Encoding label -> encoded string.
    """
    raw = text.encode()
    return {
        # Base encodings
        "Base16": base64.b16encode(raw).decode(),
        "Base32": base64.b32encode(raw).decode(),
        # Not in the standard library, but the notebook imports the `base58`
        # package, so fill this in instead of leaving it as None.
        "Base58": base58.b58encode(raw).decode(),
        "Base64": base64.b64encode(raw).decode(),
        "URL Encoding": urllib.parse.quote(text),
        # Decimal numeric character reference for every character.
        "Entity Encoding": "".join(f"&#{ord(c)};" for c in text),
    }

def compute_compression(text):
    """Compress ``text`` with each supported scheme.

    Args:
        text: The string to compress.

    Returns:
        dict[str, str]: Scheme label -> compressed form. "Deflate", "Gzip"
        and "Zlib" are hex dumps of the compressed bytes; "LZString" is the
        string returned by ``LZString().compress``.
    """
    raw = text.encode()

    # Raw DEFLATE has no zlib header/trailer; a negative wbits selects it.
    # Previously this entry was zlib-wrapped and so duplicated "Zlib".
    deflater = zlib.compressobj(wbits=-zlib.MAX_WBITS)

    return {
        "Deflate": (deflater.compress(raw) + deflater.flush()).hex(),
        # NOTE: gzip.compress writes the current time into the stream header,
        # so this value is not stable from one run to the next.
        "Gzip": gzip.compress(raw).hex(),
        "LZString": LZString().compress(text),
        # Hex-encoded like its siblings (previously left as raw bytes).
        "Zlib": zlib.compress(raw).hex(),
    }

input_text = "96787"

# Run each family of transformations on the same sample value.
hash_results, encoding_results, compression_results = (
    compute_hashes(input_text),
    compute_encodings(input_text),
    compute_compression(input_text),
)

all_results = dict(
    Hashes=hash_results,
    Encodings=encoding_results,
    Compression=compression_results,
)

# Dict of dicts: outer keys become columns, inner keys become the row index,
# so each row is populated only in the column of the family it belongs to.
result_df = pd.DataFrame(data=all_results)
result_df

Unnamed: 0,Hashes,Encodings,Compression
MD4,140a50634f729fbb1f3279bf39960247,,
MD5,84b4cb43620cbbb6feb8547cc0efd282,,
SHA1,0533e897ad731f8aeea7d09b35e221eeec50528d,,
SHA256,1dd36e41092ecd212b6be289c07fd42b26a235ae7911cd...,,
SHA224,b8728e3cd9a25241da64f3807658e00ef86b7b9ee16236...,,
SHA384,12c7adfee46e14cb3048d17820d1f039af74ddba64a351...,,
SHA512,9bbd4229ab44ed6c7afa86ce46cd72975196993a530ba1...,,
SHA3-224,3118f1a9be1220fb1687e25174b0949fda3bb09f825136...,,
SHA3-256,681345dfcc7b9c863d451416fee26cd213d3f93e5fec6c...,,
SHA3-384,cd62374ace51fd0aa9d7791386da8fb1ad44ac211ad620...,,
