# Inspect Bankin .dat Files

This notebook shows how to decode and preview Bankin `.dat` files (CP862-encoded).

In [4]:
from pathlib import Path

path = Path("../data/raw/Bankin fuller.dat")
path.exists()

True

In [5]:
# Print first 5 decoded lines
lines = path.read_bytes().splitlines()
for line in lines[:5]:
    print(line.decode("cp862", errors="replace"))

0191345,220126,"                                            BIT-ב 0849-ב םייתסמה סיטרכב 13:45  19/01/26 ךיראתמ טיבד סיטרכ",-000000100.00,+000018409.68,0003,67833011333622
0192004,220126,"                         עבש ראב ישמ ןרוא תיפאמ-ב 0740-ב םייתסמה סיטרכב 20:04  19/01/26 ךיראתמ טיבד סיטרכ",-000000052.60,+000018357.08,0003,67833011333622
0201733,220126,"                          עבש ראב יר'זמורפ רשאב-ב 0849-ב םייתסמה סיטרכב 17:33  20/01/26 ךיראתמ טיבד סיטרכ",-000000215.40,+000018141.68,0003,67833011333622
0211411,230126,"                                     השבלה זכרמ-ב 0740-ב םייתסמה סיטרכב 14:11  21/01/26 ךיראתמ טיבד סיטרכ",-000000099.80,+000018041.88,0003,67833011333622
0211755,230126,"                            םיאישנה YELLOW / זפ-ב 0849-ב םייתסמה סיטרכב 17:55  21/01/26 ךיראתמ טיבד סיטרכ",-000000241.30,+000017800.58,0003,67833011333622


In [6]:
# Extract the 7th column (account number) from a few rows
for line in lines[:10]:
    parts = line.decode("cp862", errors="replace").split(",")
    if len(parts) >= 7:
        print(parts[6].strip())

67833011333622
67833011333622
67833011333622
67833011333622
67833011333622
67833011333622
67833011333622
67833011333622
67833011333622
67833011333622


In [7]:
# Collect unique account numbers
accounts = set()
for line in lines:
    parts = line.decode("cp862", errors="replace").split(",")
    if len(parts) >= 7:
        acc = parts[6].strip()
        if acc:
            accounts.add(acc)

sorted(accounts, key=str.casefold)

['0005', '67833011333622']