# Profile

# Account

In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import json
import os
import pathlib
import re
from os import PathLike
from pathlib import Path

from jb_onboarding.constants import DOCS
from jb_onboarding.preprocessing import Preprocessor

DATA_PATH = Path(os.path.join(".", "data")).absolute()


def get_dataset(data_path: PathLike) -> list[tuple[int, pathlib.Path]]:
    """
    Get the dataset of client data.

    Scans `data_path` recursively for .zip files named in the format
    'client_<number>.zip'. Extracts the <number> and returns a list of
    tuples (client_number, file_path).

    Args:
        data_path (PathLike): Path to the dataset.

    Returns:
        List[Tuple[int, pathlib.Path]]: List of tuples where the first
        element is the client number (int) and the second is the
        zip file path (pathlib.Path).
    """
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Dataset path {data_path} does not exist.")

    paths = []
    # Regex to match files named like "client_1234.zip" and capture the integer part
    pattern = re.compile(r"^client_(\d+)\.zip$")

    for root_dir, _, files in os.walk(data_path):
        for file in files:
            if file.endswith(".zip"):
                # Attempt to extract the client number from the filename
                match = pattern.match(file)
                if match:
                    client_number = int(match.group(1))
                    file_path = pathlib.Path(root_dir) / file
                    paths.append((client_number, file_path))
                # If filenames might vary, you could handle that here,
                # e.g. continue, log a warning, etc.

    return sorted(paths)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
dataset = get_dataset(DATA_PATH)
print(f"Dataset collected: {len(dataset)} files.")

prep = Preprocessor()


Dataset collected: 2964 files.


Sliding Window Attention is enabled but not implemented for `eager`; unexpected results may be encountered.


FlashAttention2 is not installed.


In [163]:
print(dataset[1523][1])
client_data = prep(dataset[1523][1])

/Users/shyngys/personal/data-workspace/data/client_1501_1700/client_1524.zip


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


In [132]:
client_data

{'profile': {'Default': {},
  'Client Information': {'Last Name': 'Reiter',
   'First/ Middle Name (s)': 'Marcel Paul',
   'Address': 'Enzersdorfer Straße 28, 4503 Bruck an der Mur',
   'Country of Domicile': 'Austria',
   'Date of birth': '1986-05-15',
   'Nationality': 'Austrian',
   'Passport No/ Unique ID': 'YK6562036',
   'ID Type': 'passport',
   'ID Issue Date': '2017-07-01',
   'ID Expiry Date': '2027-06-30',
   'Gender': {'options': ['Female', 'Male'], 'selected': ['Male']}},
  'Account Holder – Contact Management and Services – Contact Info': {'Communication Medium': 'Telephone +43 695 185 2842',
   None: 'E-Mail marcel.reiter@hotmail.com'},
  'Account Holder – Personal Info': {'Is the client or associated person a Politically Exposed Person as defined in the Client Acceptance Policy?': [{'options': ['No',
      'Yes'],
     'selected': ['No']},
    'If ‘Yes’, please complete the Approval Form (PEP) and ensure that appropriate approvals are obtained'],
   'Marital Status': [{

In [133]:
from copy import deepcopy
import json

filtered_data = deepcopy(client_data)
filtered_data["account"]["signature"] = ""
filtered_data["passport"]["signature"] = ""
with open("filtered_data.json", "w") as f:
    json.dump(filtered_data, f, indent=4)

In [138]:
from jb_onboarding.checks.check_account_form import account_form_is_consistent
from jb_onboarding.checks.check_age_consistency import age_is_consistent
from jb_onboarding.checks.check_education_background import education_is_consistent
from jb_onboarding.checks.check_family_background_consistency import family_background_is_consistent

# from jb_onboarding.checks.check_occupation_history import
from jb_onboarding.checks.check_passport import passport_is_consistent
from jb_onboarding.checks.check_profile import profile_is_consistent
from jb_onboarding.checks.check_wealth_summary import wealth_is_consistent
from jb_onboarding.checks.cross_check_account_form_client_profile import account_form_and_client_profile_are_consistent
from jb_onboarding.checks.cross_check_account_form_passport import account_form_and_passport_are_consistent
from jb_onboarding.checks.cross_check_passport_client_profile_form import client_profile_and_passport_are_consistent


In [23]:
account_form_is_consistent(filtered_data)

Full name: Andrea Concha Fernández
First and middle names: Andrea Concha
Last name: Fernández
Expected full name: Andrea Concha Fernández


True

{'account_name': 'Andrea Concha Fernández',
 'account_holder_name': 'Andrea Concha',
 'account_holder_surname': 'Fernández',
 'passport_number': 'JZ4727152',
 'chf': '/Off',
 'eur': '/Yes',
 'usd': '/Off',
 'other_ccy': '',
 'building_number': '86',
 'postal_code': '28606',
 'city': 'Porvoo',
 'country': 'Finland',
 'name': 'Andrea Concha Fernández',
 'phone_number': '+358 042 161 84 72',
 'email': 'andrea.fernandez@finet.fi',
 'street_name': 'Telakkakatu',
 'signature': ''}

In [25]:
age_is_consistent(filtered_data)

True

In [None]:
profile["Client Information"]


"The RM first met Andrea Concha Fernández at a luxury car show in Helsinki. They both admired the latest models and discussed their interest in high-end investments, leading to a productive business relationship.\nAndrea Concha Fernández is a 68 year old retired Fund Manager from Finland. She has a diverse background, having worked in various capacities and gained extensive knowledge in the field.\nJulius Baer's commitment to client satisfaction and innovative financial strategies convinced her to join."

In [38]:
filtered_data["profile"]

[autoreload of jb_onboarding.preprocessing failed: Traceback (most recent call last):
  File "/Users/shyngys/personal/data-workspace/.venv/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "/Users/shyngys/personal/data-workspace/.venv/lib/python3.10/site-packages/IPython/extensions/autoreload.py", line 475, in superreload
    module = reload(module)
  File "/opt/homebrew/Cellar/python@3.10/3.10.15/Frameworks/Python.framework/Versions/3.10/lib/python3.10/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 879, in exec_module
  File "<frozen importlib._bootstrap_external>", line 1017, in get_code
  File "<frozen importlib._bootstrap_external>", line 947, in source_to_code
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/Users/sh

{'Default': {},
 'Client Information': {'Last Name': 'Fernández',
  'First/ Middle Name (s)': 'Andrea Concha',
  'Address': 'Telakkakatu 86, 28606 Porvoo',
  'Country of Domicile': 'Finland',
  'Date of birth': '1956-07-18',
  'Nationality': 'Spanish',
  'Passport No/ Unique ID': 'JZ4727152',
  'ID Type': 'passport',
  'ID Issue Date': '2021-02-04',
  'ID Expiry Date': '2031-02-03',
  'Gender': {'options': ['Female', 'Male'], 'selected': ['Female']}},
 'Account Holder – Contact Management and Services – Contact Info': {'Communication Medium': 'Telephone +358 042 161 84 72',
  None: 'E-Mail andrea.fernandez@finet.fi'},
 'Account Holder – Personal Info': {'Is the client or associated person a Politically Exposed Person as defined in the Client Acceptance Policy?': [{'options': ['No',
     'Yes'],
    'selected': ['No']},
   'If ‘Yes’, please complete the Approval Form (PEP) and ensure that appropriate approvals are obtained'],
  'Marital Status': [{'options': ['Divorced', 'Married', 'Sin

In [40]:
filtered_data["account"]

{'account_name': 'Andrea Concha Fernández',
 'account_holder_name': 'Andrea Concha',
 'account_holder_surname': 'Fernández',
 'passport_number': 'JZ4727152',
 'chf': '/Off',
 'eur': '/Yes',
 'usd': '/Off',
 'other_ccy': '',
 'building_number': '86',
 'postal_code': '28606',
 'city': 'Porvoo',
 'country': 'Finland',
 'name': 'Andrea Concha Fernández',
 'phone_number': '+358 042 161 84 72',
 'email': 'andrea.fernandez@finet.fi',
 'street_name': 'Telakkakatu',
 'signature': ''}

In [None]:
filtered_data["profile"] = transform_profile()

{'name': 'Marcel Paul Reiter',
 'address': {'city': 'Bruck an der Mur',
  'street name': 'Enzersdorfer Straße',
  'street number': '28',
  'postal code': '4503'},
 'country_of_domicile': 'Austria',
 'birth_date': '1986-05-15',
 'nationality': 'Austrian',
 'passport_number': 'YK6562036',
 'passport_issue_date': '2017-07-01',
 'passport_expiry_date': '2027-06-30',
 'gender': 'M',
 'phone_number': '+43 695 185 2842',
 'email_address': 'marcel.reiter@hotmail.com',
 'marital_status': 'Married',
 'secondary_school': {'name': 'Graz University of Technology',
  'graduation_year': 2011},
 'higher_education': [],
 'employment_background': {'status': '',
  'since': None,
  'previous_profession': ''},
 'aum': {'savings': 1858000.0},
 'inheritance_details': {},
 'real_estate_details': [],
 'investment_risk_profile': 'Low',
 'investment_horizon': 'Short',
 'investment_experience': 'Experienced',
 'type_of_mandate': 'Advisory',
 'preferred_markets': ['Austria', 'Portugal'],
 'currency': '',
 'wealth'

In [102]:
filtered_data["profile"]["Account Holder – Professional and Economic Background – Business"]


KeyError: 'Account Holder – Professional and Economic Background – Business'

In [106]:
filtered_data["profile"]

{'name': 'Marcel Paul Reiter',
 'address': {'city': 'Bruck an der Mur',
  'street name': 'Enzersdorfer Straße',
  'street number': '28',
  'postal code': '4503'},
 'country_of_domicile': 'Austria',
 'birth_date': '1986-05-15',
 'nationality': 'Austrian',
 'passport_number': 'YK6562036',
 'passport_issue_date': '2017-07-01',
 'passport_expiry_date': '2027-06-30',
 'gender': 'M',
 'phone_number': '+43 695 185 2842',
 'email_address': 'marcel.reiter@hotmail.com',
 'marital_status': 'Married',
 'secondary_school': {'name': 'Graz University of Technology',
  'graduation_year': 2011},
 'higher_education': [],
 'employment_background': {'status': '',
  'since': None,
  'previous_profession': ''},
 'aum': {'savings': 1858000.0},
 'inheritance_details': {},
 'real_estate_details': [],
 'investment_risk_profile': 'Low',
 'investment_horizon': 'Short',
 'investment_experience': 'Experienced',
 'type_of_mandate': 'Advisory',
 'preferred_markets': ['Austria', 'Portugal'],
 'currency': '',
 'wealth'

In [None]:
client_data["description"]["Occupation History"]


'Marcel Paul Reiter is a 38 year old Reservoir Engineer from Austria. He has a diverse background, having worked in various capacities and gained extensive knowledge in the field.\nAt RAG Rohol-Aufsuchungs AG, Marcel Paul Reiter held the position of Drilling Engineer from 2013 to 2015, developing essential skills in the field.\nAt RAG Rohol-Aufsuchungs AG, he held the position of Reservoir Engineer from 2015 till now, making significant contributions to the organization.\nThe last position held by he came with a remuneration of 54000 EUR p.A., showcasing the value he brought to the organization.'

In [116]:
 client_data["profile"]["Account Holder – Professional and Economic Background – Business"]

{'Current employment and function': [{'options': ['Employee Since 2015'],
   'selected': ['Employee Since 2015']},
  'Name Employer RAG Rohol-Aufsuchungs AG',
  ['Position Reservoir Engineer (54000 EUR p.A.)',
   'including annual income and namecard'],
  {'options': ['Self-Employed Since'], 'selected': []},
  'Company Name',
  '% of ownership'],
 None: [{'options': ['Currently not employed Since'], 'selected': []},
  'Previous Profession:',
  {'options': ['Retired Since'], 'selected': []},
  'Previous Profession:',
  {'options': ['Homemaker/housewife Since'], 'selected': []},
  'Previous Profession:',
  {'options': ['Student Since Country of study:'], 'selected': []},
  'Previous Profession:',
  {'options': ['Diplomat Since Home country of diplomatic mission:'],
   'selected': []},
  'Previous Profession:',
  {'options': ['Military representative Since Home country of military establishment:'],
   'selected': []},
  'Previous Profession:',
  {'options': ['Other Since'], 'selected': []

In [None]:
extract_employment_history_manual(
    client_data["profile"]["Account Holder – Professional and Economic Background – Business"]
)

[{'options': ['Employee Since 2015'], 'selected': ['Employee Since 2015']}, 'Name Employer RAG Rohol-Aufsuchungs AG', ['Position Reservoir Engineer (54000 EUR p.A.)', 'including annual income and namecard'], {'options': ['Self-Employed Since'], 'selected': []}, 'Company Name', '% of ownership'] {'options': ['Employee Since 2015'], 'selected': ['Employee Since 2015']}
[{'options': ['Employee Since 2015'], 'selected': ['Employee Since 2015']}, 'Name Employer RAG Rohol-Aufsuchungs AG', ['Position Reservoir Engineer (54000 EUR p.A.)', 'including annual income and namecard'], {'options': ['Self-Employed Since'], 'selected': []}, 'Company Name', '% of ownership'] Name Employer RAG Rohol-Aufsuchungs AG
[{'options': ['Employee Since 2015'], 'selected': ['Employee Since 2015']}, 'Name Employer RAG Rohol-Aufsuchungs AG', ['Position Reservoir Engineer (54000 EUR p.A.)', 'including annual income and namecard'], {'options': ['Self-Employed Since'], 'selected': []}, 'Company Name', '% of ownership']

{'status': '', 'since': None, 'previous_profession': ''}

In [None]:
client_data["passport"]

{'Surname': 'REITER',
 'Code': 'AUT',
 'Passport Number': 'YK6562036',
 'Given Name': 'MARCEL PAUL',
 'Birth Date': '15-May-1986',
 'Citizenship': 'Austrian/OSTERREICH',
 'Sex': 'M',
 'Issue Date': '01-Jul-2017',
 'Expiry Date': '30-Jun-2027',
 'MRZ Line 1': 'P<ATREITER<MARCEL PAUL',
 'MRZ Line 2': 'YK6562036AUT680515',
 'Signature': '+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo4dDQPDc3FxUV4dDQgHd3EA8PenFx49PTPDc3EhAQxLa2+ujo+ujo+ujo+ujo+ujo+ujo+ujo+uj

In [126]:
from jb_onboarding.preprocessing import transform_profile, extract_employment_history_manual

filtered_data = transform_profile(client_data["profile"])

In [None]:
# Process names
passport_surname = "REITER"
passport_given = "MARCEL PAUL"

# MRZ processing
mrz_parts = re.split(r"[<>]+", "ATREITER<MARCEL PAUL")  # After removing P<AT
# Result: ['ATREITER', 'MARCEL PAUL']

mrz_surname = mrz_parts[0]  # "ATREITER" (should be "REITER")
mrz_given = " ".join(mrz_parts[1:])  # "MARCEL PAUL"

print(f"Surname mismatch: {passport_surname} vs {mrz_surname}")
# Output: Surname mismatch: REITER vs ATREITER

Surname mismatch: REITER vs P


In [None]:
filtered_data["passport"]

{'Surname': 'REITER',
 'Code': 'AUT',
 'Passport Number': 'YK6562036',
 'Given Name': 'MARCEL PAUL',
 'Birth Date': '15-May-1986',
 'Citizenship': 'Austrian/OSTERREICH',
 'Sex': 'M',
 'Issue Date': '01-Jul-2017',
 'Expiry Date': '30-Jun-2027',
 'MRZ Line 1': 'P<ATREITER<MARCEL PAUL',
 'MRZ Line 2': 'YK6562036AUT680515',
 'Signature': '+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo4dDQPDc3FxUV4dDQgHd3EA8PenFx49PTPDc3EhAQxLa2+ujo+ujo+ujo+ujo+ujo+ujo+ujo+uj

In [190]:
client_data["passport"]["MRZ Line 1"] = "P<AUTREITER<MARCEL PAUL"
client_data["passport"]["MRZ Line 2"] = "YK6562036AUT860515"


In [189]:
filtered_data["passport"]

{'Surname': 'REITER',
 'Code': 'AUT',
 'Passport Number': 'YK6562036',
 'Given Name': 'MARCEL PAUL',
 'Birth Date': '15-May-1986',
 'Citizenship': 'Austrian/OSTERREICH',
 'Sex': 'M',
 'Issue Date': '01-Jul-2017',
 'Expiry Date': '30-Jun-2027',
 'MRZ Line 1': 'P<AUTEITER<MARCEL PAUL',
 'MRZ Line 2': 'YK6562036AUT680515',
 'Signature': '+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo4dDQPDc3FxUV4dDQgHd3EA8PenFx49PTPDc3EhAQxLa2+ujo+ujo+ujo+ujo+ujo+ujo+ujo+uj

In [None]:
"YK6562036AUT860515"
"YK6562036AUT680515"

'YK6562036AUT860515'

In [192]:
from jb_onboarding.checks.check_passport_consistency import passport_is_consistent

passport_is_consistent(client_data)

True

In [None]:
filtered_data["passport"]

{'Surname': 'REITER',
 'Code': 'AUT',
 'Passport Number': 'YK6562036',
 'Given Name': 'MARCEL PAUL',
 'Birth Date': '15-May-1986',
 'Citizenship': 'Austrian/OSTERREICH',
 'Sex': 'M',
 'Issue Date': '01-Jul-2017',
 'Expiry Date': '30-Jun-2027',
 'MRZ Line 1': 'P<ATREITER<MARCEL PAUL',
 'MRZ Line 2': 'YK6562036AUT680515',
 'Signature': '+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo4dDQPDc3FxUV4dDQgHd3EA8PenFx49PTPDc3EhAQxLa2+ujo+ujo+ujo+ujo+ujo+ujo+ujo+uj

In [None]:
filtered_data["passport"]

{'Surname': 'REITER',
 'Code': 'AUT',
 'Passport Number': 'YK6562036',
 'Given Name': 'MARCEL PAUL',
 'Birth Date': '15-May-1986',
 'Citizenship': 'Austrian/OSTERREICH',
 'Sex': 'M',
 'Issue Date': '01-Jul-2017',
 'Expiry Date': '30-Jun-2027',
 'MRZ Line 1': 'P<ATREITER<MARCEL PAUL',
 'MRZ Line 2': 'YK6562036AUT680515',
 'Signature': '+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo+ujo4dDQPDc3FxUV4dDQgHd3EA8PenFx49PTPDc3EhAQxLa2+ujo+ujo+ujo+ujo+ujo+ujo+ujo+uj

In [None]:
from jb_onboarding.preprocessing import transform_profile, extract_employment_history_manual

filtered_data["profile"] = transform_profile(client_data["profile"])

In [None]:
from jb_onboarding.checks.check_occupation_history import employment_is_consistent

employment_is_consistent(filtered_data)

True

In [120]:
extract_employment_history_manual(
    client_data["profile"]["Account Holder – Professional and Economic Background – Business"]
)

[{'status': 'employee',
  'since': 2015,
  'previous_profession': '',
  'company': 'rag rohol-aufsuchungs ag'},
 {'status': 'self-employed',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'currently not employed',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'retired',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'homemaker/housewife',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'student',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'diplomat',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'military representative',
  'since': None,
  'previous_profession': '',
  'company': ''},
 {'status': 'other', 'since': None, 'previous_profession': '', 'company': ''}]

In [None]:
filtered_data["profile"]["employment_background"]

{'status': 'retired', 'since': 2017, 'previous_profession': 'fund manager'}

In [219]:
filtered_data

{'profile': {'name': 'Marcel Paul Reiter',
  'address': {'city': 'Bruck an der Mur',
   'street name': 'Enzersdorfer Straße',
   'street number': '28',
   'postal code': '4503'},
  'country_of_domicile': 'Austria',
  'birth_date': '1986-05-15',
  'nationality': 'Austrian',
  'passport_number': 'YK6562036',
  'passport_issue_date': '2017-07-01',
  'passport_expiry_date': '2027-06-30',
  'gender': 'M',
  'phone_number': '+43 695 185 2842',
  'email_address': 'marcel.reiter@hotmail.com',
  'marital_status': 'Married',
  'secondary_school': {'name': 'Graz University of Technology',
   'graduation_year': 2011},
  'higher_education': [],
  'employment_background': {'status': 'employee',
   'since': 2015,
   'previous_profession': '',
   'company': 'rag rohol-aufsuchungs ag'},
  'aum': {'savings': 1858000.0},
  'inheritance_details': {},
  'real_estate_details': [],
  'investment_risk_profile': 'Low',
  'investment_horizon': 'Short',
  'investment_experience': 'Experienced',
  'type_of_mandat

In [222]:
client_data["account"]

from jb_onboarding.checks.cross_check_account_form_client_profile import account_form_and_client_profile_are_consistent

print(account_form_and_client_profile_are_consistent(filtered_data))

from jb_onboarding.checks.cross_check_account_form_passport import account_form_and_passport_are_consistent

print(account_form_and_passport_are_consistent(filtered_data))
from jb_onboarding.checks.cross_check_passport_client_profile_form import client_profile_and_passport_are_consistent

client_profile_and_passport_are_consistent(filtered_data)

True
True


True

In [None]:
transform_profile(client_data["profile"])

{'name': 'Marcel Paul Reiter',
 'address': {'city': 'Bruck an der Mur',
  'street name': 'Enzersdorfer Straße',
  'street number': '28',
  'postal code': '4503'},
 'country_of_domicile': 'Austria',
 'birth_date': '1986-05-15',
 'nationality': 'Austrian',
 'passport_number': 'YK6562036',
 'passport_issue_date': '2017-07-01',
 'passport_expiry_date': '2027-06-30',
 'gender': 'M',
 'phone_number': '+43 695 185 2842',
 'email_address': 'marcel.reiter@hotmail.com',
 'marital_status': 'Married',
 'higher_education': {'name': 'Graz University of Technology',
  'graduation_year': 2011,
  'type': 'tertiary'},
 'employment_background': {'status': 'employee',
  'since': 2015,
  'previous_profession': '',
  'company': 'rag rohol-aufsuchungs ag'},
 'aum': {'savings': 1858000.0},
 'inheritance_details': {},
 'real_estate_details': [],
 'investment_risk_profile': 'Low',
 'investment_horizon': 'Short',
 'investment_experience': 'Experienced',
 'type_of_mandate': 'Advisory',
 'preferred_markets': ['Aus

In [45]:
transform_data(filtered_data["profile"])["Account Holder – Contact Management and Services – Contact Info"]

{'Communication Medium': 'Telephone +358 042 161 84 72',
 None: 'E-Mail andrea.fernandez@finet.fi'}

In [None]:
# Example dictionary
contact_info = {"Communication Medium": "Telephone +358 042 161 84 72", None: "E-Mail andrea.fernandez@finet.fi"}

# Extract the new fields
new_contact_fields = extract_contact_methods(
    transform_data(filtered_data["profile"])["Account Holder – Contact Management and Services – Contact Info"]
)
print(new_contact_fields)


{'Telephone': '+358 042 161 84 72', 'E-mail': 'andrea.fernandez@finet.fi'}


In [None]:
filtered_data["profile"]

{'Default': {},
 'Client Information': {'Last Name': 'Fernández',
  'First/ Middle Name (s)': 'Andrea Concha',
  'Address': 'Telakkakatu 86, 28606 Porvoo',
  'Country of Domicile': 'Finland',
  'Date of birth': '1956-07-18',
  'Nationality': 'Spanish',
  'Passport No/ Unique ID': 'JZ4727152',
  'ID Type': 'passport',
  'ID Issue Date': '2021-02-04',
  'ID Expiry Date': '2031-02-03',
  'Gender': {'options': ['Female', 'Male'], 'selected': ['Female']}},
 'Account Holder – Contact Management and Services – Contact Info': {'Communication Medium': 'Telephone +358 042 161 84 72',
  None: 'E-Mail andrea.fernandez@finet.fi'},
 'Account Holder – Personal Info': {'Is the client or associated person a Politically Exposed Person as defined in the Client Acceptance Policy?': [{'options': ['No',
     'Yes'],
    'selected': ['No']},
   'If ‘Yes’, please complete the Approval Form (PEP) and ensure that appropriate approvals are obtained'],
  'Marital Status': [{'options': ['Divorced', 'Married', 'Sin

In [69]:
age_is_consistent(filtered_data)


Declared age: 68
Calculated age: 68


True

In [None]:
from jb_onboarding.checks.check_passport_consistency import passport_is_consistent

my_data = 
passport_is_consistent(my_data)

{'Country': 'Czech Republic', 'Surname': 'BENES', 'Code': 'CZE', 'Passport Number': 'HV8184805', 'Given Name': 'JOSEF ROMAN', 'Birth Date': '06-Aug-1957', 'Citizenship': 'Czech/CZEŠÁ REPUBLIKA', 'Sex': 'M', 'Issue Date': '20-May-2022', 'Expiry Date': '19-May-2032', 'MRZ Line 1': 'P<CZEBENES<<JOSEF-ROMAN<<<<<<<<<<<<<<HV8184805CZE570806', 'MRZ Line 2': 'HV8184805CZE570806', 'Signature': '+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr6+vr

False

In [None]:
mrz_together = my_data["passport"]["MRZ Line 1"].upper() + my_data["passport"]["MRZ Line 2"].upper()
mrz_together = re.sub(r"[<>\s]", "", mrz_together)

In [None]:
mrz_together

f"P{country_code}{last_name}{given_name}{birth_date.strftime('%y%m%d')}"
# Construct expected MRZ
def construct_mrz_line1():
    # Format: P<COUNTRYCODE<LASTNAME<<GIVENNAME
    given_cleaned = re.sub(r'[^A-Z]', '', given_name)
    last_cleaned = re.sub(r'[^A-Z]', '', last_name)
    return f"P<{country_code}{last_cleaned}<<{given_cleaned}".ljust(44, '<')[:44]

def construct_mrz_line2():
    # Format: PASSPORTNUMBERCOUNTRYCODEYYMMDD...
    yy_mm_dd = birth_date.strftime('%y%m%d')
    checksum = str((int(passport_number[0:7]) % 10)  # Simple checksum example
    return f"{passport_number}{country_code}{yy_mm_dd}M{expiry_date.strftime('%y%m%d')}{checksum}".ljust(44, '<')[:44]

'PCZEBENESJOSEF-ROMANHV8184805CZE570806HV8184805CZE570806'

In [242]:
my_data["description"]["Education Background"]
my_data["description"].get("Education Background", "").strip()

profile = my_data["profile"]

In [244]:
profile["higher_education"]["graduation_year"]

2018

In [234]:
education_is_consistent(filtered_data)

TypeError: list indices must be integers or slices, not str

In [None]:
education_is_consistent(filtered_data)

In [None]:
filtered_data["profile"]

{'name': 'Andrea Concha Fernández',
 'address': {'city': 'Porvoo',
  'street name': 'Telakkakatu',
  'street number': '86',
  'postal code': '28606'},
 'country_of_domicile': 'Finland',
 'birth_date': '1956-07-18',
 'nationality': 'Spanish',
 'passport_number': 'JZ4727152',
 'passport_issue_date': '2021-02-04',
 'passport_expiry_date': '2031-02-03',
 'gender': 'F',
 'phone_number': '+358 042 161 84 72',
 'email_address': 'andrea.fernandez@finet.fi',
 'marital_status': 'Married',
 'secondary_school': {'name': 'Universidad Comercial de Deusto',
  'graduation_year': 1980},
 'higher_education': [],
 'employment_background': {'status': 'retired',
  'since': 2017,
  'previous_profession': 'fund manager'},
 'aum': {'savings': 4141000.0},
 'inheritance_details': {},
 'real_estate_details': [],
 'investment_risk_profile': 'Moderate',
 'investment_horizon': 'Medium',
 'investment_experience': 'Expert',
 'type_of_mandate': 'Advisory',
 'preferred_markets': ['Spain', 'Finland'],
 'currency': '',
 

In [None]:
filtered_data["description"]["Education Background"]

'Andrea graduated from Instituto de Bachillerato Artístico IES Reina Sofia Tenerife in 1976.\nIn 1980, Andrea graduated from Universidad Comercial de Deusto with a degree.'

In [None]:
family_background_is_consistent(description=filtered_data["description"], profile=filtered_data["profile"])

True

In [None]:
filtered_data["description"]["Family Background"]

'Andrea Concha Fernández and Javier have been married since 1988. They are proud parents of 1 child: Andrea.'

In [None]:
filtered_data["profile"]["marital_status"]

'Married'

In [299]:
from jb_onboarding.checks.cross_check_account_form_passport import account_form_and_passport_are_consistent


my_data = {
    "profile": {
        "name": "Lilli Hoffmann",
        "address": {"city": "Leipzig", "street name": "Schlachte", "street number": "89", "postal code": "28810"},
        "country_of_domicile": "Germany",
        "birth_date": "1969-01-14",
        "nationality": "German",
        "passport_number": "MY2227572",
        "passport_issue_date": "2022-08-02",
        "passport_expiry_date": "2032-08-01",
        "gender": "F",
        "phone_number": "+49 0737 768453",
        "email_address": "lilli.hoffmann@gmx.de",
        "marital_status": "Widowed",
        "higher_education": {
            "name": "Berlin University of Applied Sciences",
            "graduation_year": 1992,
            "type": "tertiary",
        },
        "employment_background": {
            "status": "employee",
            "since": 2009,
            "previous_profession": "",
            "company": "bayer ag",
        },
        "aum": {"savings": 6990000.0},
        "inheritance_details": {},
        "real_estate_details": [],
        "investment_risk_profile": "Moderate",
        "investment_horizon": "Short",
        "investment_experience": "Expert",
        "type_of_mandate": "Advisory",
        "preferred_markets": ["Austria", "Germany"],
        "currency": "",
        "wealth": "EUR 5m-10m",
        "assets": ["Real Estate EUR 5720000", "Business EUR 1270000"],
        "origins of wealth": ["employment"],
    },
    "passport": {
        "Country": "Germany",
        "Surname": "HOFFMANN",
        "Code": "DEU",
        "Passport Number": "MY2227572",
        "Given Name": "LILI",
        "Birth Date": "14-Jan-1969",
        "Citizenship": "German/DEUTSCH",
        "Sex": "F",
        "Issue Date": "02-Aug-2022",
        "Expiry Date": "01-Aug-2032",
        "MRZ Line 1": "P<DEUOFFHANN<<LILI<<",
        "MRZ Line 2": "MY2227572DEU609114",
    },
    "description": {
        "Summary Note": "Lilli Hoffmann and the RM met at a networking event in Berlin. They quickly realized they had complementary skills and decided to collaborate on several projects.\nWith a career spanning nearly 30 years, Lilli Hoffmann has established herself as a key player in the industry.\nLooking for a private bank with a deep understanding of wealth management, she joined Julius Baer for its expertise.",
        "Family Background": "Lilli Hoffmann is currently widowed. She has 3 kids called Fabienne, Katharina and Diana.",
        "Education Background": "Lilli completed her secondary education at Friedrich-Wilhelms-Gymnasium Potsdam in 1987.\nLilli pursued higher education at Berlin University of Applied Sciences, graduating in 1992.",
        "Occupation History": "Lilli Hoffmann is a seasoned professional with over 30 years of experience.\nLilli Hoffmann's first role was as a Research Scientist at Qiagen N.V., where she worked diligently from 1995 to 1999 to establish a strong reputation.\nAfter gaining experience, she took on the role of Biotech Startup Founder at Merck KGaA in 2000, further expanding her skill set.\nHer tenure as a CEO at Qiagen N.V. from 2002 to 2004 was marked by significant achievements and a deepening understanding of the field.\nHer appointment as a Venture Capitalist at Qiagen N.V. in 2004 marked a significant milestone in her career, as she took on increasingly complex challenges and delivered exceptional results.\nHer current role as a Industry Leader at Bayer AG, which she has held since 2009, reflects her enduring passion for the field and her unwavering dedication to making a lasting impact.\nIn her most recent role, she was compensated with a salary of 642000 EUR p.A., reflecting her expertise and contributions.",
        "Wealth Summary": "While working, she saved 1270000 EUR, which she used to build a diversified investment portfolio.\nShe owns 3 exceptional properties: townhouse located in Leipzig, valued at approximately 1,520,000.00 EUR, townhouse located in Cologne, valued at approximately 1,390,000.00 EUR, villa located in Gelsenkirchen, valued at approximately 2,810,000.00 EUR. This impressive collection showcases her flair for luxury living and investment prowess, with a notable presence in Cologne, Gelsenkirchen and Leipzig.\nShe does not have any inheritances in cash.",
        "Client Summary": "In conclusion, we are confident in the client's financial stability and future prospects.",
    },
    "account": {
        "account_name": "Lilli Hoffmann",
        "account_holder_name": "Lilli",
        "account_holder_surname": "Hoffmann",
        "passport_number": "MY2227572",
        "chf": "/Off",
        "eur": "/Yes",
        "usd": "/Off",
        "other_ccy": "",
        "building_number": "89",
        "postal_code": "28810",
        "city": "Leipzig",
        "country": "Germany",
        "name": "Lilli Hoffmann",
        "phone_number": "+49 0737 768453",
        "email": "lilli.hoffmann@gmx.de",
        "street_name": "Schlachte",
    },
}


account_form_and_passport_are_consistent(my_data)

Given name from passport: lili
Last name from passport: hoffmann
Account first name: lilli, Account last name: hoffmann


False

In [None]:
my_data["passport"]

{'Country': 'Italy',
 'Surname': 'Landi',
 'Code': 'ITA',
 'Passport Number': 'XX1827037',
 'Given Name': 'Giorgia',
 'Birth Date': '21-Aug-1981',
 'Citizenship': 'Italian/Italiana',
 'Sex': 'F',
 'Issue Date': '04-Nov-2021',
 'Expiry Date': '03-Nov-2031',
 'MRZ Line 1': 'P<ITALANDI<GIORGIA<SOFIA',
 'MRZ Line 2': 'XX1827037ITA810821',
 'Signature': '9P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/09P/0

In [286]:
employment_is_consistent(my_data)


False

In [266]:
profile = my_data.get("profile", {})
description = my_data["description"]
employment = profile["employment_background"]
occupation_text = description.get("Occupation History", "")
sentences = [s.strip() for s in occupation_text.split("\n") if s.strip()]

In [265]:
my_data.get("description", {}).get("Occupation History", "")

'With a career spanning nearly 22 years, Lucas Samuel Marques has established himself as a key player in the industry.\nAt Transinsular, Lucas Samuel Marques held the position of Ship Broker from 2003 to 2006, developing essential skills in the field.\nAfter gaining experience, he took on the role of Shipping Company Owner at Grupo Sousa in 2007, further expanding his skill set.\nIn 2010, he joined Grupo ETE as a Fleet Manager, taking on new challenges and responsibilities that helped him grow both personally and professionally.\nHis next role was as a Maritime Industry Leader at Transinsular, where he applied his extensive experience to drive innovation from 2015 to 2016.\nHe has been a Logistics Conglomerate Owner at Grupo ETE since 2017, consistently demonstrating his ability to adapt, innovate, and thrive in an ever-changing environment.\nIn his most recent role, he was compensated with a salary of 238000 EUR p.A., reflecting his expertise and contributions.'

In [None]:
print(filtered_data["profile"]["secondary_school"])

{'name': 'Graz University of Technology', 'graduation_year': 2011}


In [None]:
filtered_data["profile"]

{'name': 'Marcel Paul Reiter',
 'address': {'city': 'Bruck an der Mur',
  'street name': 'Enzersdorfer Straße',
  'street number': '28',
  'postal code': '4503'},
 'country_of_domicile': 'Austria',
 'birth_date': '1986-05-15',
 'nationality': 'Austrian',
 'passport_number': 'YK6562036',
 'passport_issue_date': '2017-07-01',
 'passport_expiry_date': '2027-06-30',
 'gender': 'M',
 'phone_number': '+43 695 185 2842',
 'email_address': 'marcel.reiter@hotmail.com',
 'marital_status': 'Married',
 'secondary_school': {'name': 'Graz University of Technology',
  'graduation_year': 2011},
 'higher_education': [],
 'employment_background': {'status': 'employee',
  'since': 2015,
  'previous_profession': '',
  'company': 'rag rohol-aufsuchungs ag'},
 'aum': {'savings': 1858000.0},
 'inheritance_details': {},
 'real_estate_details': [],
 'investment_risk_profile': 'Low',
 'investment_horizon': 'Short',
 'investment_experience': 'Experienced',
 'type_of_mandate': 'Advisory',
 'preferred_markets': ['