# Start – Bank Marketing Analyse (Stdlib)

Dieses Notebook lädt `data/DatenBank.csv` (Semikolon-getrennt; im Code relativ zum Arbeitsverzeichnis als `../data/DatenBank.csv` adressiert) und erzeugt Start-Auswertungen – ohne externe Libraries (nur Python-Standardbibliothek).

In VS Code: oben rechts den Kernel aus `.venv` auswählen.


In [None]:
from __future__ import annotations

import csv
import statistics
from collections import defaultdict
from pathlib import Path

# Input CSV, addressed relative to the current working directory
# (one level up, inside `data/`).
DATA_PATH = Path('..', 'data', 'DatenBank.csv')

# Quick sanity check: is the file there, and which absolute path is meant?
csv_found = DATA_PATH.exists()
print('CSV exists:', csv_found)
csv_location = DATA_PATH.resolve()
print('CSV path  :', csv_location)


In [None]:
# Columns whose CSV text is converted to ``int`` while loading.
NUM_FIELDS = {'id', 'age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous'}


def load_rows(path: Path) -> list[dict[str, object]]:
    """Read the semicolon-separated CSV at *path* into a list of row dicts.

    Columns named in ``NUM_FIELDS`` are converted to ``int``; every other
    column keeps the value produced by ``csv.DictReader``.

    Args:
        path: Location of the CSV file. Its first line is used as the header.

    Returns:
        One dict per data line, in file order, keyed by column name.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a ``NUM_FIELDS`` column holds something that is not an
            integer (including a missing value on a short line). The message
            names the file, the line and the column.
    """
    rows: list[dict[str, object]] = []
    # 'utf-8-sig' drops a leading byte-order mark if one is present. With
    # plain 'utf-8' the BOM would become part of the first header name, so
    # that column would no longer match NUM_FIELDS and silently stay a string.
    # Files without a BOM are decoded exactly as before.
    with path.open('r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f, delimiter=';', quotechar='"')
        for raw in reader:
            row: dict[str, object] = {}
            for key, value in raw.items():
                if key not in NUM_FIELDS:
                    row[key] = value
                    continue
                try:
                    row[key] = int(value)
                except (TypeError, ValueError) as err:
                    # Re-raise with enough context to find the bad cell.
                    raise ValueError(
                        f'{path}: line {reader.line_num}: column {key!r} '
                        f'is not an integer: {value!r}'
                    ) from err
            rows.append(row)
    return rows

# Load the dataset once; the analysis cells below all read from `rows`.
rows = load_rows(DATA_PATH)
print('rows:', len(rows))
# Cell output: preview the first record. Guarded so that a CSV containing only
# a header line yields None instead of raising IndexError.
rows[0] if rows else None


In [None]:
# 1) Campaign success rate

total = len(rows)
# A customer counts as a success when the `complete` column is exactly 'yes'.
yes = len([r for r in rows if r['complete'] == 'yes'])
if total:
    rate = (yes / total) * 100
else:
    # No rows loaded: report 0 instead of dividing by zero.
    rate = 0
print(f'Anzahl Kunden: {total}')
print(f'Abschlüsse (yes): {yes}')
print(f'Erfolgsquote: {rate:.1f} %')


In [None]:
# 2) Call-duration statistics (duration)

durations = [r['duration'] for r in rows]
if durations:
    print('duration min     :', min(durations))
    print('duration max     :', max(durations))
    print('duration mean    :', round(statistics.mean(durations), 2))
    # Population variance: the loaded rows are treated as the full data set.
    print('duration variance:', round(statistics.pvariance(durations), 2))
else:
    # min()/max() and the statistics functions all raise on an empty
    # sequence, so an empty dataset is reported instead of crashing the cell.
    print('duration: no data')


In [None]:
def group_summary(group_field: str):
    groups: dict[str, list[dict[str, object]]] = defaultdict(list)
    for r in rows:
        groups[str(r[group_field])].append(r)

    out = []
    for key, items in sorted(groups.items(), key=lambda kv: kv[0]):
        n = len(items)
        avg_age = statistics.mean([i['age'] for i in items])
        avg_balance = statistics.mean([i['balance'] for i in items])
        yes = sum(1 for i in items if i['complete'] == 'yes')
        rate = (yes / n) * 100 if n else 0
        out.append((key, n, avg_age, avg_balance, rate))
    return out

def print_table(title: str, items) -> None:
    """Print *title* followed by an aligned, pipe-separated summary table.

    Args:
        title: Heading printed on its own line above the table.
        items: Iterable of ``(key, n, avg_age, avg_balance, rate)`` tuples,
            e.g. the result of ``group_summary``.
    """
    print(title)
    # Header cells are padded to the same widths as the data cells below
    # (last column: 11 characters for the number plus the '%' sign = 12),
    # so the column separators line up.
    print(f"{'Kategorie':>10} | {'n':>4} | {'avg_age':>7} | {'avg_balance':>11} | {'success_rate':>12}")
    for key, n, avg_age, avg_balance, rate in items:
        print(f'{key:>10} | {n:>4} | {avg_age:>7.2f} | {avg_balance:>11.2f} | {rate:>11.2f}%')

# Print one summary table per grouping column, with a blank line between
# consecutive tables (but none before the first or after the last).
for position, (heading, column) in enumerate((('Education', 'education'), ('Marital', 'marital'))):
    if position:
        print()
    print_table(heading, group_summary(column))


In [None]:
# 3) Example filter: balance > 1000 and no loan

filtered = list(filter(lambda r: r['balance'] > 1000 and r['loan'] == 'no', rows))
print('Treffer:', len(filtered))
# Show the first match, or None when nothing passed the filter.
print('Beispiel:', next(iter(filtered), None))
