# Appendix: Consolidated Workspace Assets
This notebook scans all notebooks in the workspace and extracts key imports, helper functions, data loading, model code, and visualization snippets into a single reference artifact.

In [None]:
# Locate notebook files
from pathlib import Path

# File name of the artifact written further down in this notebook. It lives in
# the working directory, which is inside the scanned root, so it has to be
# excluded or every re-run would ingest the previous run's output.
OUTPUT_NAME = 'combined_workspace.ipynb'


def is_source_notebook(path):
    """Return True if `path` names a notebook that should be scanned.

    Rejected are:
    - anything that does not end in `.ipynb`,
    - autosave copies stored under a `.ipynb_checkpoints` directory,
    - the generated `combined_workspace.ipynb` artifact itself.

    Only the path is inspected; the filesystem is not touched.
    """
    path = Path(path)
    return (
        path.suffix == '.ipynb'
        and '.ipynb_checkpoints' not in path.parts
        and path.name != OUTPUT_NAME
    )


root = Path('..').resolve()
# rglob yields entries in filesystem order; sorting makes every run (and the
# preview printed below) reproducible.
notebooks = sorted(
    p for p in root.rglob('*.ipynb')
    if is_source_notebook(p) and p.is_file()
)
print('Such-Root:', root)
print(f'Gefundene Notebooks: {len(notebooks)}')
# Preview only the first ten hits to keep the output short
for p in notebooks[:10]:
    print(' -', p)

In [None]:
# Load notebook JSON
import json

# List of (path, parsed notebook dict) pairs for every notebook that could be read
loaded = []
for p in notebooks:
    try:
        with open(p, 'r', encoding='utf-8') as f:
            nb = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError, OSError):
        # Malformed JSON, wrong encoding or an unreadable file: report it and
        # continue, so a single bad file does not abort the whole scan.
        print(f'Übersprungen (defekt): {p}')
        continue
    if not isinstance(nb, dict):
        # Syntactically valid JSON that is not an object cannot be a notebook;
        # later cells call nb.get(...), which would fail on it.
        print(f'Übersprungen (defekt): {p}')
        continue
    loaded.append((p, nb))
print(f'Geladene Notebooks: {len(loaded)}')

In [None]:
# Extract code and markdown cells
# Flatten the cells of all loaded notebooks, keeping notebook order and the
# order of cells within each notebook.
all_cells = [
    entry
    for _, notebook in loaded
    for entry in notebook.get('cells', [])
]
# Split by declared cell type; cells of any other type are ignored.
code_cells = [entry for entry in all_cells if entry.get('cell_type') == 'code']
md_cells = [entry for entry in all_cells if entry.get('cell_type') == 'markdown']
print(len(code_cells), 'Codezellen', '|', len(md_cells), 'Markdownzellen')

In [None]:
# Unify imports
import re

# Start of an actual import statement at any indentation: `import name` or
# `from package import ...` (including relative forms such as `from . import x`).
# Requiring the `import` keyword after `from` avoids matching prose lines that
# merely begin with "from ".
IMPORT_START = re.compile(r'^\s*(?:import\s+\w|from\s+[\w.]+\s+import\b)')


def cell_lines(cell):
    """Return the source of a notebook cell as a list of lines.

    The cell's `source` may be a list of strings or a single string; both are
    handled. Line endings are removed.
    """
    src = cell.get('source', [])
    if not isinstance(src, str):
        src = ''.join(src)
    return src.splitlines()


def extract_imports(lines):
    """Collect the import statements found in `lines`.

    Each statement is returned as one line without indentation or trailing
    comment, so that imports nested in functions or `try` blocks are valid at
    top level and compare equal to their top-level twins. Statements spread
    over several lines (parentheses or backslash continuation) are joined
    into one line. A statement that is still open when the lines run out is
    dropped.
    """
    statements = []
    parts = []  # pieces of the statement currently being assembled
    for raw in lines:
        if not parts and not IMPORT_START.match(raw):
            continue
        # An import statement contains no string literals, so everything
        # after '#' is a comment.
        text = raw.split('#', 1)[0].strip()
        continued = text.endswith('\\')
        if continued:
            text = text[:-1].rstrip()
        if text:
            parts.append(text)
        joined = ' '.join(parts)
        if continued or joined.count('(') > joined.count(')'):
            continue  # statement goes on in the next line
        statements.append(joined)
        parts = []
    return statements


import_lines = [
    stmt
    for c in code_cells
    for stmt in extract_imports(cell_lines(c))
]
# Deduplicate and sort; `from __future__` imports are forced to the front
# because Python only accepts them at the very top.
unique_imports = sorted(
    set(import_lines),
    key=lambda stmt: (not stmt.startswith('from __future__'), stmt),
)
# Code cell holding all imports. `execution_count` and `outputs` are included
# because the nbformat 4 schema requires them on every code cell.
merged_import_cell = {
    'cell_type': 'code',
    'execution_count': None,
    'metadata': {},
    'outputs': [],
    'source': [stmt + '\n' for stmt in unique_imports],
}
print('Importe:', len(unique_imports))

In [None]:
# Merge helper functions
import re

# A function header, optionally `async`; group 1 is the function name.
DEF_HEADER = re.compile(r'(?:async\s+)?def\s+(\w+)')


def leading_def_name(src):
    """Return the name of the function a cell starts with, or None.

    Blank lines, `#` comment lines and decorator lines in front of the `def`
    are skipped, so a cell that opens with a title comment or a decorator
    still counts as a function cell. The first other line decides: if it is
    not a function header, the cell is not treated as a helper definition.
    """
    for line in src.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith(('#', '@')):
            continue
        match = DEF_HEADER.match(stripped)
        return match.group(1) if match else None
    return None


func_defs = []
seen = set()  # function names already collected; the first definition wins
for c in code_cells:
    src = ''.join(c.get('source', []))
    name = leading_def_name(src)
    if name is not None and name not in seen:
        seen.add(name)
        func_defs.append(c)
print('Funktionen:', len(func_defs))

In [None]:
# Collect data loading and preprocessing steps
# Substrings whose presence marks a cell as data loading / preprocessing
keywords = ['read_csv', 'load', 'DataLoader', 'pd.read', 'np.load']
# Keep every code cell whose joined source contains at least one keyword
load_cells = [
    candidate
    for candidate in code_cells
    if any(k in ''.join(candidate.get('source', [])) for k in keywords)
]
print('Daten-/Preprocessing-Zellen:', len(load_cells))

In [None]:
# Consolidate model / algorithm definitions
# Substrings whose presence marks a cell as model-related (case-sensitive)
model_markers = ['class ', 'fit(', 'predict(', 'pipeline']
# Keep every code cell whose joined source contains at least one marker
model_cells = [
    candidate
    for candidate in code_cells
    if any(m in ''.join(candidate.get('source', [])) for m in model_markers)
]
print('Modellzellen:', len(model_cells))

In [None]:
# Gather visualizations
# Substrings whose presence marks a cell as plotting code
viz_keys = ['plt.', 'seaborn', 'sns.', 'plot(', 'hist(', 'bar(', 'figure(']
# Keep every code cell whose joined source contains at least one plotting key
viz_cells = [
    candidate
    for candidate in code_cells
    if any(v in ''.join(candidate.get('source', [])) for v in viz_keys)
]
print('Visualisierungen:', len(viz_cells))

In [None]:
# Apply cell ordering heuristic
# Final order: intro markdown, merged imports, helper functions, data loading,
# models, visualizations.
intro_cell = {'cell_type': 'markdown', 'metadata': {}, 'source': ['# Consolidated Workspace Notebook\n']}
ordered_cells = [intro_cell, merged_import_cell]

# The category lists hold references to the same cell objects, and one cell
# can match several categories (e.g. it loads data and plots it). Track object
# identity so each cell is emitted once, in the first category it belongs to.
# Cells with equal content from different notebooks are distinct objects and
# are all kept.
placed = {id(intro_cell), id(merged_import_cell)}
for group in (func_defs, load_cells, model_cells, viz_cells):
    for cell in group:
        if id(cell) in placed:
            continue
        placed.add(id(cell))
        ordered_cells.append(cell)
print('Reihenfolge erstellt:', len(ordered_cells), 'Zellen')

In [None]:
# Build the combined notebook
import json


def finalize_cell(cell, index):
    """Return a copy of `cell` that satisfies the nbformat 4.5 cell schema.

    - A fresh `id` derived from the cell's position is assigned. Format 4.5
      requires a unique id per cell; cells taken from older notebooks have
      none, and ids copied from different notebooks may collide.
    - Missing `metadata` is filled in, and code cells additionally get the
      required `execution_count` and `outputs` fields if they lack them.

    The input cell is not modified.
    """
    fixed = dict(cell)
    fixed['id'] = f'cell-{index:04d}'
    fixed.setdefault('metadata', {})
    if fixed.get('cell_type') == 'code':
        fixed.setdefault('execution_count', None)
        fixed.setdefault('outputs', [])
    return fixed


combined = {
    'cells': [finalize_cell(cell, index) for index, cell in enumerate(ordered_cells)],
    'metadata': {'language_info': {'name': 'python'}},
    'nbformat': 4,
    'nbformat_minor': 5
}
# Written relative to the current working directory
with open('combined_workspace.ipynb', 'w', encoding='utf-8') as f:
    json.dump(combined, f, ensure_ascii=False, indent=2)
print('combined_workspace.ipynb geschrieben')

In [None]:
# Run smoke tests
# Lists the function name each collected helper cell is expected to define.
# Names are extracted cell by cell, so one cell without a recognisable `def`
# no longer discards the names of all other cells.
expected_funcs = []
skipped = 0
for c in func_defs:
    src = ''.join(c.get('source', [])).strip()
    _, marker, rest = src.partition('def ')
    name = rest.split('(')[0].strip() if marker else ''
    if name:
        expected_funcs.append(name)
    else:
        skipped += 1
if skipped:
    # Report instead of silently swallowing the problem
    print('Zellen ohne erkennbaren Funktionsnamen:', skipped)
print('Erwartete Funktionen:', expected_funcs)

In [None]:
# Validate the written output
import json
from pathlib import Path

output_path = Path('combined_workspace.ipynb')
assert output_path.exists(), 'Datei fehlt'
# Existence alone also passes for a truncated or corrupt file, so read the
# file back and check the basic notebook structure.
with output_path.open('r', encoding='utf-8') as f:
    written = json.load(f)
assert isinstance(written, dict), 'Kein Notebook-Objekt'
assert written.get('nbformat') == 4, 'Unerwartete nbformat-Version'
assert written.get('cells'), 'Keine Zellen geschrieben'
print('Validierung OK')