In [None]:
\
# Facebook Recruiting III - Keyword Extraction (Pristine Setup)
# Single-cell, idempotent setup: imports, constants, helpers, env hygiene, and full-data caching.

import os, re, gc, time, shutil
from datetime import datetime
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

# --- Environment hygiene: suppress matplotlib_inline post-run callback noise ---
# Best-effort block: every failure is caught and reported, never raised, so the
# rest of the setup cell still runs.
from IPython import get_ipython
print('[Env] Activating matplotlib_inline suppression...')
try:
    import matplotlib as _mpl
    # If matplotlib exposes no `backend_bases` attribute, install a minimal
    # stand-in class that carries an empty `_Backend` class.
    # NOTE(review): presumably this keeps code that looks up
    # `matplotlib.backend_bases._Backend` from failing - confirm.
    if not hasattr(_mpl, 'backend_bases'):
        _mpl.backend_bases = type('backend_bases', (), {'_Backend': type('_Backend', (), {})})
    try:
        import matplotlib.pyplot as plt
        try:
            # Switch pyplot to the 'Agg' backend; failure is ignored on purpose.
            plt.switch_backend('Agg')
        except Exception:
            pass
    except Exception:
        # pyplot could not be imported; continue without it.
        pass
    ip = get_ipython()
    # get_ipython() returns None outside an IPython session; skip in that case.
    if ip is not None and hasattr(ip, 'events'):
        cbs = ip.events.callbacks.get('post_run_cell', [])
        new_cbs = []
        for cb in cbs:
            name = getattr(cb, '__name__', '')
            mod = getattr(cb, '__module__', '')
            qual = getattr(cb, '__qualname__', '')
            # Drop callbacks whose module mentions matplotlib_inline/backend_inline
            # or whose (qualified) name is/contains 'configure_once'.
            if ('matplotlib_inline' in mod) or ('backend_inline' in mod) or (name == 'configure_once') or ('configure_once' in qual):
                continue
            new_cbs.append(cb)
        # Replace the registered post_run_cell callbacks with the filtered list.
        ip.events.callbacks['post_run_cell'] = new_cbs
    print('[Env] matplotlib_inline suppression active.')
except Exception as e:
    print('[Env] matplotlib_inline suppression failed:', repr(e))

# Seed NumPy's global RNG with a fixed value.
GLOBAL_SEED = 1337
np.random.seed(GLOBAL_SEED)

def backup_notebook():
    """Copy the pristine notebook to a timestamped backup file.

    Does nothing when 'agent_notebook_pristine.ipynb' is absent from the
    current working directory. Otherwise the file is copied (with metadata,
    via shutil.copy2) to 'agent_notebook_pristine_backup_<YYYYmmdd_HHMMSS>.ipynb'
    and the destination name is printed.
    """
    source_nb = 'agent_notebook_pristine.ipynb'
    if not os.path.exists(source_nb):
        return
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    target_nb = f'agent_notebook_pristine_backup_{stamp}.ipynb'
    shutil.copy2(source_nb, target_nb)
    print(f'[Backup] Pristine notebook copied to {target_nb}')
backup_notebook()

# Normalization and parsing (approved) - RAW regex strings to preserve \b, \d etc.
URL_RE   = re.compile(r'https?://\S+|www\.\S+', re.IGNORECASE)
EMAIL_RE = re.compile(r'(?i)\b[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}\b')
HEX_RE   = re.compile(r'\b0x[0-9a-f]+\b', re.IGNORECASE)
NUM_RE   = re.compile(r'\b\d+\b')

def normalize_text(s: str) -> str:
    """Lower-case *s* and replace noisy tokens with fixed placeholders.

    Substitutions are applied in this order: URLs -> 'URL', e-mail
    addresses -> 'EMAIL', 0x-prefixed hex literals -> 'HEX', standalone
    digit runs -> '0'. Whitespace runs are then collapsed to one space and
    the result is stripped. Non-string input yields ''.
    """
    if not isinstance(s, str):
        return ''
    cleaned = s.lower()
    # Order matters: hex literals must be replaced before plain numbers.
    for pattern, placeholder in (
        (URL_RE, ' URL '),
        (EMAIL_RE, ' EMAIL '),
        (HEX_RE, ' HEX '),
        (NUM_RE, ' 0 '),
    ):
        cleaned = pattern.sub(placeholder, cleaned)
    return re.sub(r'\s+', ' ', cleaned).strip()

def extract_text_and_code_pre_only(html: str):
    """Split an HTML body into normalized prose and normalized <pre> code.

    Returns a 5-tuple:
        (prose_norm, code_norm, n_pre_blocks, n_urls, punct_density)
    where prose is the document text with all <pre> elements removed, code is
    the space-joined text of those <pre> elements, n_urls counts URL_RE matches
    in the lower-cased (un-normalized) prose, and punct_density is the number
    of punctuation characters in the normalized prose divided by its length
    (length floored at 1). Non-string input yields ('', '', 0, 0, 0.0).
    """
    if not isinstance(html, str):
        return '', '', 0, 0, 0.0
    soup = BeautifulSoup(html, 'lxml')
    pre_tags = soup.find_all('pre')
    # Capture the text of every <pre> first, then detach them from the tree so
    # the remaining document text contains prose only.
    snippets = [tag.get_text(' ', strip=True) for tag in pre_tags]
    for tag in pre_tags:
        tag.extract()
    prose = soup.get_text(' ', strip=True)
    prose_norm = normalize_text(prose)
    # Use a simple space to join code blocks to avoid escape issues in code generation
    code_norm = normalize_text(' '.join(snippets))
    n_urls = len(URL_RE.findall(prose.lower()))
    punct_hits = re.findall(r'[\!\?\.,;:\-\(\)\[\]\{\}\#\+\*/\\\|\<\>\=\_\~\^\`\"]', prose_norm)
    density = len(punct_hits) / max(1, len(prose_norm))
    return prose_norm, code_norm, len(pre_tags), n_urls, density

def build_cache(csv_path: str, is_train: bool = True, chunksize: int = 100_000) -> pd.DataFrame:
    """Parse a competition CSV in chunks into a flat feature/text DataFrame.

    Args:
        csv_path: Path to a CSV with columns Id, Title, Body (and Tags when
            ``is_train`` is true).
        is_train: When true, the Tags column is read and carried through
            unchanged (missing values stay missing).
        chunksize: Number of CSV rows read per chunk.

    Returns:
        A DataFrame with columns Id, title_norm, body_text, code_text,
        title_len, body_len, code_len, num_block_code, num_urls,
        punct_density (plus Tags for train data), one row per input row.
        An empty DataFrame (no columns) is returned when the CSV yields no
        chunks.
    """
    t0 = time.time()
    all_parts = []
    usecols = ['Id', 'Title', 'Body'] + (['Tags'] if is_train else [])
    reader = pd.read_csv(csv_path, usecols=usecols, chunksize=chunksize)
    total_rows = 0
    for i, chunk in enumerate(reader):
        # Pull whole columns out as Python lists once per chunk. Indexing
        # chunk.iloc[idx][...] per row would build a full row Series for every
        # record, which dominates runtime on multi-million-row files.
        ids = [int(v) for v in pd.to_numeric(chunk['Id']).tolist()]
        titles = chunk['Title'].fillna('').astype(str).tolist()
        titles_norm = [normalize_text(t) for t in titles]
        bodies = chunk['Body'].fillna('').astype(str).tolist()
        # Each entry: (body_text, code_text, n_pre_blocks, n_urls, punct_density)
        parsed = [extract_text_and_code_pre_only(b) for b in bodies]
        out_records = {
            'Id': ids,
            'title_norm': titles_norm,
            'body_text': [p[0] for p in parsed],
            'code_text': [p[1] for p in parsed],
            'title_len': [len(t) for t in titles_norm],
            'body_len': [len(p[0]) for p in parsed],
            'code_len': [len(p[1]) for p in parsed],
            'num_block_code': [int(p[2]) for p in parsed],
            'num_urls': [int(p[3]) for p in parsed],
            'punct_density': [float(p[4]) for p in parsed],
        }
        if is_train:
            out_records['Tags'] = chunk['Tags'].tolist()
        out_df = pd.DataFrame(out_records)
        all_parts.append(out_df)
        total_rows += len(out_df)
        if (i + 1) % 5 == 0:
            print(f'[Cache] Processed ~{total_rows} rows so far for {os.path.basename(csv_path)}')
        # Release per-chunk intermediates before reading the next chunk.
        del chunk, out_df, out_records, ids, titles, titles_norm, bodies, parsed
        gc.collect()
    result = pd.concat(all_parts, ignore_index=True) if len(all_parts) else pd.DataFrame()
    print(f'[Cache] Built DataFrame with {len(result)} rows in {time.time()-t0:.1f}s from {os.path.basename(csv_path)}')
    return result

# Input CSVs and the pickle files used to cache their parsed form.
TRAIN_CSV = 'train.csv'
TEST_CSV  = 'test.csv'
PARSED_TRAIN_FULL_PKL = 'parsed_train_full.pkl'
PARSED_TEST_PKL  = 'parsed_test.pkl'

print('[Setup] Starting pristine setup...')
# Set to True if either cache had to be (re)built in this run.
built_any = False

# Load-or-build TRAIN cache, and ensure df_train_full in memory
# NOTE: read_pickle executes pickle deserialization; only load cache files
# produced by this notebook.
if os.path.exists(PARSED_TRAIN_FULL_PKL):
    df_train_full = pd.read_pickle(PARSED_TRAIN_FULL_PKL)
    print(f"[Cache] Loaded {PARSED_TRAIN_FULL_PKL} with shape {df_train_full.shape}")
else:
    print('[Cache] Building FULL train cache (no subsample)...')
    df_train_full = build_cache(TRAIN_CSV, is_train=True, chunksize=75_000)
    df_train_full.to_pickle(PARSED_TRAIN_FULL_PKL)
    print(f'[Cache] Wrote {PARSED_TRAIN_FULL_PKL} with {len(df_train_full)} rows')
    built_any = True

# Load-or-build TEST cache, and ensure df_test_cache in memory
if os.path.exists(PARSED_TEST_PKL):
    df_test_cache = pd.read_pickle(PARSED_TEST_PKL)
    print(f"[Cache] Loaded {PARSED_TEST_PKL} with shape {df_test_cache.shape}")
else:
    print('[Cache] Building test cache (pickle)...')
    df_test_cache = build_cache(TEST_CSV, is_train=False, chunksize=75_000)
    df_test_cache.to_pickle(PARSED_TEST_PKL)
    print(f'[Cache] Wrote {PARSED_TEST_PKL} with {len(df_test_cache)} rows')
    built_any = True

# Final summary of what is now available in the notebook namespace.
print('[Setup] Completed. Built any:', built_any)
print('[Setup] df_train_full shape:', df_train_full.shape if 'df_train_full' in globals() else None)
print('[Setup] df_test_cache shape:', df_test_cache.shape if 'df_test_cache' in globals() else None)


In [None]:
\
# Phase 2 (Refactored v3): Scalable 5-fold CV with streaming features + online OVR via SGD (partial_fit)
# - Avoids materializing full X_tr/X_va: transforms batches on the fly per channel and hstack per-batch only
# - Per-fold label pruning + dynamic label sharding (4GB coef_ budget)
# - Streaming threshold optimization with correct FN accounting for excluded labels
# - Hygiene: no nested parallelism, sparse-safe ops, per-fold vectorizer fit, artifact persistence, safety rule
# - Micro-pilot mode (conditional): deterministic subsample and run only the first fold to validate pipeline

import os, gc, time, math
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.feature_extraction.text import TfidfVectorizer, HashingVectorizer
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.linear_model import SGDClassifier

# Dependency guard: iterative-stratification
# Try the import first; on any failure, pip-install scikit-multilearn with the
# current interpreter and retry. The pip exit status is not checked, so a
# failed install surfaces as an error from the second import.
try:
    from skmultilearn.model_selection import IterativeStratification
except Exception:
    import sys
    from subprocess import run
    run([sys.executable, '-m', 'pip', 'install', '--quiet', 'scikit-multilearn'])
    from skmultilearn.model_selection import IterativeStratification

# Re-seed NumPy's global RNG so this cell starts from a fixed state.
GLOBAL_SEED = 1337
np.random.seed(GLOBAL_SEED)

# Expect df_train_full and df_test_cache in memory from setup cell
assert 'df_train_full' in globals() and 'df_test_cache' in globals(), 'Run setup cell first to load caches.'

# Prepare texts and labels
def to_list_tags(s):
    """Split a whitespace-delimited tag string into a list; non-strings give []."""
    return s.split() if isinstance(s, str) else []
# Fill missing Tags with '' BEFORE the string cast: astype(str) alone turns NaN
# into the literal 'nan', which would be binarized as a bogus 'nan' label.
y_lists = df_train_full['Tags'].fillna('').astype(str).apply(to_list_tags)
# Sparse multi-hot label matrix: one row per sample, one column per distinct tag.
mlb = MultiLabelBinarizer(sparse_output=True)
Y_all = mlb.fit_transform(y_lists)
labels_list = mlb.classes_.tolist()
n_samples, n_labels = Y_all.shape
print('[Labels] #samples:', n_samples, '#labels:', n_labels)

# Channels
# Text channels as string Series (missing values become ''), plus a float32
# matrix of the numeric meta features in the column order of meta_cols.
title_text = df_train_full['title_norm'].fillna('').astype(str)
body_text  = df_train_full['body_text'].fillna('').astype(str)
code_text  = df_train_full['code_text'].fillna('').astype(str)
meta_cols = ['title_len','body_len','code_len','num_block_code','num_urls','punct_density']
meta_all = df_train_full[meta_cols].astype(np.float32).values

# Micro-pilot configuration (auditor-approved): subsample deterministically and run only first fold
MICRO_PILOT = True
PILOT_N = 50_000  # target rows for micro-pilot (reduced for faster turnaround)
if MICRO_PILOT:
    n_keep = int(min(PILOT_N, n_samples))
    # Dedicated RandomState seeded with GLOBAL_SEED so the subsample does not
    # depend on how much of the global RNG stream was consumed earlier.
    rng = np.random.RandomState(GLOBAL_SEED)
    idx_keep = rng.choice(n_samples, size=n_keep, replace=False)
    # Sorted so the kept rows retain their original relative order.
    idx_keep.sort()
    # Subset all channels and labels consistently
    title_text = title_text.iloc[idx_keep].reset_index(drop=True)
    body_text  = body_text.iloc[idx_keep].reset_index(drop=True)
    code_text  = code_text.iloc[idx_keep].reset_index(drop=True)
    meta_all   = meta_all[idx_keep]
    Y_all      = Y_all[idx_keep]
    # n_labels is left unchanged: the label columns still cover every tag seen
    # in the full data, even if some have no positives in the subsample.
    n_samples  = Y_all.shape[0]
    print(f'[Pilot] Subsampled to {n_samples} rows for micro-pilot.')

# CV setup
n_splits = 5
mskf = IterativeStratification(n_splits=n_splits, order=1)

# Vectorizer configs (fit within fold on train split)
title_vec_cfg = dict(analyzer='word', ngram_range=(1,3), min_df=3, max_df=0.95,
                     max_features=200_000, sublinear_tf=True, dtype=np.float32)
# Use streaming hashing with built-in l2 normalization (no IDF) to allow partial/batch processing
body_hash_cfg  = dict(analyzer='word', ngram_range=(1,3), n_features=2**19,
                      alternate_sign=False, norm='l2', dtype=np.float32)
char_hash_cfg  = dict(analyzer='char_wb', ngram_range=(3,6), n_features=2**18,
                      alternate_sign=False, norm='l2', dtype=np.float32)
code_vec_cfg   = dict(analyzer='word', ngram_range=(1,3), min_df=2, max_df=0.999,
                      token_pattern=r'(?u)\b\w[\w_\+\-\#\.]*\b', max_features=100_000,
                      sublinear_tf=True, dtype=np.float32)
# Multiplier applied to the title TF-IDF block before stacking channels.
title_alpha = 3.0
MIN_LABEL_FREQ_TRAIN = 50  # higher for micro-pilot to reduce labels per shard and speed up
# Candidate decision thresholds evaluated for every prediction batch.
THS = np.linspace(0.05, 0.6, 12)
COEF_BUDGET_BYTES = 4_000_000_000  # 4GB
BATCH_SIZE = 8192  # larger batch for better throughput in micro-pilot

# Global label support for per-tag thresholds
# Support is counted on Y_all as it stands here (i.e. after any pilot subsample).
global_support = np.asarray(Y_all.sum(axis=0)).ravel()
hi_support_mask = (global_support >= 50)
hi_label_idx = np.where(hi_support_mask)[0]
print('[Labels] High-support labels (>=50):', hi_label_idx.size)

# Streaming accumulators for global micro-F1
# One int64 slot per threshold in THS.
tp_tot = np.zeros_like(THS, dtype=np.int64)
fp_tot = np.zeros_like(THS, dtype=np.int64)
fn_tot = np.zeros_like(THS, dtype=np.int64)

# Per-tag (high-support only) streaming counts per threshold
# Shape: (number of high-support labels, number of thresholds).
tp_hi = np.zeros((hi_label_idx.size, THS.size), dtype=np.int64)
fp_hi = np.zeros((hi_label_idx.size, THS.size), dtype=np.int64)
fn_hi = np.zeros((hi_label_idx.size, THS.size), dtype=np.int64)
# Maps a global label index to its row in the *_hi count arrays.
hi_pos = {lab: pos for pos, lab in enumerate(hi_label_idx)}

def update_counts_batch(Y_true_batch_csr, probs_batch, label_idx_batch, ths, tp, fp, fn,
                        tp_hi, fp_hi, fn_hi, hi_pos_map):
    """Accumulate TP/FP/FN counts for one batch at every threshold, in place.

    Args:
        Y_true_batch_csr: Sparse ground-truth matrix for the batch, with one
            column per entry of ``label_idx_batch``.
        probs_batch: Dense 2-D array of predicted probabilities with the same
            shape as ``Y_true_batch_csr``.
        label_idx_batch: Global label index for each local column.
        ths: Sequence of thresholds; a prediction is positive when
            ``prob >= threshold``.
        tp, fp, fn: 1-D integer accumulators (one slot per threshold),
            updated in place with batch-wide totals.
        tp_hi, fp_hi, fn_hi: 2-D integer accumulators indexed
            ``[tracked_label_row, threshold]``, updated in place.
        hi_pos_map: Maps a global label index to its row in the ``*_hi``
            arrays. Labels absent from the map only contribute to the totals.
            An empty/None map disables per-label accounting.

    Returns:
        None. All results are written into the accumulator arrays.
    """
    n_local = probs_batch.shape[1]

    # Ground-truth column sums do not depend on the threshold: compute once.
    true_per_label = np.asarray(Y_true_batch_csr.sum(axis=0)).ravel().astype(np.int64)
    true_total = int(true_per_label.sum())

    # Resolve which local columns are tracked per label, and where they go.
    local_cols = []
    hi_rows = []
    if hi_pos_map:
        for j_local in range(n_local):
            pos = hi_pos_map.get(int(label_idx_batch[j_local]))
            if pos is not None:
                local_cols.append(j_local)
                hi_rows.append(pos)
    local_cols = np.asarray(local_cols, dtype=np.intp)
    hi_rows = np.asarray(hi_rows, dtype=np.intp)

    for ti, thr in enumerate(ths):
        pred_bin = (probs_batch >= thr).astype(np.uint8)
        pred_csr = sparse.csr_matrix(pred_bin, dtype=np.uint8)
        # Column sums of the element-wise product give per-label true positives.
        tp_per_label = np.asarray(
            pred_csr.multiply(Y_true_batch_csr).sum(axis=0)
        ).ravel().astype(np.int64)
        # Explicit int64 so the subtractions below stay in signed integers.
        pred_per_label = pred_bin.sum(axis=0, dtype=np.int64)

        tp_count = int(tp_per_label.sum())
        pred_pos = int(pred_per_label.sum())
        tp[ti] += tp_count
        fp[ti] += (pred_pos - tp_count)
        fn[ti] += (true_total - tp_count)

        if local_cols.size:
            tp_sel = tp_per_label[local_cols]
            # np.add.at accumulates correctly even if a row index repeats.
            np.add.at(tp_hi, (hi_rows, ti), tp_sel)
            np.add.at(fp_hi, (hi_rows, ti), pred_per_label[local_cols] - tp_sel)
            np.add.at(fn_hi, (hi_rows, ti), true_per_label[local_cols] - tp_sel)

def micro_f1(tp, fp, fn):
    """Return F1 = 2*tp / (2*tp + fp + fn), or 0.0 when the denominator is 0."""
    denominator = 2 * tp + fp + fn
    if denominator == 0:
        return 0.0
    return (2.0 * tp) / denominator

# Helper: yield batches of indices
def batch_indices(idxs, batch_size):
    """Yield consecutive slices of *idxs* holding at most *batch_size* items.

    *idxs* must expose ``.size`` and support slicing (e.g. a NumPy array).
    The final slice may be shorter; an empty input yields nothing.
    """
    total = idxs.size
    for start in range(0, total, batch_size):
        stop = min(total, start + batch_size)
        yield idxs[start:stop]

# Main CV loop. For each fold: prune rare labels on the train split, fit the
# vectorizers/scaler on the train split, then train and validate one shard of
# labels at a time with per-label online SGD models, streaming feature batches.
fold_times = []
# IterativeStratification only needs X for its row count; use a 1-column dummy.
X_dummy = np.zeros((n_samples, 1))
fold_id = 0
t_all = time.time()
for tr_idx, va_idx in mskf.split(X_dummy, Y_all):
    t0 = time.time()
    # Fold-local label matrices. NOTE: these are re-indexed from 0, so they must
    # only be used for whole-split statistics, never indexed with tr_idx/va_idx
    # values (which are row positions in the full data).
    Y_tr_full = Y_all[tr_idx]
    Y_va_full = Y_all[va_idx]

    # Per-fold label pruning (train split only)
    sup_tr = np.asarray(Y_tr_full.sum(axis=0)).ravel()
    kept_labels = np.where(sup_tr >= MIN_LABEL_FREQ_TRAIN)[0]
    excluded_labels = np.setdiff1d(np.arange(n_labels), kept_labels)
    print(f'[Fold {fold_id}] Train={len(tr_idx)}, Valid={len(va_idx)}, kept_labels={kept_labels.size}, excluded={excluded_labels.size}')
    if kept_labels.size == 0:
        print(f'[Fold {fold_id}] No labels meet freq >= {MIN_LABEL_FREQ_TRAIN}; skipping fold.')
        continue

    # Fit vectorizers/scaler on training split only
    title_vec = TfidfVectorizer(**title_vec_cfg)
    code_vec  = TfidfVectorizer(**code_vec_cfg)
    body_hash = HashingVectorizer(**body_hash_cfg)
    char_hash = HashingVectorizer(**char_hash_cfg)
    meta_scaler = StandardScaler(with_mean=False)

    # Fit title/code on full train split (fit only, no transform to avoid materializing large matrices)
    title_vec.fit(title_text.iloc[tr_idx])
    code_vec.fit(code_text.iloc[tr_idx])

    # Fit meta scaler in batches
    for b_idx in batch_indices(tr_idx, BATCH_SIZE):
        meta_scaler.partial_fit(meta_all[b_idx])

    # Compute feature dimension D approximately by transforming one small batch
    probe_idx = tr_idx[:min(BATCH_SIZE, tr_idx.size)]
    Xt_probe = title_vec.transform(title_text.iloc[probe_idx]).astype(np.float32)
    Xt_probe = Xt_probe.multiply(title_alpha)
    Xb_probe = body_hash.transform(body_text.iloc[probe_idx]).astype(np.float32)
    Xc_probe = char_hash.transform((title_text.iloc[probe_idx] + ' ' + body_text.iloc[probe_idx])).astype(np.float32)
    Xcode_probe = code_vec.transform(code_text.iloc[probe_idx]).astype(np.float32)
    Xmeta_probe = sparse.csr_matrix(meta_scaler.transform(meta_all[probe_idx]), dtype=np.float32)
    D = sparse.hstack([Xt_probe, Xb_probe, Xc_probe, Xcode_probe, Xmeta_probe], format='csr', dtype=np.float32).shape[1]
    del Xt_probe, Xb_probe, Xc_probe, Xcode_probe, Xmeta_probe; gc.collect()
    # SGDClassifier stores coef_ as float64 by default -> 8 bytes per weight
    shard_cap_by_budget = max(1, int(COEF_BUDGET_BYTES // (8 * D)))
    dyn_shard_size = max(1, min(2000, shard_cap_by_budget))
    print(f'[Fold {fold_id}] Approx feature dim D={D:,}. Dynamic SHARD_SIZE={dyn_shard_size} (budget {COEF_BUDGET_BYTES/1e9:.1f}GB)')

    # Shard labels
    shards = [kept_labels[i:i+dyn_shard_size] for i in range(0, kept_labels.size, dyn_shard_size)]
    print(f'[Fold {fold_id}] #shards: {len(shards)} (size {dyn_shard_size})')

    # Train + validate per shard using online binary classifiers
    for si, shard in enumerate(shards):
        Lb = len(shard)
        if Lb == 0:
            continue
        print(f'[Fold {fold_id}] Shard {si+1}/{len(shards)} with {Lb} labels')

        # Create per-label SGD models
        models = []
        for _ in range(Lb):
            models.append(SGDClassifier(loss='log_loss', penalty='l2', alpha=2e-4,
                                       max_iter=1, tol=None, random_state=GLOBAL_SEED))  # single-epoch per partial_fit call

        # Training: stream over training indices in batches
        for b_idx in batch_indices(tr_idx, BATCH_SIZE):
            # Build batch features on the fly
            X_title = title_vec.transform(title_text.iloc[b_idx]).astype(np.float32).multiply(title_alpha)
            X_body  = body_hash.transform(body_text.iloc[b_idx]).astype(np.float32)
            X_char  = char_hash.transform((title_text.iloc[b_idx] + ' ' + body_text.iloc[b_idx])).astype(np.float32)
            X_code  = code_vec.transform(code_text.iloc[b_idx]).astype(np.float32)
            X_meta  = sparse.csr_matrix(meta_scaler.transform(meta_all[b_idx]), dtype=np.float32)
            X_batch = sparse.hstack([X_title, X_body, X_char, X_code, X_meta], format='csr', dtype=np.float32)
            # b_idx holds row positions in the full data (values of tr_idx), so
            # labels must come from Y_all. Indexing Y_tr_full with them would
            # pair features with the wrong rows' labels or run out of bounds.
            Y_b = Y_all[b_idx][:, shard].toarray().astype(np.int8, copy=False)
            # partial_fit for each label binary model
            for j in range(Lb):
                yj = Y_b[:, j]
                models[j].partial_fit(X_batch, yj, classes=np.array([0,1], dtype=np.int32))
            del X_title, X_body, X_char, X_code, X_meta, X_batch, Y_b; gc.collect()

        # Validation: stream over validation batches, predict probs and update counts
        for b_idx in batch_indices(va_idx, BATCH_SIZE):
            X_title = title_vec.transform(title_text.iloc[b_idx]).astype(np.float32).multiply(title_alpha)
            X_body  = body_hash.transform(body_text.iloc[b_idx]).astype(np.float32)
            X_char  = char_hash.transform((title_text.iloc[b_idx] + ' ' + body_text.iloc[b_idx])).astype(np.float32)
            X_code  = code_vec.transform(code_text.iloc[b_idx]).astype(np.float32)
            X_meta  = sparse.csr_matrix(meta_scaler.transform(meta_all[b_idx]), dtype=np.float32)
            X_batch = sparse.hstack([X_title, X_body, X_char, X_code, X_meta], format='csr', dtype=np.float32)
            # Collect probs per label model
            P = np.zeros((X_batch.shape[0], Lb), dtype=np.float32)
            for j in range(Lb):
                try:
                    prob = models[j].predict_proba(X_batch)[:, 1]
                except Exception:
                    # Fallback: squash the raw decision scores through a sigmoid.
                    from scipy.special import expit
                    scores = models[j].decision_function(X_batch)
                    prob = expit(scores)
                P[:, j] = prob.astype(np.float32, copy=False)
            # Same indexing rule as in training: b_idx values are positions in
            # the full data (values of va_idx), so slice the truth from Y_all.
            Y_true_batch = Y_all[b_idx][:, shard]
            update_counts_batch(Y_true_batch.tocsr(), P, shard, THS, tp_tot, fp_tot, fn_tot,
                                tp_hi, fp_hi, fn_hi, hi_pos)
            del X_title, X_body, X_char, X_code, X_meta, X_batch, Y_true_batch, P; gc.collect()

        # Free models
        del models
        gc.collect()

    # Add FN from excluded labels to avoid optimistic bias
    # (every validation positive of a pruned label is a miss at any threshold).
    fn_excluded = int(Y_va_full[:, excluded_labels].sum()) if excluded_labels.size > 0 else 0
    if fn_excluded > 0:
        for ti in range(THS.size):
            fn_tot[ti] += fn_excluded
    print(f'[Fold {fold_id}] Added FN from excluded labels: {fn_excluded}')

    del Y_tr_full, Y_va_full
    gc.collect()
    dt = time.time() - t0
    fold_times.append(dt)
    print(f'[Fold {fold_id}] Completed in {dt/60:.1f} min')
    fold_id += 1

    # Micro-pilot: run only the first fold
    if MICRO_PILOT:
        print('[Pilot] Completed first fold only (micro-pilot mode).')
        break

print('[CV] Completed. Optimizing thresholds...')
# Global threshold: the candidate in THS with the highest pooled micro-F1.
f1s = [micro_f1(tp_tot[i], fp_tot[i], fn_tot[i]) for i in range(THS.size)]
best_idx = int(np.argmax(f1s))
best_thr = float(THS[best_idx])
best_f1 = float(f1s[best_idx])
print('[OOF] Global best micro-F1 = {:.5f} at thr = {:.3f}'.format(best_f1, best_thr))

# Per-tag thresholds for high-support labels only
# Every label starts at the global threshold; a high-support label is overridden
# only when at least one candidate threshold gave it a positive F1. Without that
# guard, a label with no usable counts (e.g. pruned from training in the
# evaluated folds) has F1 == 0 everywhere, argmax returns index 0, and the
# label would silently be assigned the lowest candidate threshold.
per_tag_thr = np.full(n_labels, best_thr, dtype=np.float32)
n_tuned = 0
for k, lab in enumerate(hi_label_idx):
    tps = tp_hi[k]; fps = fp_hi[k]; fns = fn_hi[k]
    f1s_lab = np.array([micro_f1(tps[i], fps[i], fns[i]) for i in range(THS.size)], dtype=np.float32)
    j = int(np.argmax(f1s_lab))
    if f1s_lab[j] > 0.0:
        per_tag_thr[lab] = float(THS[j])
        n_tuned += 1
print('[OOF] Per-tag thresholds computed for', n_tuned, 'labels; others use global.')

# Persist artifacts
pd.DataFrame({'label': labels_list}).to_csv('labels.csv', index=False)
np.save('per_tag_thresholds.npy', per_tag_thr)
np.save('global_threshold.npy', np.array([best_thr], dtype=np.float32))
pd.DataFrame({'threshold': THS, 'f1': f1s}).to_csv('oof_global_f1_curve.csv', index=False)
print('[Artifacts] Saved labels.csv, per_tag_thresholds.npy, global_threshold.npy, oof_global_f1_curve.csv')

# Safety rule for inference
def apply_thresholds_with_safety(prob_row, label_indices, per_thr_vec, global_thr, min_k=1):
    """Select labels whose probability meets their threshold, never fewer than min_k.

    Args:
        prob_row: Indexable of probabilities, addressed by label index.
        label_indices: Candidate label indices to consider.
        per_thr_vec: Indexable of per-label thresholds, addressed by label index.
        global_thr: Currently unused; kept so existing callers keep working.
        min_k: Minimum number of labels to return when enough candidates
            exist. Values below 1 (or None) are treated as 1, which is the
            guarantee this function has always given.

    Returns:
        A list of label indices: first those with ``prob >= threshold`` in
        the order of ``label_indices``, then, if fewer than ``min_k`` passed,
        the highest-probability remaining candidates (ties resolved in favour
        of the earlier candidate). Returns [] when ``label_indices`` is empty.
    """
    import heapq

    sel = [j for j in label_indices if prob_row[j] >= per_thr_vec[j]]
    needed = max(1, int(min_k or 1))
    if len(sel) >= needed or len(label_indices) == 0:
        return sel

    # Top up with the best-scoring labels that did not pass their threshold.
    already = set(sel)
    leftovers = [j for j in label_indices if j not in already]
    extras = heapq.nlargest(needed - len(sel), leftovers, key=lambda jj: prob_row[jj])
    return sel + [int(j) for j in extras]

# Final summary: best global F1/threshold and fold timing (seconds -> minutes).
# NOTE: if no fold completed, fold_times is empty and the mean prints as nan.
print('[Phase 2 v3] DONE. Global OOF micro-F1 ~', round(best_f1, 5), 'at thr', round(best_thr, 3))
print('[Timing] Avg fold time: {:.1f} min; total: {:.1f} min'.format(np.mean(fold_times)/60.0, np.sum(fold_times)/60.0))
