In [3]:
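# CE reranker on CPU: NLI DeBERTa v3 base with an asymmetric prompt (CPC title prepended to the anchor side only)
# - Produces raw CE scores (entailment probability) and per-fold isotonic-calibrated scores
# - Writes dedicated artifacts oof_ce_bge_rerank.csv/submission_ce_bge_rerank.csv (raw + isotonic columns)
# - Overwrites oof_ce_stsb.csv/submission_ce_stsb.csv with RAW reranker scores for stacker consumption
# - Writes oof_ce_l12.csv/submission_ce_l12.csv with raw and isotonic columns (oof_iso/score_iso) for optional usage in LGBM stacker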
import time, numpy as np, pandas as pd
from scipy.stats import pearsonr
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LinearRegression
from sentence_transformers import CrossEncoder

# Seed NumPy's global RNG so any NumPy-based randomness in this cell is repeatable.
SEED = 42
np.random.seed(SEED)

# CPC code -> short title lookup used to turn the `context` column into text.
# Keys are 4-character subclass codes (e.g. 'A61B') and 3-character class codes (e.g. 'A61').
# get_cpc_text() tries prefixes of length 4, then 3, then 2 and falls back to the raw code.
# NOTE: this table defines no 2-character keys, so the 2-character lookup never matches here.
# Truncation to 10 words is not applied to this table; it happens later in build_pairs_asym().
CPC_DESCRIPTIONS = {
    'A61B':'Diagnosis; Surgery; Identification','A61F':'Filters; Prostheses; Braces','A61K':'Preparations for medical, dental, or toilet purposes','A61M':'Devices for introducing/circulating media into the body',
    'B01D':'Separation; Sorption; Membranes','B29C':'Shaping or joining of plastics','B32B':'Layered products','B60K':'Arrangement or mounting of vehicle power plant or auxiliaries','B60R':'Vehicles, vehicle fittings, or vehicle parts','B60T':'Vehicle brake control systems or parts',
    'C01B':'Non-metallic elements; Compounds thereof','C04B':'Cement; Concrete; Ceramics','C07C':'Acyclic or carbocyclic compounds','C07D':'Heterocyclic compounds','C07K':'Peptides','C08F':'Macromolecular compounds by polymerising monomers','C08L':'Compositions of macromolecular compounds','C09K':'Materials for miscellaneous applications',
    'C12N':'Microorganisms or enzymes; Genetic engineering','C12P':'Fermentation or enzyme-using processes','C12Q':'Measuring/testing involving enzymes or microorganisms',
    'E04B':'Building constructions; Walls; Floors; Roofs','F16B':'Screws; Rivets; Joints','F16F':'Springs; Shock-absorbers; Dampers','F16L':'Pipes; Joints or fittings for pipes',
    'G01N':'Investigating or analysing materials','G02B':'Optical elements, systems, or apparatus','G02F':'Control of light','G06F':'Electric digital data processing','G06K':'Recognition/Presentation of data','G06Q':'Data processing for administrative/financial/managerial',
    'H01L':'Semiconductor devices','H01M':'Batteries; Fuel cells','H02J':'Power supply/distribution','H04L':'Transmission of digital information','H04N':'Pictorial communication','H04W':'Wireless communication networks','H05K':'Printed circuits; Electric assemblies',
    'A01':'Agriculture; Forestry; Animal husbandry; Hunting; Trapping; Fishing','A21':'Baking; Edible doughs','A23':'Foods or foodstuffs; Treatment thereof','A24':'Tobacco; Cigars; Cigarettes; Simulated smoking devices; Smokers\' requisites','A41':'Wearing apparel','A42':'Headwear','A43':'Footwear','A45':'Hand or travelling articles','A46':'Brushware','A47':'Furniture; Domestic articles or appliances; Coffee mills; Spice mills; Suction cleaners','A61':'Medical or veterinary science; Hygiene','A62':'Life-saving; Fire-fighting','A63':'Sports; Games; Amusements',
    'B01':'Physical or chemical processes or apparatus','B02':'Crushing; pulverising; disintegrating','B03':'Separation of solid materials using liquids or jigs','B04':'Centrifugal apparatus or machines','B05':'Spraying; atomising; Applying fluent materials to surfaces','B06':'Mechanical vibrations','B07':'Separating solids; Sorting','B08':'Cleaning','B09':'Disposal of solid waste; Soil reclamation','B21':'Mechanical metal-working without removing material','B22':'Casting; Powder metallurgy','B23':'Machine tools; Metal-working n.e.c.','B24':'Grinding; Polishing','B25':'Hand tools; Portable power-driven tools; Manipulators','B26':'Hand cutting tools; Cutting; Severing','B27':'Working/preserving wood; Nailing or stapling','B28':'Working cement, clay, or stone','B29':'Working of plastics; Plastic state substances','B30':'Presses','B31':'Making paper-like articles','B32':'Layered products','B33':'Additive manufacturing','B41':'Printing; Typewriters; Stamps','B42':'Bookbinding; Albums; Files','B43':'Writing or drawing implements; Bureau accessories','B44':'Decorative arts','B60':'Vehicles in general','B61':'Railways','B62':'Land vehicles not on rails','B63':'Ships or waterborne vessels','B64':'Aircraft; Aviation; Cosmonautics','B65':'Conveying; Packing; Storing','B66':'Hoisting; Lifting; Hauling','B67':'Opening/closing containers; Liquid handling','B68':'Saddlery; Upholstery','B81':'Microstructural technology','B82':'Nanotechnology',
    'C01':'Inorganic chemistry','C02':'Treatment of water, wastewater, sewage, sludge','C03':'Glass; Mineral or slag wool','C04':'Cements; Concrete; Ceramics; Refractories','C05':'Fertilisers','C06':'Explosives; Matches','C07':'Organic chemistry','C08':'Organic macromolecular compounds; Polymers','C09':'Dyes; Paints; Polishes; Adhesives; Misc compositions','C10':'Petroleum/gas/coke industries; Fuels; Lubricants; Peat','C11':'Oils, fats, waxes; Detergents; Candles','C12':'Biochemistry; Microbiology; Enzymology; Fermentation','C13':'Sugar industry','C14':'Skins; Hides; Leather',
    'C21':'Metallurgy of iron','C22':'Metallurgy; Alloys; Treatment of non-ferrous metals','C23':'Coating metallic material; Chemical surface treatment','C25':'Electrolytic/electrophoretic processes','C30':'Crystal growth','C40':'Combinatorial technology',
    'D01':'Threads or fibres; Spinning','D02':'Yarns; Finishing; Warping or beaming','D03':'Weaving','D04':'Braiding; Lace-making; Knitting; Non-wovens','D05':'Sewing; Embroidering; Tufting','D06':'Textile treatment; Laundering; Flexible materials','D07':'Ropes; Cables other than electric','D21':'Paper-making; Cellulose production',
    'E01':'Road/rail/bridge construction','E02':'Hydraulic engineering; Foundations; Soil shifting','E03':'Water supply; Sewerage','E04':'Building','E05':'Locks; Keys; Fittings; Safes','E06':'Doors; Windows; Shutters; Ladders','E21':'Earth/rock drilling; Mining',
    'F01':'Machines or engines in general','F02':'Combustion engines; Hot-gas plants','F03':'Machines or engines for liquids; Wind/spring motors','F04':'Positive-displacement machines; Pumps','F15':'Fluid-pressure actuators; Hydraulics; Pneumatics','F16':'Engineering elements or units','F17':'Storing/distributing gases or liquids','F21':'Lighting','F22':'Steam generation','F23':'Combustion apparatus; Processes','F24':'Heating; Ranges; Ventilating','F25':'Refrigeration or cooling; Heat pumps','F26':'Drying','F27':'Furnaces; Kilns; Ovens','F28':'Heat exchange in general','F41':'Weapons','F42':'Ammunition; Blasting',
    'G01':'Measuring; Testing','G02':'Optics','G03':'Photography; Cinematography; Non-optical waves','G04':'Horology','G05':'Controlling; Regulating','G06':'Computing; Calculating; Counting','G07':'Checking-devices','G08':'Signalling','G09':'Educating; Cryptography; Display; Advertising; Seals','G10':'Musical instruments; Acoustics','G11':'Information storage','G12':'Instrument details','G16':'ICT adapted for specific fields','G21':'Nuclear physics; Nuclear engineering',
    'H01':'Electric elements','H02':'Electric power: generation, conversion, distribution','H03':'Electronic circuitry','H04':'Electric communication technique','H05':'Electric techniques not otherwise provided for'
}

def get_cpc_text(code: str, descriptions=None) -> str:
    """Map a CPC context code to a short human-readable title.

    The code is stripped and upper-cased, then its 4-, 3- and 2-character
    prefixes are looked up in that order; the first prefix found wins.

    Args:
        code: CPC code such as ``'A61B'`` or ``'g06'``.
        descriptions: Optional ``prefix -> title`` mapping to search. When
            omitted, the module-level ``CPC_DESCRIPTIONS`` table is used.

    Returns:
        The matching title. ``'GENERAL'`` when ``code`` is not a string or is
        empty / whitespace-only. Otherwise the normalised (stripped,
        upper-cased) code itself as a raw fallback.
    """
    if not isinstance(code, str):
        return 'GENERAL'
    code = code.strip().upper()
    if not code:
        # Whitespace-only codes are treated like empty ones rather than
        # falling through the lookup and yielding an empty string.
        return 'GENERAL'
    # Resolve the table lazily so an explicit mapping never touches the global.
    table = CPC_DESCRIPTIONS if descriptions is None else descriptions
    for width in (4, 3, 2):
        prefix = code[:width]
        if prefix in table:
            return table[prefix]
    return code  # RAW fallback

def truncate_words(text: str, n: int = 10) -> str:
    """Keep at most the first ``n`` whitespace-separated words of ``text``.

    Runs of whitespace are collapsed to single spaces in the result.
    Non-string input yields an empty string.
    """
    if not isinstance(text, str):
        return ''
    return ' '.join(text.split()[:n])

def build_pairs_asym(df: pd.DataFrame):
    """Build asymmetric ``(text_a, text_b)`` pairs for the cross-encoder.

    ``text_a`` is the CPC title for ``context`` (via ``get_cpc_text``, cut to
    10 words) joined to the anchor with ``' [SEP] '``; ``text_b`` is the bare
    target. All three columns are cast with ``astype(str)`` first.

    Args:
        df: Frame with ``context``, ``anchor`` and ``target`` columns.

    Returns:
        A list of ``(text_a, text_b)`` tuples, one per row, in row order.
    """
    # Resolve each context code to its title and cap it at 10 words in one pass.
    cpc_prefix = df['context'].astype(str).map(
        lambda context_code: truncate_words(get_cpc_text(context_code), 10)
    )
    anchors_with_cpc = cpc_prefix + ' [SEP] ' + df['anchor'].astype(str)
    targets = df['target'].astype(str)
    return list(zip(anchors_with_cpc.tolist(), targets.tolist()))

# Start the wall-clock timer reported in the final "Elapsed" message.
t0 = time.time()
# Load the train/test tables and the fold assignment keyed by `id`.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
folds = pd.read_csv('folds_by_id.csv')
# Attach the fold column; validate='one_to_one' makes pandas raise if `id` is duplicated on either side.
train = train.merge(folds, on='id', how='left', validate='one_to_one')
# A left merge leaves NaN for ids absent from folds_by_id.csv; NaN >= 0 is False, so this assert catches them.
assert (train['fold']>=0).all(), 'Fold merge by id failed'

# Build (CPC title + anchor, target) text pairs for both splits.
train_pairs = build_pairs_asym(train)
test_pairs = build_pairs_asym(test)

# Public reranker backbone suggested by experts; CPU-friendly
model_name = 'cross-encoder/nli-deberta-v3-base'
# Load the cross-encoder on CPU with max_length=256.
# tokenizer_args is forwarded to the tokenizer loader; use_fast=False presumably selects the slow tokenizer — confirm.
ce = CrossEncoder(model_name, device='cpu', max_length=256, tokenizer_args={'use_fast': False})

def _entailment_index_from_labels(id2label):
    """Return the class index whose label mentions 'entail', or None if it cannot be determined."""
    if not isinstance(id2label, dict) or len(id2label) < 2:
        return None
    for key, label in id2label.items():
        if 'entail' not in str(label).lower():
            continue
        if isinstance(key, (int, np.integer)) or str(key).isdigit():
            return int(key)
        # The label exists but its key is not an integer index: let the caller fall back.
        return None
    return None


def predict_entailment_prob(ce: 'CrossEncoder', pairs, batch_size=64):
    """Score text pairs and return the softmax probability of the entailment class.

    The entailment column is taken from ``ce.model.config.id2label`` when that
    mapping names it. Otherwise a positional guess is used (index 2 for a
    3-class head, else the last class) and a ``RuntimeWarning`` is emitted,
    because a wrong column silently yields a different class' probability.
    NOTE(review): the cross-encoder NLI model cards list the labels as
    contradiction / entailment / neutral, which would put entailment at
    index 1, not 2 — confirm before relying on the positional guess.

    Args:
        ce: Object exposing ``predict(pairs, batch_size=..., show_progress_bar=...,
            apply_softmax=...)`` and, optionally, ``model.config.id2label``.
        pairs: Sequence of ``(text_a, text_b)`` tuples.
        batch_size: Batch size forwarded to ``ce.predict``.

    Returns:
        1-D ``float32`` array with one entailment probability per pair
        (empty when ``pairs`` is empty; the model is not called in that case).

    Raises:
        ValueError: If the model output is not a 2-D ``(n_pairs, n_classes)`` array.
    """
    import warnings

    # Nothing to score: avoid indexing into a shapeless model output.
    if hasattr(pairs, '__len__') and len(pairs) == 0:
        return np.empty(0, dtype=np.float32)

    probs = np.asarray(ce.predict(pairs, batch_size=batch_size, show_progress_bar=True, apply_softmax=True))
    if probs.ndim != 2:
        raise ValueError(
            f'Expected a 2-D (n_pairs, n_classes) probability array, got shape {probs.shape}'
        )
    n_classes = probs.shape[1]

    # Read the label map defensively; a missing attribute simply means "unknown".
    config = getattr(getattr(ce, 'model', None), 'config', None)
    ent_idx = _entailment_index_from_labels(getattr(config, 'id2label', None))

    if ent_idx is not None and not 0 <= ent_idx < n_classes:
        warnings.warn(
            f'id2label places entailment at index {ent_idx}, outside the {n_classes} output classes; ignoring it',
            RuntimeWarning,
            stacklevel=2,
        )
        ent_idx = None

    if ent_idx is None:
        # Fallback: common mapping for NLI is 0:contradiction,1:neutral,2:entailment
        ent_idx = 2 if n_classes == 3 else n_classes - 1
        warnings.warn(
            f'Could not resolve the entailment class from id2label; assuming index {ent_idx}',
            RuntimeWarning,
            stacklevel=2,
        )
    return probs[:, ent_idx].astype(np.float32)

# Score every train and test pair once with the pretrained cross-encoder.
# No model fitting happens in this cell, so the train scores do not depend on the fold split.
print('Scoring train pairs (nli-deberta-v3-base, context on anchor)...', flush=True)
train_scores_raw = predict_entailment_prob(ce, train_pairs, batch_size=64)
print('Scoring test pairs (nli-deberta-v3-base, context on anchor)...', flush=True)
test_scores_raw = predict_entailment_prob(ce, test_pairs, batch_size=64)

# Per-fold isotonic vs linear calibration for diagnostics
y = train['score'].values.astype(np.float32)
# Out-of-fold calibrated predictions, filled one validation fold at a time.
oof_iso = np.zeros_like(y, dtype=np.float32)
oof_lin = np.zeros_like(y, dtype=np.float32)
# One calibrated test prediction per fold; the isotonic ones are averaged when saving.
test_iso_f, test_lin_f = [], []
for f in sorted(train['fold'].unique()):
    # Calibrators are fitted on every other fold and applied to the held-out fold.
    tr_idx = np.where(train['fold'] != f)[0]
    va_idx = np.where(train['fold'] == f)[0]
    # Pearson of the uncalibrated scores on the held-out fold, used as the baseline in the log line.
    raw_p = pearsonr(train_scores_raw[va_idx], y[va_idx])[0]
    # Monotone calibration; out_of_bounds='clip' handles scores outside the range seen when fitting.
    iso = IsotonicRegression(out_of_bounds='clip')
    iso.fit(train_scores_raw[tr_idx], y[tr_idx])
    oof_iso[va_idx] = iso.predict(train_scores_raw[va_idx]).astype(np.float32)
    test_iso_f.append(iso.predict(test_scores_raw).astype(np.float32))
    # Linear calibration on the single raw-score feature (reshaped to a column for scikit-learn).
    lr = LinearRegression()
    lr.fit(train_scores_raw[tr_idx].reshape(-1,1), y[tr_idx])
    oof_lin[va_idx] = lr.predict(train_scores_raw[va_idx].reshape(-1,1)).astype(np.float32)
    # test_lin_f is collected here but not written to any file in this cell.
    test_lin_f.append(lr.predict(test_scores_raw.reshape(-1,1)).astype(np.float32))
    print(f'NLI-CE Fold {int(f)}: raw={raw_p:.6f} | iso={pearsonr(oof_iso[va_idx], y[va_idx])[0]:.6f} | lin={pearsonr(oof_lin[va_idx], y[va_idx])[0]:.6f}', flush=True)

# Overall Pearson across all training rows: raw scores vs. out-of-fold calibrated scores.
p_raw = pearsonr(train_scores_raw, y)[0]
p_iso = pearsonr(oof_iso, y)[0]
p_lin = pearsonr(oof_lin, y)[0]
print('NLI-CE OOF Pearson: raw=', round(float(p_raw),6), ' iso=', round(float(p_iso),6), ' lin=', round(float(p_lin),6))

# Save dedicated artifacts
# NOTE(review): the file names say 'bge_rerank' although the scores come from the NLI DeBERTa model — confirm downstream expectations.
# The test isotonic column is the mean of the per-fold isotonic predictions.
pd.DataFrame({'id': train['id'], 'oof_raw': train_scores_raw, 'oof_iso': oof_iso}).to_csv('oof_ce_bge_rerank.csv', index=False)
pd.DataFrame({'id': test['id'], 'score_raw': test_scores_raw, 'score_iso': np.mean(np.vstack(test_iso_f), axis=0).astype(np.float32)}).to_csv('submission_ce_bge_rerank.csv', index=False)

# Overwrite CE artifacts used by stackers:
# The *_ce_stsb files receive the RAW scores (columns 'oof' / 'score').
# The *_ce_l12 files repeat the same raw + isotonic frames that were written to the *_ce_bge_rerank files.
pd.DataFrame({'id': train['id'], 'oof': train_scores_raw}).to_csv('oof_ce_stsb.csv', index=False)
pd.DataFrame({'id': test['id'], 'score': test_scores_raw}).to_csv('submission_ce_stsb.csv', index=False)
pd.DataFrame({'id': train['id'], 'oof_raw': train_scores_raw, 'oof_iso': oof_iso}).to_csv('oof_ce_l12.csv', index=False)
pd.DataFrame({'id': test['id'], 'score_raw': test_scores_raw, 'score_iso': np.mean(np.vstack(test_iso_f), axis=0).astype(np.float32)}).to_csv('submission_ce_l12.csv', index=False)

print('Saved NLI DeBERTa CE artifacts and overwrote CE files for stacker. Elapsed', round((time.time()-t0)/60,2), 'min')

Scoring train pairs (nli-deberta-v3-base, context on anchor)...


Batches:   0%|          | 0/513 [00:00<?, ?it/s]

Batches:   0%|          | 1/513 [00:00<04:14,  2.01it/s]

Batches:   0%|          | 2/513 [00:00<03:22,  2.52it/s]

Batches:   1%|          | 3/513 [00:01<03:09,  2.69it/s]

Batches:   1%|          | 4/513 [00:01<03:00,  2.81it/s]

Batches:   1%|          | 5/513 [00:01<03:07,  2.71it/s]

Batches:   1%|          | 6/513 [00:02<02:59,  2.82it/s]

Batches:   1%|▏         | 7/513 [00:02<02:54,  2.90it/s]

Batches:   2%|▏         | 8/513 [00:02<02:53,  2.91it/s]

Batches:   2%|▏         | 9/513 [00:03<03:01,  2.78it/s]

Batches:   2%|▏         | 10/513 [00:03<02:58,  2.82it/s]

Batches:   2%|▏         | 11/513 [00:03<02:53,  2.89it/s]

Batches:   2%|▏         | 12/513 [00:04<03:07,  2.68it/s]

Batches:   3%|▎         | 13/513 [00:04<03:03,  2.72it/s]

Batches:   3%|▎         | 14/513 [00:05<02:57,  2.82it/s]

Batches:   3%|▎         | 15/513 [00:05<02:52,  2.89it/s]

Batches:   3%|▎         | 16/513 [00:05<02:59,  2.77it/s]

Batches:   3%|▎         | 17/513 [00:06<02:53,  2.85it/s]

Batches:   4%|▎         | 18/513 [00:06<03:11,  2.59it/s]

Batches:   4%|▎         | 19/513 [00:06<03:02,  2.70it/s]

Batches:   4%|▍         | 20/513 [00:07<02:56,  2.80it/s]

Batches:   4%|▍         | 21/513 [00:07<02:50,  2.88it/s]

Batches:   4%|▍         | 22/513 [00:07<02:49,  2.90it/s]

Batches:   4%|▍         | 23/513 [00:08<02:46,  2.94it/s]

Batches:   5%|▍         | 24/513 [00:08<02:45,  2.95it/s]

Batches:   5%|▍         | 25/513 [00:08<02:41,  3.02it/s]

Batches:   5%|▌         | 26/513 [00:09<02:40,  3.03it/s]

Batches:   5%|▌         | 27/513 [00:09<02:41,  3.01it/s]

Batches:   5%|▌         | 28/513 [00:09<02:37,  3.09it/s]

Batches:   6%|▌         | 29/513 [00:10<02:36,  3.09it/s]

Batches:   6%|▌         | 30/513 [00:10<02:34,  3.12it/s]

Batches:   6%|▌         | 31/513 [00:10<02:44,  2.92it/s]

Batches:   6%|▌         | 32/513 [00:11<02:44,  2.92it/s]

Batches:   6%|▋         | 33/513 [00:11<02:41,  2.97it/s]

Batches:   7%|▋         | 34/513 [00:11<02:41,  2.96it/s]

Batches:   7%|▋         | 35/513 [00:12<02:47,  2.85it/s]

Batches:   7%|▋         | 36/513 [00:12<02:42,  2.93it/s]

Batches:   7%|▋         | 37/513 [00:12<02:38,  3.00it/s]

Batches:   7%|▋         | 38/513 [00:13<02:37,  3.02it/s]

Batches:   8%|▊         | 39/513 [00:13<02:43,  2.89it/s]

Batches:   8%|▊         | 40/513 [00:13<02:42,  2.92it/s]

Batches:   8%|▊         | 41/513 [00:14<02:39,  2.97it/s]

Batches:   8%|▊         | 42/513 [00:14<02:37,  3.00it/s]

Batches:   8%|▊         | 43/513 [00:14<02:34,  3.04it/s]

Batches:   9%|▊         | 44/513 [00:15<02:33,  3.06it/s]

Batches:   9%|▉         | 45/513 [00:15<02:33,  3.06it/s]

Batches:   9%|▉         | 46/513 [00:15<02:32,  3.07it/s]

Batches:   9%|▉         | 47/513 [00:16<02:31,  3.08it/s]

Batches:   9%|▉         | 48/513 [00:16<02:33,  3.04it/s]

Batches:  10%|▉         | 49/513 [00:16<02:33,  3.01it/s]

Batches:  10%|▉         | 50/513 [00:17<02:34,  3.00it/s]

Batches:  10%|▉         | 51/513 [00:17<02:34,  2.99it/s]

Batches:  10%|█         | 52/513 [00:17<02:32,  3.02it/s]

Batches:  10%|█         | 53/513 [00:18<02:43,  2.81it/s]

Batches:  11%|█         | 54/513 [00:18<02:48,  2.73it/s]

Batches:  11%|█         | 55/513 [00:19<02:46,  2.75it/s]

Batches:  11%|█         | 56/513 [00:19<02:42,  2.81it/s]

Batches:  11%|█         | 57/513 [00:19<02:45,  2.76it/s]

Batches:  11%|█▏        | 58/513 [00:20<02:46,  2.73it/s]

Batches:  12%|█▏        | 59/513 [00:20<02:41,  2.81it/s]

Batches:  12%|█▏        | 60/513 [00:20<02:43,  2.77it/s]

Batches:  12%|█▏        | 61/513 [00:21<02:40,  2.82it/s]

Batches:  12%|█▏        | 62/513 [00:21<02:38,  2.84it/s]

Batches:  12%|█▏        | 63/513 [00:21<02:35,  2.90it/s]

Batches:  12%|█▏        | 64/513 [00:22<02:33,  2.93it/s]

Batches:  13%|█▎        | 65/513 [00:22<02:31,  2.95it/s]

Batches:  13%|█▎        | 66/513 [00:22<02:35,  2.87it/s]

Batches:  13%|█▎        | 67/513 [00:23<02:35,  2.87it/s]

Batches:  13%|█▎        | 68/513 [00:23<02:35,  2.86it/s]

Batches:  13%|█▎        | 69/513 [00:23<02:38,  2.80it/s]

Batches:  14%|█▎        | 70/513 [00:24<02:33,  2.88it/s]

Batches:  14%|█▍        | 71/513 [00:24<02:30,  2.94it/s]

Batches:  14%|█▍        | 72/513 [00:24<02:32,  2.89it/s]

Batches:  14%|█▍        | 73/513 [00:25<02:40,  2.74it/s]

Batches:  14%|█▍        | 74/513 [00:25<02:38,  2.78it/s]

Batches:  15%|█▍        | 75/513 [00:26<02:38,  2.76it/s]

Batches:  15%|█▍        | 76/513 [00:26<02:40,  2.72it/s]

Batches:  15%|█▌        | 77/513 [00:26<02:37,  2.76it/s]

Batches:  15%|█▌        | 78/513 [00:27<02:33,  2.84it/s]

Batches:  15%|█▌        | 79/513 [00:27<02:28,  2.92it/s]

Batches:  16%|█▌        | 80/513 [00:27<02:26,  2.96it/s]

Batches:  16%|█▌        | 81/513 [00:28<02:24,  2.98it/s]

Batches:  16%|█▌        | 82/513 [00:28<02:32,  2.83it/s]

Batches:  16%|█▌        | 83/513 [00:28<02:29,  2.87it/s]

Batches:  16%|█▋        | 84/513 [00:29<02:32,  2.82it/s]

Batches:  17%|█▋        | 85/513 [00:29<02:28,  2.87it/s]

Batches:  17%|█▋        | 86/513 [00:29<02:26,  2.92it/s]

Batches:  17%|█▋        | 87/513 [00:30<02:29,  2.84it/s]

Batches:  17%|█▋        | 88/513 [00:30<02:31,  2.80it/s]

Batches:  17%|█▋        | 89/513 [00:30<02:32,  2.78it/s]

Batches:  18%|█▊        | 90/513 [00:31<02:35,  2.72it/s]

Batches:  18%|█▊        | 91/513 [00:31<02:31,  2.79it/s]

Batches:  18%|█▊        | 92/513 [00:32<02:30,  2.79it/s]

Batches:  18%|█▊        | 93/513 [00:32<02:30,  2.80it/s]

Batches:  18%|█▊        | 94/513 [00:32<02:29,  2.81it/s]

Batches:  19%|█▊        | 95/513 [00:33<02:26,  2.86it/s]

Batches:  19%|█▊        | 96/513 [00:33<02:24,  2.89it/s]

Batches:  19%|█▉        | 97/513 [00:33<02:22,  2.93it/s]

Batches:  19%|█▉        | 98/513 [00:34<02:20,  2.95it/s]

Batches:  19%|█▉        | 99/513 [00:34<02:22,  2.90it/s]

Batches:  19%|█▉        | 100/513 [00:34<02:25,  2.84it/s]

Batches:  20%|█▉        | 101/513 [00:35<02:29,  2.75it/s]

Batches:  20%|█▉        | 102/513 [00:35<02:36,  2.63it/s]

Batches:  20%|██        | 103/513 [00:36<02:32,  2.70it/s]

Batches:  20%|██        | 104/513 [00:36<02:27,  2.77it/s]

Batches:  20%|██        | 105/513 [00:36<02:22,  2.86it/s]

Batches:  21%|██        | 106/513 [00:37<02:20,  2.89it/s]

Batches:  21%|██        | 107/513 [00:37<02:27,  2.76it/s]

Batches:  21%|██        | 108/513 [00:37<02:25,  2.77it/s]

Batches:  21%|██        | 109/513 [00:38<02:22,  2.84it/s]

Batches:  21%|██▏       | 110/513 [00:38<02:18,  2.92it/s]

Batches:  22%|██▏       | 111/513 [00:38<02:17,  2.93it/s]

Batches:  22%|██▏       | 112/513 [00:39<02:14,  2.98it/s]

Batches:  22%|██▏       | 113/513 [00:39<02:15,  2.95it/s]

Batches:  22%|██▏       | 114/513 [00:39<02:14,  2.97it/s]

Batches:  22%|██▏       | 115/513 [00:40<02:19,  2.86it/s]

Batches:  23%|██▎       | 116/513 [00:40<02:19,  2.84it/s]

Batches:  23%|██▎       | 117/513 [00:40<02:17,  2.88it/s]

Batches:  23%|██▎       | 118/513 [00:41<02:16,  2.90it/s]

Batches:  23%|██▎       | 119/513 [00:41<02:14,  2.92it/s]

Batches:  23%|██▎       | 120/513 [00:41<02:13,  2.94it/s]

Batches:  24%|██▎       | 121/513 [00:42<02:14,  2.92it/s]

Batches:  24%|██▍       | 122/513 [00:42<02:18,  2.83it/s]

Batches:  24%|██▍       | 123/513 [00:42<02:16,  2.87it/s]

Batches:  24%|██▍       | 124/513 [00:43<02:14,  2.88it/s]

Batches:  24%|██▍       | 125/513 [00:43<02:14,  2.88it/s]

Batches:  25%|██▍       | 126/513 [00:43<02:13,  2.90it/s]

Batches:  25%|██▍       | 127/513 [00:44<02:13,  2.90it/s]

Batches:  25%|██▍       | 128/513 [00:44<02:13,  2.88it/s]

Batches:  25%|██▌       | 129/513 [00:44<02:11,  2.91it/s]

Batches:  25%|██▌       | 130/513 [00:45<02:11,  2.92it/s]

Batches:  26%|██▌       | 131/513 [00:45<02:11,  2.91it/s]

Batches:  26%|██▌       | 132/513 [00:46<02:17,  2.77it/s]

Batches:  26%|██▌       | 133/513 [00:46<02:18,  2.74it/s]

Batches:  26%|██▌       | 134/513 [00:46<02:14,  2.81it/s]

Batches:  26%|██▋       | 135/513 [00:47<02:12,  2.85it/s]

Batches:  27%|██▋       | 136/513 [00:47<02:11,  2.86it/s]

Batches:  27%|██▋       | 137/513 [00:47<02:13,  2.82it/s]

Batches:  27%|██▋       | 138/513 [00:48<02:11,  2.86it/s]

Batches:  27%|██▋       | 139/513 [00:48<02:07,  2.93it/s]

Batches:  27%|██▋       | 140/513 [00:48<02:03,  3.01it/s]

Batches:  27%|██▋       | 141/513 [00:49<02:04,  2.99it/s]

Batches:  28%|██▊       | 142/513 [00:49<02:04,  2.98it/s]

Batches:  28%|██▊       | 143/513 [00:49<02:03,  2.99it/s]

Batches:  28%|██▊       | 144/513 [00:50<02:03,  2.98it/s]

Batches:  28%|██▊       | 145/513 [00:50<02:02,  2.99it/s]

Batches:  28%|██▊       | 146/513 [00:50<02:04,  2.96it/s]

Batches:  29%|██▊       | 147/513 [00:51<02:06,  2.88it/s]

Batches:  29%|██▉       | 148/513 [00:51<02:12,  2.76it/s]

Batches:  29%|██▉       | 149/513 [00:51<02:11,  2.78it/s]

Batches:  29%|██▉       | 150/513 [00:52<02:06,  2.87it/s]

Batches:  29%|██▉       | 151/513 [00:52<02:03,  2.94it/s]

Batches:  30%|██▉       | 152/513 [00:52<02:02,  2.96it/s]

Batches:  30%|██▉       | 153/513 [00:53<02:01,  2.95it/s]

Batches:  30%|███       | 154/513 [00:53<01:58,  3.03it/s]

Batches:  30%|███       | 155/513 [00:53<01:58,  3.02it/s]

Batches:  30%|███       | 156/513 [00:54<01:58,  3.01it/s]

Batches:  31%|███       | 157/513 [00:54<02:00,  2.97it/s]

Batches:  31%|███       | 158/513 [00:54<01:59,  2.98it/s]

Batches:  31%|███       | 159/513 [00:55<01:59,  2.95it/s]

Batches:  31%|███       | 160/513 [00:55<01:59,  2.95it/s]

Batches:  31%|███▏      | 161/513 [00:55<01:59,  2.96it/s]

Batches:  32%|███▏      | 162/513 [00:56<01:58,  2.97it/s]

Batches:  32%|███▏      | 163/513 [00:56<01:56,  3.02it/s]

Batches:  32%|███▏      | 164/513 [00:56<01:55,  3.01it/s]

Batches:  32%|███▏      | 165/513 [00:57<01:55,  3.01it/s]

Batches:  32%|███▏      | 166/513 [00:57<01:54,  3.04it/s]

Batches:  33%|███▎      | 167/513 [00:57<01:57,  2.94it/s]

Batches:  33%|███▎      | 168/513 [00:58<01:58,  2.92it/s]

Batches:  33%|███▎      | 169/513 [00:58<02:01,  2.83it/s]

Batches:  33%|███▎      | 170/513 [00:59<02:02,  2.81it/s]

Batches:  33%|███▎      | 171/513 [00:59<01:59,  2.86it/s]

Batches:  34%|███▎      | 172/513 [00:59<01:56,  2.93it/s]

Batches:  34%|███▎      | 173/513 [01:00<01:53,  2.99it/s]

Batches:  34%|███▍      | 174/513 [01:00<01:53,  2.98it/s]

Batches:  34%|███▍      | 175/513 [01:00<01:53,  2.97it/s]

Batches:  34%|███▍      | 176/513 [01:01<02:15,  2.49it/s]

Batches:  35%|███▍      | 177/513 [01:01<02:22,  2.35it/s]

Batches:  35%|███▍      | 178/513 [01:02<02:14,  2.50it/s]

Batches:  35%|███▍      | 179/513 [01:02<02:07,  2.62it/s]

Batches:  35%|███▌      | 180/513 [01:02<02:02,  2.71it/s]

Batches:  35%|███▌      | 181/513 [01:03<02:02,  2.70it/s]

Batches:  35%|███▌      | 182/513 [01:03<01:59,  2.78it/s]

Batches:  36%|███▌      | 183/513 [01:03<01:56,  2.82it/s]

Batches:  36%|███▌      | 184/513 [01:04<01:53,  2.91it/s]

Batches:  36%|███▌      | 185/513 [01:04<01:51,  2.94it/s]

Batches:  36%|███▋      | 186/513 [01:04<01:51,  2.94it/s]

Batches:  36%|███▋      | 187/513 [01:05<01:50,  2.95it/s]

Batches:  37%|███▋      | 188/513 [01:05<01:50,  2.95it/s]

Batches:  37%|███▋      | 189/513 [01:05<01:48,  3.00it/s]

Batches:  37%|███▋      | 190/513 [01:06<01:48,  2.99it/s]

Batches:  37%|███▋      | 191/513 [01:06<01:48,  2.97it/s]

Batches:  37%|███▋      | 192/513 [01:06<01:46,  3.02it/s]

Batches:  38%|███▊      | 193/513 [01:07<01:51,  2.88it/s]

Batches:  38%|███▊      | 194/513 [01:07<01:54,  2.78it/s]

Batches:  38%|███▊      | 195/513 [01:07<01:49,  2.89it/s]

Batches:  38%|███▊      | 196/513 [01:08<01:50,  2.86it/s]

Batches:  38%|███▊      | 197/513 [01:08<01:49,  2.90it/s]

Batches:  39%|███▊      | 198/513 [01:08<01:48,  2.90it/s]

Batches:  39%|███▉      | 199/513 [01:09<01:47,  2.93it/s]

Batches:  39%|███▉      | 200/513 [01:09<01:46,  2.93it/s]

Batches:  39%|███▉      | 201/513 [01:09<01:45,  2.95it/s]

Batches:  39%|███▉      | 202/513 [01:10<01:45,  2.95it/s]

Batches:  40%|███▉      | 203/513 [01:10<01:49,  2.83it/s]

Batches:  40%|███▉      | 204/513 [01:10<01:47,  2.86it/s]

Batches:  40%|███▉      | 205/513 [01:11<01:46,  2.89it/s]

Batches:  40%|████      | 206/513 [01:11<01:44,  2.93it/s]

Batches:  40%|████      | 207/513 [01:11<01:44,  2.94it/s]

Batches:  41%|████      | 208/513 [01:12<01:43,  2.94it/s]

Batches:  41%|████      | 209/513 [01:12<01:41,  2.99it/s]

Batches:  41%|████      | 210/513 [01:12<01:42,  2.95it/s]

Batches:  41%|████      | 211/513 [01:13<01:47,  2.80it/s]

Batches:  41%|████▏     | 212/513 [01:13<01:46,  2.83it/s]

Batches:  42%|████▏     | 213/513 [01:14<01:44,  2.88it/s]

Batches:  42%|████▏     | 214/513 [01:14<01:42,  2.91it/s]

Batches:  42%|████▏     | 215/513 [01:14<01:43,  2.89it/s]

Batches:  42%|████▏     | 216/513 [01:15<01:41,  2.91it/s]

Batches:  42%|████▏     | 217/513 [01:15<01:42,  2.88it/s]

Batches:  42%|████▏     | 218/513 [01:15<01:40,  2.94it/s]

Batches:  43%|████▎     | 219/513 [01:16<01:37,  3.02it/s]

Batches:  43%|████▎     | 220/513 [01:16<01:37,  3.01it/s]

Batches:  43%|████▎     | 221/513 [01:16<01:37,  3.00it/s]

Batches:  43%|████▎     | 222/513 [01:17<01:37,  2.99it/s]

Batches:  43%|████▎     | 223/513 [01:17<01:37,  2.98it/s]

Batches:  44%|████▎     | 224/513 [01:17<01:38,  2.94it/s]

Batches:  44%|████▍     | 225/513 [01:18<01:37,  2.95it/s]

Batches:  44%|████▍     | 226/513 [01:18<01:37,  2.95it/s]

Batches:  44%|████▍     | 227/513 [01:18<01:36,  2.97it/s]

Batches:  44%|████▍     | 228/513 [01:19<01:36,  2.97it/s]

Batches:  45%|████▍     | 229/513 [01:19<01:35,  2.98it/s]

Batches:  45%|████▍     | 230/513 [01:19<01:36,  2.92it/s]

Batches:  45%|████▌     | 231/513 [01:20<01:34,  2.98it/s]

Batches:  45%|████▌     | 232/513 [01:20<01:33,  3.02it/s]

Batches:  45%|████▌     | 233/513 [01:20<01:33,  3.00it/s]

Batches:  46%|████▌     | 234/513 [01:21<01:34,  2.95it/s]

Batches:  46%|████▌     | 235/513 [01:21<01:35,  2.91it/s]

Batches:  46%|████▌     | 236/513 [01:21<01:35,  2.91it/s]

Batches:  46%|████▌     | 237/513 [01:22<01:42,  2.70it/s]

Batches:  46%|████▋     | 238/513 [01:22<01:39,  2.75it/s]

Batches:  47%|████▋     | 239/513 [01:22<01:38,  2.79it/s]

Batches:  47%|████▋     | 240/513 [01:23<01:36,  2.82it/s]

Batches:  47%|████▋     | 241/513 [01:23<01:35,  2.84it/s]

Batches:  47%|████▋     | 242/513 [01:23<01:33,  2.89it/s]

Batches:  47%|████▋     | 243/513 [01:24<01:31,  2.95it/s]

Batches:  48%|████▊     | 244/513 [01:24<01:30,  2.96it/s]

Batches:  48%|████▊     | 245/513 [01:24<01:32,  2.91it/s]

Batches:  48%|████▊     | 246/513 [01:25<01:32,  2.88it/s]

Batches:  48%|████▊     | 247/513 [01:25<01:31,  2.91it/s]

Batches:  48%|████▊     | 248/513 [01:26<01:31,  2.88it/s]

Batches:  49%|████▊     | 249/513 [01:26<01:32,  2.86it/s]

Batches:  49%|████▊     | 250/513 [01:26<01:30,  2.90it/s]

Batches:  49%|████▉     | 251/513 [01:27<01:30,  2.89it/s]

Batches:  49%|████▉     | 252/513 [01:27<01:28,  2.95it/s]

Batches:  49%|████▉     | 253/513 [01:27<01:27,  2.96it/s]

Batches:  50%|████▉     | 254/513 [01:28<01:29,  2.89it/s]

Batches:  50%|████▉     | 255/513 [01:28<01:27,  2.94it/s]

Batches:  50%|████▉     | 256/513 [01:28<01:25,  3.02it/s]

Batches:  50%|█████     | 257/513 [01:29<01:27,  2.94it/s]

Batches:  50%|█████     | 258/513 [01:29<01:26,  2.94it/s]

Batches:  50%|█████     | 259/513 [01:29<01:25,  2.99it/s]

Batches:  51%|█████     | 260/513 [01:30<01:25,  2.95it/s]

Batches:  51%|█████     | 261/513 [01:30<01:45,  2.39it/s]

Batches:  51%|█████     | 262/513 [01:31<01:37,  2.59it/s]

Batches:  51%|█████▏    | 263/513 [01:31<01:33,  2.69it/s]

Batches:  51%|█████▏    | 264/513 [01:31<01:28,  2.81it/s]

Batches:  52%|█████▏    | 265/513 [01:32<01:25,  2.90it/s]

Batches:  52%|█████▏    | 266/513 [01:32<01:26,  2.87it/s]

Batches:  52%|█████▏    | 267/513 [01:32<01:24,  2.89it/s]

Batches:  52%|█████▏    | 268/513 [01:33<01:28,  2.78it/s]

Batches:  52%|█████▏    | 269/513 [01:33<01:25,  2.84it/s]

Batches:  53%|█████▎    | 270/513 [01:33<01:23,  2.92it/s]

Batches:  53%|█████▎    | 271/513 [01:34<01:22,  2.93it/s]

Batches:  53%|█████▎    | 272/513 [01:34<01:20,  2.99it/s]

Batches:  53%|█████▎    | 273/513 [01:34<01:20,  3.00it/s]

Batches:  53%|█████▎    | 274/513 [01:35<01:21,  2.93it/s]

Batches:  54%|█████▎    | 275/513 [01:35<01:20,  2.96it/s]

Batches:  54%|█████▍    | 276/513 [01:35<01:19,  2.98it/s]

Batches:  54%|█████▍    | 277/513 [01:36<01:19,  2.97it/s]

Batches:  54%|█████▍    | 278/513 [01:36<01:21,  2.88it/s]

Batches:  54%|█████▍    | 279/513 [01:36<01:20,  2.89it/s]

Batches:  55%|█████▍    | 280/513 [01:37<01:21,  2.84it/s]

Batches:  55%|█████▍    | 281/513 [01:37<01:20,  2.88it/s]

Batches:  55%|█████▍    | 282/513 [01:37<01:19,  2.92it/s]

Batches:  55%|█████▌    | 283/513 [01:38<01:19,  2.90it/s]

Batches:  55%|█████▌    | 284/513 [01:38<01:19,  2.87it/s]

Batches:  56%|█████▌    | 285/513 [01:38<01:18,  2.91it/s]

Batches:  56%|█████▌    | 286/513 [01:39<01:16,  2.96it/s]

Batches:  56%|█████▌    | 287/513 [01:39<01:15,  2.97it/s]

Batches:  56%|█████▌    | 288/513 [01:39<01:18,  2.86it/s]

Batches:  56%|█████▋    | 289/513 [01:40<01:17,  2.90it/s]

Batches:  57%|█████▋    | 290/513 [01:40<01:16,  2.93it/s]

Batches:  57%|█████▋    | 291/513 [01:40<01:15,  2.94it/s]

Batches:  57%|█████▋    | 292/513 [01:41<01:14,  2.96it/s]

Batches:  57%|█████▋    | 293/513 [01:41<01:15,  2.93it/s]

Batches:  57%|█████▋    | 294/513 [01:41<01:17,  2.83it/s]

Batches:  58%|█████▊    | 295/513 [01:42<01:15,  2.89it/s]

Batches:  58%|█████▊    | 296/513 [01:42<01:14,  2.93it/s]

Batches:  58%|█████▊    | 297/513 [01:42<01:13,  2.94it/s]

Batches:  58%|█████▊    | 298/513 [01:43<01:13,  2.92it/s]

Batches:  58%|█████▊    | 299/513 [01:43<01:12,  2.96it/s]

Batches:  58%|█████▊    | 300/513 [01:44<01:13,  2.91it/s]

Batches:  59%|█████▊    | 301/513 [01:44<01:14,  2.84it/s]

Batches:  59%|█████▉    | 302/513 [01:44<01:13,  2.87it/s]

Batches:  59%|█████▉    | 303/513 [01:45<01:17,  2.69it/s]

Batches:  59%|█████▉    | 304/513 [01:45<01:14,  2.80it/s]

Batches:  59%|█████▉    | 305/513 [01:45<01:16,  2.73it/s]

Batches:  60%|█████▉    | 306/513 [01:46<01:14,  2.79it/s]

Batches:  60%|█████▉    | 307/513 [01:46<01:13,  2.80it/s]

Batches:  60%|██████    | 308/513 [01:46<01:11,  2.88it/s]

Batches:  60%|██████    | 309/513 [01:47<01:09,  2.92it/s]

Batches:  60%|██████    | 310/513 [01:47<01:11,  2.82it/s]

Batches:  61%|██████    | 311/513 [01:47<01:10,  2.86it/s]

Batches:  61%|██████    | 312/513 [01:48<01:12,  2.77it/s]

Batches:  61%|██████    | 313/513 [01:48<01:10,  2.84it/s]

Batches:  61%|██████    | 314/513 [01:49<01:09,  2.85it/s]

Batches:  61%|██████▏   | 315/513 [01:49<01:07,  2.95it/s]

Batches:  62%|██████▏   | 316/513 [01:49<01:06,  2.96it/s]

Batches:  62%|██████▏   | 317/513 [01:50<01:08,  2.85it/s]

Batches:  62%|██████▏   | 318/513 [01:50<01:07,  2.88it/s]

Batches:  62%|██████▏   | 319/513 [01:50<01:09,  2.78it/s]

Batches:  62%|██████▏   | 320/513 [01:51<01:08,  2.82it/s]

Batches:  63%|██████▎   | 321/513 [01:51<01:07,  2.86it/s]

Batches:  63%|██████▎   | 322/513 [01:51<01:09,  2.76it/s]

Batches:  63%|██████▎   | 323/513 [01:52<01:13,  2.57it/s]

Batches:  63%|██████▎   | 324/513 [01:52<01:09,  2.73it/s]

Batches:  63%|██████▎   | 325/513 [01:52<01:09,  2.69it/s]

Batches:  64%|██████▎   | 326/513 [01:53<01:07,  2.76it/s]

Batches:  64%|██████▎   | 327/513 [01:53<01:05,  2.86it/s]

Batches:  64%|██████▍   | 328/513 [01:53<01:03,  2.91it/s]

Batches:  64%|██████▍   | 329/513 [01:54<01:03,  2.90it/s]

Batches:  64%|██████▍   | 330/513 [01:54<01:02,  2.91it/s]

Batches:  65%|██████▍   | 331/513 [01:55<01:03,  2.85it/s]

Batches:  65%|██████▍   | 332/513 [01:55<01:03,  2.83it/s]

Batches:  65%|██████▍   | 333/513 [01:55<01:02,  2.88it/s]

Batches:  65%|██████▌   | 334/513 [01:56<01:00,  2.94it/s]

Batches:  65%|██████▌   | 335/513 [01:56<00:59,  2.99it/s]

Batches:  65%|██████▌   | 336/513 [01:56<01:01,  2.86it/s]

Batches:  66%|██████▌   | 337/513 [01:57<01:02,  2.80it/s]

Batches:  66%|██████▌   | 338/513 [01:57<01:01,  2.85it/s]

Batches:  66%|██████▌   | 339/513 [01:57<01:00,  2.89it/s]

Batches:  66%|██████▋   | 340/513 [01:58<01:00,  2.87it/s]

Batches:  66%|██████▋   | 341/513 [01:58<00:59,  2.90it/s]

Batches:  67%|██████▋   | 342/513 [01:58<00:57,  2.96it/s]

Batches:  67%|██████▋   | 343/513 [01:59<00:56,  2.98it/s]

Batches:  67%|██████▋   | 344/513 [01:59<00:57,  2.93it/s]

Batches:  67%|██████▋   | 345/513 [01:59<00:58,  2.89it/s]

Batches:  67%|██████▋   | 346/513 [02:00<00:57,  2.92it/s]

Batches:  68%|██████▊   | 347/513 [02:00<00:56,  2.96it/s]

Batches:  68%|██████▊   | 348/513 [02:00<00:58,  2.80it/s]

Batches:  68%|██████▊   | 349/513 [02:01<00:58,  2.80it/s]

Batches:  68%|██████▊   | 350/513 [02:01<00:57,  2.85it/s]

Batches:  68%|██████▊   | 351/513 [02:01<00:55,  2.91it/s]

Batches:  69%|██████▊   | 352/513 [02:02<00:54,  2.94it/s]

Batches:  69%|██████▉   | 353/513 [02:02<00:53,  2.99it/s]

Batches:  69%|██████▉   | 354/513 [02:02<00:53,  2.99it/s]

Batches:  69%|██████▉   | 355/513 [02:03<00:57,  2.77it/s]

Batches:  69%|██████▉   | 356/513 [02:03<00:55,  2.82it/s]

Batches:  70%|██████▉   | 357/513 [02:03<00:53,  2.91it/s]

Batches:  70%|██████▉   | 358/513 [02:04<00:55,  2.78it/s]

Batches:  70%|██████▉   | 359/513 [02:04<00:54,  2.83it/s]

Batches:  70%|███████   | 360/513 [02:05<00:52,  2.89it/s]

Batches:  70%|███████   | 361/513 [02:05<00:51,  2.94it/s]

Batches:  71%|███████   | 362/513 [02:05<00:51,  2.95it/s]

Batches:  71%|███████   | 363/513 [02:06<00:53,  2.81it/s]

Batches:  71%|███████   | 364/513 [02:06<00:52,  2.86it/s]

Batches:  71%|███████   | 365/513 [02:06<00:50,  2.93it/s]

Batches:  71%|███████▏  | 366/513 [02:07<00:50,  2.91it/s]

Batches:  72%|███████▏  | 367/513 [02:07<00:51,  2.83it/s]

Batches:  72%|███████▏  | 368/513 [02:07<00:50,  2.87it/s]

Batches:  72%|███████▏  | 369/513 [02:08<00:49,  2.92it/s]

Batches:  72%|███████▏  | 370/513 [02:08<00:48,  2.94it/s]

Batches:  72%|███████▏  | 371/513 [02:08<00:48,  2.95it/s]

Batches:  73%|███████▎  | 372/513 [02:09<00:48,  2.90it/s]

Batches:  73%|███████▎  | 373/513 [02:09<00:47,  2.95it/s]

Batches:  73%|███████▎  | 374/513 [02:09<00:46,  3.00it/s]

Batches:  73%|███████▎  | 375/513 [02:10<00:46,  2.99it/s]

Batches:  73%|███████▎  | 376/513 [02:10<00:46,  2.94it/s]

Batches:  73%|███████▎  | 377/513 [02:10<00:46,  2.92it/s]

Batches:  74%|███████▎  | 378/513 [02:11<00:45,  2.94it/s]

Batches:  74%|███████▍  | 379/513 [02:11<00:46,  2.90it/s]

Batches:  74%|███████▍  | 380/513 [02:11<00:45,  2.93it/s]

Batches:  74%|███████▍  | 381/513 [02:12<00:45,  2.91it/s]

Batches:  74%|███████▍  | 382/513 [02:12<00:45,  2.90it/s]

Batches:  75%|███████▍  | 383/513 [02:13<00:47,  2.74it/s]

Batches:  75%|███████▍  | 384/513 [02:13<00:48,  2.64it/s]

Batches:  75%|███████▌  | 385/513 [02:13<00:46,  2.76it/s]

Batches:  75%|███████▌  | 386/513 [02:14<00:44,  2.85it/s]

Batches:  75%|███████▌  | 387/513 [02:14<00:43,  2.89it/s]

Batches:  76%|███████▌  | 388/513 [02:14<00:42,  2.93it/s]

Batches:  76%|███████▌  | 389/513 [02:15<00:41,  2.96it/s]

Batches:  76%|███████▌  | 390/513 [02:15<00:42,  2.92it/s]

Batches:  76%|███████▌  | 391/513 [02:15<00:41,  2.93it/s]

Batches:  76%|███████▋  | 392/513 [02:16<00:40,  2.98it/s]

Batches:  77%|███████▋  | 393/513 [02:16<00:41,  2.91it/s]

Batches:  77%|███████▋  | 394/513 [02:16<00:40,  2.96it/s]

Batches:  77%|███████▋  | 395/513 [02:17<00:39,  2.95it/s]

Batches:  77%|███████▋  | 396/513 [02:17<00:39,  3.00it/s]

Batches:  77%|███████▋  | 397/513 [02:17<00:40,  2.90it/s]

Batches:  78%|███████▊  | 398/513 [02:18<00:39,  2.91it/s]

Batches:  78%|███████▊  | 399/513 [02:18<00:38,  2.97it/s]

Batches:  78%|███████▊  | 400/513 [02:18<00:38,  2.97it/s]

Batches:  78%|███████▊  | 401/513 [02:19<00:38,  2.93it/s]

Batches:  78%|███████▊  | 402/513 [02:19<00:37,  2.99it/s]

Batches:  79%|███████▊  | 403/513 [02:19<00:36,  3.00it/s]

Batches:  79%|███████▉  | 404/513 [02:20<00:36,  3.00it/s]

Batches:  79%|███████▉  | 405/513 [02:20<00:35,  3.01it/s]

Batches:  79%|███████▉  | 406/513 [02:20<00:35,  3.03it/s]

Batches:  79%|███████▉  | 407/513 [02:21<00:35,  3.01it/s]

Batches:  80%|███████▉  | 408/513 [02:21<00:34,  3.02it/s]

Batches:  80%|███████▉  | 409/513 [02:21<00:36,  2.87it/s]

Batches:  80%|███████▉  | 410/513 [02:22<00:34,  2.98it/s]

Batches:  80%|████████  | 411/513 [02:22<00:34,  2.97it/s]

Batches:  80%|████████  | 412/513 [02:22<00:33,  3.01it/s]

Batches:  81%|████████  | 413/513 [02:23<00:34,  2.92it/s]

Batches:  81%|████████  | 414/513 [02:23<00:33,  2.93it/s]

Batches:  81%|████████  | 415/513 [02:23<00:34,  2.88it/s]

Batches:  81%|████████  | 416/513 [02:24<00:35,  2.70it/s]

Batches:  81%|████████▏ | 417/513 [02:24<00:34,  2.77it/s]

Batches:  81%|████████▏ | 418/513 [02:24<00:34,  2.79it/s]

Batches:  82%|████████▏ | 419/513 [02:25<00:33,  2.85it/s]

Batches:  82%|████████▏ | 420/513 [02:25<00:32,  2.90it/s]

Batches:  82%|████████▏ | 421/513 [02:25<00:31,  2.93it/s]

Batches:  82%|████████▏ | 422/513 [02:26<00:30,  2.96it/s]

Batches:  82%|████████▏ | 423/513 [02:26<00:29,  3.00it/s]

Batches:  83%|████████▎ | 424/513 [02:26<00:29,  3.03it/s]

Batches:  83%|████████▎ | 425/513 [02:27<00:29,  3.01it/s]

Batches:  83%|████████▎ | 426/513 [02:27<00:29,  3.00it/s]

Batches:  83%|████████▎ | 427/513 [02:27<00:28,  2.98it/s]

Batches:  83%|████████▎ | 428/513 [02:28<00:28,  2.99it/s]

Batches:  84%|████████▎ | 429/513 [02:28<00:28,  3.00it/s]

Batches:  84%|████████▍ | 430/513 [02:28<00:28,  2.96it/s]

Batches:  84%|████████▍ | 431/513 [02:29<00:27,  2.94it/s]

Batches:  84%|████████▍ | 432/513 [02:29<00:28,  2.83it/s]

Batches:  84%|████████▍ | 433/513 [02:30<00:27,  2.88it/s]

Batches:  85%|████████▍ | 434/513 [02:30<00:27,  2.92it/s]

Batches:  85%|████████▍ | 435/513 [02:30<00:26,  2.95it/s]

Batches:  85%|████████▍ | 436/513 [02:31<00:25,  3.00it/s]

Batches:  85%|████████▌ | 437/513 [02:31<00:26,  2.90it/s]

Batches:  85%|████████▌ | 438/513 [02:31<00:26,  2.84it/s]

Batches:  86%|████████▌ | 439/513 [02:32<00:25,  2.91it/s]

Batches:  86%|████████▌ | 440/513 [02:32<00:25,  2.88it/s]

Batches:  86%|████████▌ | 441/513 [02:32<00:24,  2.91it/s]

Batches:  86%|████████▌ | 442/513 [02:33<00:24,  2.94it/s]

Batches:  86%|████████▋ | 443/513 [02:33<00:23,  2.94it/s]

Batches:  87%|████████▋ | 444/513 [02:33<00:23,  2.90it/s]

Batches:  87%|████████▋ | 445/513 [02:34<00:23,  2.90it/s]

Batches:  87%|████████▋ | 446/513 [02:34<00:23,  2.80it/s]

Batches:  87%|████████▋ | 447/513 [02:34<00:23,  2.86it/s]

Batches:  87%|████████▋ | 448/513 [02:35<00:22,  2.92it/s]

Batches:  88%|████████▊ | 449/513 [02:35<00:22,  2.91it/s]

Batches:  88%|████████▊ | 450/513 [02:35<00:21,  2.95it/s]

Batches:  88%|████████▊ | 451/513 [02:36<00:20,  2.96it/s]

Batches:  88%|████████▊ | 452/513 [02:36<00:20,  2.98it/s]

Batches:  88%|████████▊ | 453/513 [02:36<00:19,  3.02it/s]

Batches:  88%|████████▊ | 454/513 [02:37<00:19,  3.02it/s]

Batches:  89%|████████▊ | 455/513 [02:37<00:19,  2.97it/s]

Batches:  89%|████████▉ | 456/513 [02:37<00:19,  2.89it/s]

Batches:  89%|████████▉ | 457/513 [02:38<00:19,  2.89it/s]

Batches:  89%|████████▉ | 458/513 [02:38<00:18,  2.93it/s]

Batches:  89%|████████▉ | 459/513 [02:38<00:18,  2.94it/s]

Batches:  90%|████████▉ | 460/513 [02:39<00:17,  2.98it/s]

Batches:  90%|████████▉ | 461/513 [02:39<00:17,  2.99it/s]

Batches:  90%|█████████ | 462/513 [02:39<00:16,  3.02it/s]

Batches:  90%|█████████ | 463/513 [02:40<00:16,  3.01it/s]

Batches:  90%|█████████ | 464/513 [02:40<00:16,  3.02it/s]

Batches:  91%|█████████ | 465/513 [02:40<00:15,  3.07it/s]

Batches:  91%|█████████ | 466/513 [02:41<00:15,  3.07it/s]

Batches:  91%|█████████ | 467/513 [02:41<00:15,  2.93it/s]

Batches:  91%|█████████ | 468/513 [02:41<00:15,  2.95it/s]

Batches:  91%|█████████▏| 469/513 [02:42<00:14,  2.98it/s]

Batches:  92%|█████████▏| 470/513 [02:42<00:14,  2.94it/s]

Batches:  92%|█████████▏| 471/513 [02:42<00:14,  2.96it/s]

Batches:  92%|█████████▏| 472/513 [02:43<00:14,  2.93it/s]

Batches:  92%|█████████▏| 473/513 [02:43<00:13,  2.93it/s]

Batches:  92%|█████████▏| 474/513 [02:43<00:13,  2.95it/s]

Batches:  93%|█████████▎| 475/513 [02:44<00:12,  2.97it/s]

Batches:  93%|█████████▎| 476/513 [02:44<00:12,  2.98it/s]

Batches:  93%|█████████▎| 477/513 [02:44<00:12,  3.00it/s]

Batches:  93%|█████████▎| 478/513 [02:45<00:11,  2.93it/s]

Batches:  93%|█████████▎| 479/513 [02:45<00:11,  2.94it/s]

Batches:  94%|█████████▎| 480/513 [02:45<00:11,  2.97it/s]

Batches:  94%|█████████▍| 481/513 [02:46<00:10,  3.03it/s]

Batches:  94%|█████████▍| 482/513 [02:46<00:10,  2.98it/s]

Batches:  94%|█████████▍| 483/513 [02:46<00:09,  3.00it/s]

Batches:  94%|█████████▍| 484/513 [02:47<00:09,  3.04it/s]

Batches:  95%|█████████▍| 485/513 [02:47<00:09,  3.03it/s]

Batches:  95%|█████████▍| 486/513 [02:47<00:08,  3.01it/s]

Batches:  95%|█████████▍| 487/513 [02:48<00:08,  3.06it/s]

Batches:  95%|█████████▌| 488/513 [02:48<00:08,  3.04it/s]

Batches:  95%|█████████▌| 489/513 [02:48<00:07,  3.04it/s]

Batches:  96%|█████████▌| 490/513 [02:49<00:08,  2.81it/s]

Batches:  96%|█████████▌| 491/513 [02:49<00:07,  2.85it/s]

Batches:  96%|█████████▌| 492/513 [02:50<00:07,  2.78it/s]

Batches:  96%|█████████▌| 493/513 [02:50<00:07,  2.84it/s]

Batches:  96%|█████████▋| 494/513 [02:50<00:06,  2.79it/s]

Batches:  96%|█████████▋| 495/513 [02:51<00:06,  2.86it/s]

Batches:  97%|█████████▋| 496/513 [02:51<00:05,  2.90it/s]

Batches:  97%|█████████▋| 497/513 [02:51<00:05,  2.96it/s]

Batches:  97%|█████████▋| 498/513 [02:52<00:05,  2.99it/s]

Batches:  97%|█████████▋| 499/513 [02:52<00:04,  3.05it/s]

Batches:  97%|█████████▋| 500/513 [02:52<00:04,  3.07it/s]

Batches:  98%|█████████▊| 501/513 [02:53<00:03,  3.07it/s]

Batches:  98%|█████████▊| 502/513 [02:53<00:03,  2.97it/s]

Batches:  98%|█████████▊| 503/513 [02:53<00:03,  2.82it/s]

Batches:  98%|█████████▊| 504/513 [02:54<00:03,  2.86it/s]

Batches:  98%|█████████▊| 505/513 [02:54<00:02,  2.91it/s]

Batches:  99%|█████████▊| 506/513 [02:54<00:02,  2.91it/s]

Batches:  99%|█████████▉| 507/513 [02:55<00:02,  2.94it/s]

Batches:  99%|█████████▉| 508/513 [02:55<00:01,  2.93it/s]

Batches:  99%|█████████▉| 509/513 [02:55<00:01,  2.88it/s]

Batches:  99%|█████████▉| 510/513 [02:56<00:01,  2.90it/s]

Batches: 100%|█████████▉| 511/513 [02:56<00:00,  2.81it/s]

Batches: 100%|█████████▉| 512/513 [02:56<00:00,  2.82it/s]

Batches: 100%|██████████| 513/513 [02:57<00:00,  2.96it/s]

Batches: 100%|██████████| 513/513 [02:57<00:00,  2.89it/s]




Scoring test pairs (nli-deberta-v3-base, context on anchor)...


Batches:   0%|          | 0/57 [00:00<?, ?it/s]

Batches:   2%|▏         | 1/57 [00:00<00:18,  2.96it/s]

Batches:   4%|▎         | 2/57 [00:00<00:18,  2.92it/s]

Batches:   5%|▌         | 3/57 [00:01<00:18,  2.97it/s]

Batches:   7%|▋         | 4/57 [00:01<00:17,  3.00it/s]

Batches:   9%|▉         | 5/57 [00:01<00:17,  3.01it/s]

Batches:  11%|█         | 6/57 [00:02<00:16,  3.00it/s]

Batches:  12%|█▏        | 7/57 [00:02<00:17,  2.90it/s]

Batches:  14%|█▍        | 8/57 [00:02<00:17,  2.82it/s]

Batches:  16%|█▌        | 9/57 [00:03<00:16,  2.87it/s]

Batches:  18%|█▊        | 10/57 [00:03<00:16,  2.89it/s]

Batches:  19%|█▉        | 11/57 [00:03<00:16,  2.79it/s]

Batches:  21%|██        | 12/57 [00:04<00:15,  2.87it/s]

Batches:  23%|██▎       | 13/57 [00:04<00:15,  2.87it/s]

Batches:  25%|██▍       | 14/57 [00:04<00:14,  2.92it/s]

Batches:  26%|██▋       | 15/57 [00:05<00:14,  2.91it/s]

Batches:  28%|██▊       | 16/57 [00:05<00:14,  2.86it/s]

Batches:  30%|██▉       | 17/57 [00:05<00:14,  2.70it/s]

Batches:  32%|███▏      | 18/57 [00:06<00:14,  2.77it/s]

Batches:  33%|███▎      | 19/57 [00:06<00:13,  2.83it/s]

Batches:  35%|███▌      | 20/57 [00:06<00:12,  2.85it/s]

Batches:  37%|███▋      | 21/57 [00:07<00:12,  2.90it/s]

Batches:  39%|███▊      | 22/57 [00:07<00:11,  2.92it/s]

Batches:  40%|████      | 23/57 [00:08<00:11,  2.85it/s]

Batches:  42%|████▏     | 24/57 [00:08<00:11,  2.92it/s]

Batches:  44%|████▍     | 25/57 [00:08<00:10,  2.94it/s]

Batches:  46%|████▌     | 26/57 [00:08<00:10,  2.95it/s]

Batches:  47%|████▋     | 27/57 [00:09<00:10,  2.98it/s]

Batches:  49%|████▉     | 28/57 [00:09<00:09,  2.98it/s]

Batches:  51%|█████     | 29/57 [00:09<00:09,  2.99it/s]

Batches:  53%|█████▎    | 30/57 [00:10<00:08,  3.01it/s]

Batches:  54%|█████▍    | 31/57 [00:10<00:08,  2.99it/s]

Batches:  56%|█████▌    | 32/57 [00:11<00:08,  2.89it/s]

Batches:  58%|█████▊    | 33/57 [00:11<00:08,  2.93it/s]

Batches:  60%|█████▉    | 34/57 [00:11<00:07,  2.96it/s]

Batches:  61%|██████▏   | 35/57 [00:12<00:07,  2.99it/s]

Batches:  63%|██████▎   | 36/57 [00:12<00:06,  3.06it/s]

Batches:  65%|██████▍   | 37/57 [00:12<00:06,  2.97it/s]

Batches:  67%|██████▋   | 38/57 [00:13<00:06,  2.97it/s]

Batches:  68%|██████▊   | 39/57 [00:13<00:06,  2.95it/s]

Batches:  70%|███████   | 40/57 [00:13<00:05,  2.98it/s]

Batches:  72%|███████▏  | 41/57 [00:14<00:05,  2.98it/s]

Batches:  74%|███████▎  | 42/57 [00:14<00:05,  2.86it/s]

Batches:  75%|███████▌  | 43/57 [00:14<00:04,  2.81it/s]

Batches:  77%|███████▋  | 44/57 [00:15<00:04,  2.82it/s]

Batches:  79%|███████▉  | 45/57 [00:15<00:04,  2.90it/s]

Batches:  81%|████████  | 46/57 [00:15<00:03,  2.96it/s]

Batches:  82%|████████▏ | 47/57 [00:16<00:03,  2.96it/s]

Batches:  84%|████████▍ | 48/57 [00:16<00:03,  2.98it/s]

Batches:  86%|████████▌ | 49/57 [00:16<00:02,  3.02it/s]

Batches:  88%|████████▊ | 50/57 [00:17<00:02,  3.01it/s]

Batches:  89%|████████▉ | 51/57 [00:17<00:01,  3.02it/s]

Batches:  91%|█████████ | 52/57 [00:17<00:01,  2.98it/s]

Batches:  93%|█████████▎| 53/57 [00:18<00:01,  2.97it/s]

Batches:  95%|█████████▍| 54/57 [00:18<00:01,  2.99it/s]

Batches:  96%|█████████▋| 55/57 [00:18<00:00,  2.96it/s]

Batches:  98%|█████████▊| 56/57 [00:19<00:00,  2.96it/s]

Batches: 100%|██████████| 57/57 [00:19<00:00,  3.00it/s]

Batches: 100%|██████████| 57/57 [00:19<00:00,  2.93it/s]

NLI-CE Fold 0: raw=0.439924 | iso=0.461154 | lin=0.439924


NLI-CE Fold 1: raw=0.424813 | iso=0.442479 | lin=0.424813


NLI-CE Fold 2: raw=0.418987 | iso=0.434908 | lin=0.418987


NLI-CE Fold 3: raw=0.421060 | iso=0.446963 | lin=0.421060


NLI-CE Fold 4: raw=0.428902 | iso=0.445477 | lin=0.428902


NLI-CE OOF Pearson: raw= 0.426387  iso= 0.445772  lin= 0.42618
Saved NLI DeBERTa CE artifacts and overwrote CE files for stacker. Elapsed 3.3 min





In [4]:
# BM25 features (CPU, no leakage): per-fold IDF on train-only; compute bm25_ab (anchor->target) and bm25_ba (target->anchor)
# Test features are averaged over folds (fit IDF on train!=fold, score all test rows).
import time, re, math, numpy as np, pandas as pd
from collections import Counter
from tqdm.auto import tqdm

SEED = 42
np.random.seed(SEED)

def tokenize_words(s: str):
    """Split text into lowercase word tokens.

    The argument is coerced with ``str()`` and lower-cased; every maximal run
    of regex "word" characters (letters, digits, underscore) is then returned
    in order of appearance. Anything else only acts as a separator.

    Args:
        s: Text to tokenize. Non-string values are stringified first.

    Returns:
        list[str]: Lowercase tokens; empty when no word characters are found.
    """
    lowered = str(s).lower()
    return [match.group(0) for match in re.finditer(r"\w+", lowered)]

def build_df(tokens_list):
    """Compute document frequencies over a collection of token lists.

    Each inner token list is treated as one document. A term contributes at
    most 1 per document, no matter how often it repeats inside it.

    Args:
        tokens_list: Iterable of token sequences, one per document.

    Returns:
        Counter: term -> number of documents containing that term.
    """
    doc_freq = Counter()
    for doc_tokens in tokens_list:
        # De-duplicate within the document so repeats are counted once.
        for term in set(doc_tokens):
            doc_freq[term] += 1
    return doc_freq

def bm25_idf(df_counter, N):
    # idf(t) = log((N - df + 0.5)/(df + 0.5) + 1)
    idf = {}
    for t, df in df_counter.items():
        idf[t] = math.log((N - df + 0.5)/(df + 0.5) + 1.0)
    return idf

def bm25_score(query_tokens, doc_tokens, idf, avgdl, k1=1.5, b=0.75):
    """Score one document against one query with Okapi BM25.

    Args:
        query_tokens: Tokens of the query. Duplicates are collapsed, so each
            distinct query term contributes at most once.
        doc_tokens: Tokens of the document being scored.
        idf: Mapping term -> idf weight. Query terms absent from it are skipped.
        avgdl: Average document length used for length normalisation.
        k1: Term-frequency saturation parameter.
        b: Strength of the document-length normalisation (0 disables it).

    Returns:
        float: The BM25 score; 0.0 when either token list is empty or when no
        query term both has an idf weight and occurs in the document.
    """
    if not (query_tokens and doc_tokens):
        return 0.0

    term_freqs = Counter(doc_tokens)
    doc_len = len(doc_tokens)

    total = 0.0
    # Iterate over distinct query terms only; repeated query terms must not be
    # weighted more than once.
    for term in set(query_tokens):
        tf = term_freqs.get(term, 0)
        if tf == 0 or term not in idf:
            continue
        # The small epsilons guard against division by zero (avgdl == 0).
        denom = tf + k1 * (1.0 - b + b * (doc_len / (avgdl + 1e-12)))
        total += idf[term] * (tf * (k1 + 1.0)) / (denom + 1e-12)
    return float(total)

# --- Driver: build per-fold BM25 features for train (out-of-fold) and test, then save them as CSV ---
t0 = time.time()
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
folds = pd.read_csv('folds_by_id.csv')
# Attach the fold assignment by id. validate='one_to_one' asks pandas to raise if 'id' is duplicated
# on either side. With how='left', train rows without a match get NaN in 'fold'; NaN >= 0 is False,
# so the assert below catches an incomplete merge.
train = train.merge(folds, on='id', how='left', validate='one_to_one')
assert (train['fold']>=0).all(), 'Fold merge by id failed'

# Pre-tokenize every anchor/target once so the fold loop only does lookups and scoring
A_tr_tok = [tokenize_words(x) for x in train['anchor'].astype(str).tolist()]
B_tr_tok = [tokenize_words(x) for x in train['target'].astype(str).tolist()]
A_te_tok = [tokenize_words(x) for x in test['anchor'].astype(str).tolist()]
B_te_tok = [tokenize_words(x) for x in test['target'].astype(str).tolist()]

fold_arr = train['fold'].values.astype(int)
n_tr = len(train); n_te = len(test)

# Out-of-fold feature arrays: each train row is filled in the iteration where its fold is held out
bm25_ab_oof = np.zeros(n_tr, dtype=np.float32)
bm25_ba_oof = np.zeros(n_tr, dtype=np.float32)
# One test-score vector per fold; averaged after the loop
bm25_ab_te_folds = []
bm25_ba_te_folds = []

for f in sorted(np.unique(fold_arr)):
    f0 = time.time()
    # tr_idx: rows outside fold f (used to fit corpus statistics); va_idx: rows in fold f (scored)
    tr_idx = np.where(fold_arr != f)[0]
    va_idx = np.where(fold_arr == f)[0]
    # Fit IDF on train-only for targets (for A->B) and anchors (for B->A)
    corpus_B = [B_tr_tok[i] for i in tr_idx]
    corpus_A = [A_tr_tok[i] for i in tr_idx]
    N_B = len(corpus_B); N_A = len(corpus_A)
    df_B = build_df(corpus_B); idf_B = bm25_idf(df_B, N_B)
    df_A = build_df(corpus_A); idf_A = bm25_idf(df_A, N_A)
    # Average document length (in tokens) per corpus; falls back to 0.0 for an empty corpus
    avgdl_B = float(np.mean([len(t) for t in corpus_B]) if corpus_B else 0.0)
    avgdl_A = float(np.mean([len(t) for t in corpus_A]) if corpus_A else 0.0)

    # Compute OOF for this fold (held-out rows are scored with statistics from the other folds)
    for i in va_idx:
        # bm25_ab: query = anchor, doc = target
        bm25_ab_oof[i] = bm25_score(A_tr_tok[i], B_tr_tok[i], idf_B, avgdl_B)
        # bm25_ba: query = target, doc = anchor
        bm25_ba_oof[i] = bm25_score(B_tr_tok[i], A_tr_tok[i], idf_A, avgdl_A)

    # Compute test features for this fold: every test row is scored with this fold's IDF/avgdl
    te_ab = np.zeros(n_te, dtype=np.float32)
    te_ba = np.zeros(n_te, dtype=np.float32)
    for j in range(n_te):
        te_ab[j] = bm25_score(A_te_tok[j], B_te_tok[j], idf_B, avgdl_B)
        te_ba[j] = bm25_score(B_te_tok[j], A_te_tok[j], idf_A, avgdl_A)
    bm25_ab_te_folds.append(te_ab)
    bm25_ba_te_folds.append(te_ba)
    print(f'BM25 fold {int(f)} done in {time.time()-f0:.1f}s', flush=True)

# Aggregate test across folds (mean): stack the per-fold vectors row-wise and average over the fold axis
bm25_ab_te = np.mean(np.vstack(bm25_ab_te_folds), axis=0).astype(np.float32)
bm25_ba_te = np.mean(np.vstack(bm25_ba_te_folds), axis=0).astype(np.float32)

# Save artifacts: OOF features keyed by train id, fold-averaged features keyed by test id
pd.DataFrame({'id': train['id'], 'bm25_ab': bm25_ab_oof, 'bm25_ba': bm25_ba_oof}).to_csv('oof_bm25.csv', index=False)
pd.DataFrame({'id': test['id'], 'bm25_ab': bm25_ab_te, 'bm25_ba': bm25_ba_te}).to_csv('bm25_test.csv', index=False)
print('Saved oof_bm25.csv and bm25_test.csv; elapsed', round((time.time()-t0)/60,2), 'min')

BM25 fold 0 done in 0.1s


BM25 fold 1 done in 0.1s


BM25 fold 2 done in 0.1s


BM25 fold 3 done in 0.1s


BM25 fold 4 done in 0.1s


Saved oof_bm25.csv and bm25_test.csv; elapsed 0.01 min
