In [9]:
import os
import webdataset as wds
import logging
import numpy
import json
import torch
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
from pathlib import Path
from typing import Optional
from itertools import islice
from glob import glob
from torch.utils.data import DataLoader, IterableDataset
from webdataset.filters import pipelinefilter
from webdataset.utils import pytorch_worker_info

import pickle
from traceback_with_variables import activate_in_ipython_by_import
from sklearn.preprocessing import StandardScaler

In [2]:
NSECS_IN_SEC = 1000000000


In [3]:
!ls YandexCup2024v2/YaCupTrain/0/
!ls YandexCup2024v2/YaCupTest/0/

control.csv			  localization-quant20mc.csv
control_norm_v1.csv		  localization-quant20mc_v2.csv
control_norm_v1-quant20mc.csv	  metadata.json
control_norm_v1-quant20mc_v2.csv  quant20mc
localization.csv		  speed-quant20mc_v2.csv
control.csv			  metadata.json
control_norm_v1.csv		  quant20mc
control_norm_v1-quant20mc.csv	  requested_stamps.csv
control_norm_v1-quant20mc_v2.csv  requested_stamps-quant20mc.csv
localization.csv		  requested_stamps-quant20mc_v2.csv
localization-quant20mc.csv	  speed-quant20mc_v2.csv
localization-quant20mc_v2.csv


In [4]:
!head YandexCup2024v2/YaCupTest/0/localization-quant20mc.csv

stamp_ns,x,y,z,roll,pitch,yaw
-1.0,-1482.5964825428584,-1321.9558961291534,-16.018466158558372,0.02882125843497826,-0.002113178076574012,2.2399228709997785
0.0,-1482.6526825596848,-1321.8833767041253,-16.014847819020254,0.02750229563586146,-0.0019653949516745296,2.2403362296228497
1.0,-1482.7093438252623,-1321.8115620279045,-16.01401760380355,0.02716286517762281,-0.0017751331689762919,2.2407700297655886
2.0,-1482.7661157231817,-1321.7399163884795,-16.013856131260358,0.027058379435229263,-0.0015746827117754207,2.241208732887871
3.0,-1482.8220401857484,-1321.6695184390016,-16.01243100813739,0.027364666933622554,-0.0014992336510339253,2.241686976786969
4.0,-1482.8777210708006,-1321.5994791118244,-16.010642675164604,0.027789022582724573,-0.0014597136193783956,2.2421765858528615
5.0,-1482.9334373320291,-1321.5294253602938,-16.010197848543484,0.02775735207475388,-0.0014166340809576583,2.2426516496027378
6.0,-1482.9891618164627,-1321.4593682558168,-16.010065320469447,0.027619678118336502,-0.0

In [5]:
!head -2 YandexCup2024v2/YaCupTrain/0/speed-quant20mc_v2.csv

frame_id,stamp_ns,x,y,z,roll,pitch,yaw,stamp_ms,v_x,v_y,mod_v,v_yaw,v_direct
0.0,0.0,-4292.313704696657,-14527.266319463157,66.0433143307047,0.0039258153246947,-0.0541978569971037,-1.9368095592835344,0.0,-0.08813401186853298,-0.22993993703857996,0.2462518602840369,0.0,-1.9368163247695047


In [7]:
dataset_info = []
key2feats = {'v_x': [], 'v_y': [], 'mod_v': [], 'v_yaw': []} #, 'v_direct': []}
for d in tqdm([*glob('YandexCup2024v2/YaCupTrain/*'), *glob('YandexCup2024v2/YaCupTest/*')]):
    speed_df = pd.read_csv(f'{d}/speed-quant20mc_v2.csv')
    for k in ['v_x', 'v_y', 'mod_v', 'v_yaw']:
        key2feats[k].append(speed_df[k].values)
key2feats = {k: np.concatenate(v) for k,v in key2feats.items()}

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [12:21<00:00, 67.44it/s]


In [11]:
key2feats['v_x'].shape

(134818939,)

In [12]:
key2scaler = {}
for k,v in key2feats.items():
    scaler = StandardScaler()
    scaler.fit(v.reshape(-1, 1))
    key2scaler[k] = scaler

In [13]:
with open('YandexCup2024v2/v_scaler.pkl', 'wb') as f:
    pickle.dump(key2scaler, f)
    

In [15]:
for k,scaler in key2scaler.items():
    print(k, scaler.mean_, scaler.var_)

v_x [-0.00556363] [0.01267788]
v_y [-0.00024126] [0.0129934]
mod_v [0.12581441] [0.00987303]
v_yaw [2.46346956e-06] [0.00144251]


In [17]:
dataset_info = []
for d in tqdm([*glob('YandexCup2024v2/YaCupTrain/*'), *glob('YandexCup2024v2/YaCupTest/*')]):
    speed_df = pd.read_csv(f'{d}/speed-quant20mc_v2.csv')
    for k,scaler in key2scaler.items():
        v = speed_df[k].values
        speed_df[k] = scaler.transform(v.reshape(-1, 1)).reshape(-1)
        iv = scaler.inverse_transform(speed_df[k].values.reshape(-1, 1)).reshape(-1)
        assert np.allclose(v, iv), f"{d=}, {k=}, {v=}, {iv=}"
    speed_df.to_csv(f'{d}/speed-norm-quant20mc_v2.csv', index=False)

100%|██████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [1:06:15<00:00, 12.58it/s]


In [20]:
a = torch.stack([torch.zeros(1,2), torch.ones(1,2)], axis=-1)
print(a, a.shape)


tensor([[[0., 1.],
         [0., 1.]]]) torch.Size([1, 2, 2])


In [68]:
FRAME_STEP = 20000000
def get_pred_from_score(d):
    seed_df = pd.read_csv(f'{d}/speed-quant20mc_v2.csv')
    #with open(f'{d}/quant20mc') as f:
    #    start_ns=float(f.read())
    
    start = seed_df.iloc[0]
    #stamp_ns=start.frame_id * FRAME_STEP + start_ns
    x=start.x
    y=start.y
    yaw=start.yaw
    pred = [(start.frame_id, x, y, yaw)]
    for i, row in seed_df.iloc[1:].iterrows():
        #v = row.v_proj_yaw
        #stamp_ns = row.frame_id * FRAME_STEP + start_ns
        v = row.mod_v
        v_yaw = row.v_yaw
        _, x, y, yaw = pred[-1]
        new_yaw = yaw + v_yaw
        new_x = x + v * np.cos(row.v_direct)
        new_y = y + v * np.sin(row.v_direct)
        pred.append((row.frame_id, new_x, new_y, new_yaw))
    pred = np.asarray(pred, dtype=float)
    return pred


def measure_score(df_pred, loc_df):
    df_join = df_pred.set_index('stamp_ns').join(loc_df[['stamp_ns', 'x', 'y', 'yaw']].set_index('stamp_ns'))
    df_join['mseX'] = ((df_join['pred_x'] - df_join.x)**2 + (df_join['pred_y'] - df_join.y)**2)**(1/2)
    df_join['mse_yaw'] = ((np.cos(df_join.pred_yaw) - np.cos(df_join.yaw))**2 + (np.sin(df_join.pred_yaw) - np.sin(df_join.yaw))**2)**(1/2)
    df_join['score'] = df_join.mseX+df_join.mse_yaw
    return df_join['score'].mean(), df_join


def make_prediction(pred, stamp_ns):                
    pred_loc = []
    frame_id2predid = {int(row[0]): i for i, row in enumerate(pred)}
    #print(frame_id2predid)
    for t in stamp_ns:                                          
        t = int(t)                                  
        left_pred_id = frame_id2predid[t // FRAME_STEP]
        right_pred_id = left_pred_id + 1
        delta = t % FRAME_STEP
        if right_pred_id >= pred.shape[0]:
            #print(f"Warning {right_pred_id=}, {pred.shape=}")
            bounds = np.concatenate([pred[-2:-1], pred[-1:]], axis=0)
        else:
            bounds = pred[left_pred_id:right_pred_id+1]  
        if len(bounds) != 2:
            raise RuntimeError(f"{bounds}, {t}, {left_pred_id}, {right_pred_id}, {pred.shape}")
        db = bounds[1] - bounds[0]                        
        ddot = db * delta / FRAME_STEP                     
        dot = bounds[0] + ddot                             
        pred_loc.append((t, dot[1], dot[2], dot[3]))
    df_pred = pd.DataFrame(pred_loc, columns=['stamp_ns', 'pred_x', 'pred_y', 'pred_yaw'])
    return df_pred
    

def calculate_ideal_score(d):
    pred = get_pred_from_score(d)
    loc_df = pd.read_csv(f'{d}/localization.csv') 
    with open(f'{d}/quant20mc') as f:
        start_ns=float(f.read())
    loc_df['stamp_ns'] = loc_df.stamp_ns - start_ns
    #print(start_ns)
    df_pred = make_prediction(pred, loc_df.stamp_ns)
    return measure_score(df_pred, loc_df)
score, df = calculate_ideal_score('YandexCup2024v2/YaCupTest/0')
score

np.float64(0.0004005369813479011)

In [69]:
def calculate_ideal_score_quant(d):
    #pred = get_pred_from_score(d)
    pred = pd.read_csv(f'{d}/localization-quant20mc_v2.csv')[['frame_id', 'x', 'y', 'yaw']].values
    with open(f'{d}/quant20mc') as f:
        start_ns=float(f.read())
    loc_df = pd.read_csv(f'{d}/localization.csv') 
    loc_df['stamp_ns'] = loc_df.stamp_ns - start_ns
    df_pred = make_prediction(pred, loc_df.stamp_ns)
    return measure_score(df_pred, loc_df)
score, df = calculate_ideal_score_quant('YandexCup2024v2/YaCupTest/0')
score

np.float64(0.0004005369813479011)

In [70]:
def calculate_zero_score(d):
    #pred = get_pred_from_score(d)
    loc_df = pd.read_csv(f'{d}/localization.csv') 
    with open(f'{d}/quant20mc') as f:
        start_ns=float(f.read())
    loc_df['stamp_ns'] = loc_df.stamp_ns - start_ns
    df_pred = pd.DataFrame(loc_df[['stamp_ns', 'x', 'y', 'yaw']].values, columns=['stamp_ns', 'pred_x', 'pred_y', 'pred_yaw'])
    return measure_score(df_pred, loc_df)
score, df = calculate_zero_score('YandexCup2024v2/YaCupTest/0')
score

np.float64(0.0)

In [73]:
results = []
for d in tqdm([*[*glob('YandexCup2024v2/YaCupTest/*')][::5], *[*glob('YandexCup2024v2/YaCupTrain/10*')][::5]]):
    zero_score, _ = calculate_zero_score(d)
    quant_score, _ = calculate_ideal_score_quant(d)
    full_score, df = calculate_ideal_score(d)
    #print(quant_score)
    results.append((d, zero_score, quant_score, full_score))
    #print(results[-1])


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1823/1823 [03:10<00:00,  9.59it/s]


In [74]:
results_np = np.asarray([list(map(float, t)) for _, *t in results])
results_np.mean(axis=0)

array([0.        , 0.00112619, 0.00112619])

In [78]:
results_np.argmax(axis=0)

array([   0, 1222, 1222])

In [79]:
results[1222]

('YandexCup2024v2/YaCupTest/6110',
 np.float64(0.0),
 np.float64(0.012944903705919275),
 np.float64(0.012944903705919145))

In [35]:
[t for _, *t in results]

[[np.float64(0.0),
  np.float64(0.00016788747205601665),
  np.float64(0.00016788747205601665)],
 [np.float64(0.0),
  np.float64(0.00015092273623239412),
  np.float64(0.00015092273623239601)],
 [np.float64(0.0),
  np.float64(3.06858980594585e-05),
  np.float64(3.068589805861405e-05)],
 [np.float64(0.0),
  np.float64(2.53257656473556e-05),
  np.float64(2.5325765647477143e-05)],
 [np.float64(0.0),
  np.float64(1.3856499826860494e-05),
  np.float64(1.3856499826860494e-05)],
 [np.float64(0.0),
  np.float64(1.707229259087967e-05),
  np.float64(1.7072292590973448e-05)],
 [np.float64(0.0),
  np.float64(2.6221288186197513e-05),
  np.float64(2.6221288185875515e-05)],
 [np.float64(0.0),
  np.float64(0.00012575538555093535),
  np.float64(0.00012575538555107356)],
 [np.float64(0.0),
  np.float64(0.00011067045785737214),
  np.float64(0.00011067045785737214)],
 [np.float64(0.0),
  np.float64(0.0010413503179069596),
  np.float64(0.0010413503179069596)],
 [np.float64(0.0),
  np.float64(2.66996832493322

In [37]:
d='YandexCup2024v2/YaCupTrain/0'
pred = get_pred_from_score(d)
print(pred.shape)
loc_df = pd.read_csv(f'{d}/localization.csv') 
df_pred = make_prediction(pred, loc_df.stamp_ns)

(3162, 4)


In [38]:
pred[0]

array([ 7.44073600e+06, -4.29234649e+03, -1.45273519e+04, -1.93681851e+00])

In [39]:
df_pred.head()

Unnamed: 0,stamp_ns,pred_x,pred_y,pred_yaw
0,0,-4292.346494,-14527.351866,-1.936819
1,39989868,-4292.522209,-14527.811452,-1.936852
2,79819886,-4292.695447,-14528.268391,-1.936838
3,125154671,-4292.894371,-14528.788169,-1.93689
4,159636974,-4293.044571,-14529.183051,-1.936895


In [3]:
!head YandexCup2024v2/YaCupTrain/0/control_norm_v1-quant20mc.csv

stamp_ns,acceleration_level,steering,norm_acceleration_level,norm_steering
0.0,-114.0,-2.655139533023165,-0.4514997100029219,-0.0615109367890647
1.0,-118.51124043222295,-2.6265831542518203,-0.4812146939425406,-0.06101266412300235
2.0,-123.02273633437042,-2.5980333021733926,-0.5109313606310214,-0.060514505339470656
3.0,-127.58524197730276,-2.57078663642703,-0.5409840221863152,-0.06003908550800475
4.0,-132.14585451381703,-2.5444222994575325,-0.5710242140832847,-0.0595790611969412
5.0,-136.64990027292015,-2.5444222994575325,-0.6006918075990458,-0.0595790611969412
6.0,-141.10246811465205,-2.5434682242216784,-0.6300203224266029,-0.05956241379163819
7.0,-144.10040882917525,-2.5155545556234253,-0.6497673879005857,-0.05907535560462499
8.0,-147.08219304041114,-2.487715056006009,-0.6694080324814873,-0.05858959157245253


In [93]:
!head YandexCup2024v2/YaCupTrain/0/localization-quant20mc_v2.csv

frame_id,stamp_ns,x,y,z,roll,pitch,yaw,stamp_ms
0.0,0.0,-4292.313704696657,-14527.266319463157,66.0433143307047,0.0039258153246947,-0.0541978569971037,-1.9368095592835344,0.0
1.0,39989868.0,-4292.401838708525,-14527.496259400197,66.05667176164636,0.0038138626599912807,-0.05418474034177471,-1.9368336157619956,39.989868
2.0,79819886.0,-4292.4899720289495,-14527.726199096734,66.07002759362825,0.00370163660102829,-0.054171746403958755,-1.9368576521712249,79.81988600000001
3.0,79819886.0,-4292.576741170462,-14527.955664297288,66.08022876761915,0.003050018994415659,-0.05440086723983958,-1.9368420931110024,79.81988600000001
4.0,125154671.0,-4292.663520722658,-14528.185128509973,66.09045915339448,0.002405479543977433,-0.05462764900342842,-1.9368269431121925,125.154671
5.0,125154671.0,-4292.751445874733,-14528.414484017052,66.10390402686734,0.0025398259573082118,-0.054597037415180676,-1.9368568065524407,125.154671
6.0,125154671.0,-4292.8393710268065,-14528.64383952413,66.1173489003402,0.0026741

In [5]:
!head YandexCup2024v2/YaCupTrain/0/metadata.json

{"ride_date": "2022-03-14", "tires": {"front": 0, "rear": 0}, "vehicle_id": 0, "vehicle_model": 0, "vehicle_model_modification": 0, "location_reference_point_id": 0}

In [6]:
os.listdir('YandexCup2024v2/YaCupTest/0')

['metadata.json',
 'control.csv',
 'localization.csv',
 'requested_stamps.csv',
 'control_norm_v1.csv',
 'control_norm_v1-quant20mc.csv',
 'localization-quant20mc.csv',
 'requested_stamps-quant20mc.csv',
 'quant20mc']

In [21]:
def configure_logging(log_level='INFO'):
    handlers =  {
            "base_out": {
                "class": "logging.StreamHandler",
                "formatter": "basic",
                "stream": "ext://sys.stderr",
            }
    }
    CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"basic": {"format": '%(asctime)s %(name)s %(pathname)s:%(lineno)d - %(levelname)s - %(message)s'}},
        "handlers": handlers,
        "loggers": {"base": {"handlers": handlers.keys(), "level": log_level}},
        "root": {"handlers": handlers.keys(), "level": log_level}
    }
    logging.config.dictConfig(CONFIG)
configure_logging()


In [22]:
class DirLoader(IterableDataset):
    def __init__(self, root_dir, 
                 control_bname='control_norm_v1-quant20mc_v2.csv',
                req_bname=None,
                meta_bname='metadata.json',
                start_bname='quant20mc',
                speed_bname='speed-norm-quant20mc_v2.csv', 
                seed=None,
                pad_value= -1000000):
        super().__init__()
        self.root_dir = root_dir
        self.control_bname = control_bname
        self.req_bname = req_bname
        self.meta_bname = meta_bname
        self.start_bname = start_bname
        self.speed_bname = speed_bname
        self.seed = seed
        self.pad_value = pad_value
        self.dirlist = [*os.listdir(root_dir)]

    def __len__(self):
        return len(self.dirlist)

    def __iter__(self):
        dirlist = self.dirlist
        rank, world_size, worker, num_workers = pytorch_worker_info()
        assert world_size == 1, "Do not use this class for DDP"
        if num_workers > 1:
            full_len = len(dirlist)
            dirlist = list(islice(dirlist, worker, None, num_workers))
            logging.info(
                f"Subset for {worker} worker contains {len(dirlist)}/{full_len} annotations"
            )
            logging.debug(f"First dir is {dirlist[0]}")
        if len(dirlist) == 0:
            logging.warning(
                f"Zero len dirs list! {worker=}, {num_workers=}, {len(dirlist)=}, {len(self.dirlist)}"
            )
            return

        if self.seed is not None:
            random.Random(self.seed).shuffle(dirlist)
        for dbname in dirlist:
            d = f'{self.root_dir}/{dbname}'
            control_df = pd.read_csv(f"{d}/{self.control_bname}")
            speed_ds = pd.read_csv(f"{d}/{self.speed_bname}")
            assert (control_df.frame_id >= 0).all(), f"{control_df.frame_id=}"
            assert (speed_ds.frame_id >= 0).all(), f"{speed_ds.frame_id=}"
            df = control_df.set_index('frame_id').join(speed_ds.set_index('frame_id'), 
                                                       lsuffix='_c', 
                                                       rsuffix='_r')            
            with open(f"{d}/{self.meta_bname}") as f:
                meta = json.load(f)
            df = df.fillna(self.pad_value)
            assert np.allclose(df.index, np.arange(len(df))), f"{len(df)=}, \n{df.index=}"
            out = {
                '__key__': f"{Path(self.root_dir).stem}__{Path(d).stem}",
                'frame_ids.pth': torch.from_numpy(df.index.values),
                'control_frame_ids.pth': torch.from_numpy(control_df.frame_id.values),
                'loc_frame_ids.pth': torch.from_numpy(speed_ds.frame_id.values),
                'acceleration_level.pth': torch.from_numpy(df.norm_acceleration_level.values),
                'steering.pth' : torch.from_numpy(df.norm_steering.values),
                'x.pth': torch.from_numpy(df.x.values),
                'y.pth': torch.from_numpy(df.y.values),
                'z.pth': torch.from_numpy(df.z.values),
                'roll.pth': torch.from_numpy(df.roll.values),
                'pitch.pth': torch.from_numpy(df.pitch.values),
                'yaw.pth': torch.from_numpy(df.yaw.values),
                'v_x.pth': torch.from_numpy(df.v_x.values),
                'v_y.pth': torch.from_numpy(df.v_y.values),
                'mod_v.pth': torch.from_numpy(df.mod_v.values),
                'v_yaw.pth': torch.from_numpy(df.v_yaw.values),
                'v_direct.pth': torch.from_numpy(df.v_direct.values),
                'ride_date.txt': meta['ride_date'],
                "tires.pickle": meta['tires'],
                "vehicle_id.id": meta['vehicle_id'], 
                "vehicle_model.id": meta['vehicle_model'], 
                "vehicle_model_modification.id": meta['vehicle_model_modification'], 
                "location_reference_point_id.id": meta['location_reference_point_id']
            }
            if self.req_bname is not None:
                req_df = pd.read_csv(f"{d}/{self.req_bname}")
                out['req_stamps.pth'] = torch.from_numpy(req_df.stamp_ns.values)
            if self.start_bname is not None:
                with open(f"{d}/{self.start_bname}") as f:
                    start_ns = int(float(f.read()))
                out['start_ns.id'] = start_ns
            
            yield out

In [23]:
#!rm -rf exp_v1

In [24]:
for d in ['YandexCup2024v2/YaCupTrain/']:
    out_d = f'exp_v2.2/{d}'
    os.makedirs(out_d, exist_ok=True)
    num_e = len(os.listdir(d))
    with wds.ShardWriter(f'{out_d}/dump-%06d.tar', maxcount=400) as sink:
        for e in tqdm(
                DataLoader(
                    DirLoader(d, seed=42), 
                    batch_size=None, 
                    #num_workers=6,
                    sampler=None
                ), 
                total=num_e
        ):
            sink.write(e)
    print(f'Done {out_d}')

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000000.tar 0 0.0 GB 0


  1%|▉                                                                                             | 406/42000 [00:12<20:33, 33.73it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000001.tar 400 0.2 GB 400


  2%|█▊                                                                                            | 806/42000 [00:23<20:45, 33.06it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000002.tar 400 0.2 GB 800


  3%|██▋                                                                                          | 1206/42000 [00:35<21:09, 32.13it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000003.tar 400 0.2 GB 1200


  4%|███▌                                                                                         | 1604/42000 [00:55<37:01, 18.19it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000004.tar 400 0.2 GB 1600


  5%|████▍                                                                                        | 2005/42000 [01:08<19:56, 33.42it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000005.tar 400 0.2 GB 2000


  6%|█████▎                                                                                       | 2405/42000 [01:20<19:43, 33.45it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000006.tar 400 0.2 GB 2400


  7%|██████▏                                                                                      | 2805/42000 [01:31<19:30, 33.49it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000007.tar 400 0.2 GB 2800


  8%|███████                                                                                      | 3203/42000 [01:46<35:31, 18.21it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000008.tar 400 0.2 GB 3200


  9%|███████▉                                                                                     | 3603/42000 [02:09<36:11, 17.68it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000009.tar 400 0.2 GB 3600


 10%|████████▊                                                                                    | 4006/42000 [02:28<18:46, 33.73it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000010.tar 400 0.2 GB 4000


 10%|█████████▋                                                                                   | 4402/42000 [02:40<30:41, 20.41it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000011.tar 400 0.2 GB 4400


 11%|██████████▋                                                                                  | 4804/42000 [02:54<19:02, 32.56it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000012.tar 400 0.2 GB 4800


 12%|███████████▌                                                                                 | 5204/42000 [03:15<20:18, 30.20it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000013.tar 400 0.2 GB 5200


 13%|████████████▍                                                                                | 5604/42000 [03:27<19:13, 31.54it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000014.tar 400 0.2 GB 5600


 14%|█████████████▎                                                                               | 6004/42000 [03:40<18:11, 32.99it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000015.tar 400 0.2 GB 6000


 15%|██████████████▏                                                                              | 6404/42000 [03:52<17:57, 33.03it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000016.tar 400 0.2 GB 6400


 16%|███████████████                                                                              | 6804/42000 [04:04<17:23, 33.72it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000017.tar 400 0.2 GB 6800


 17%|███████████████▉                                                                             | 7206/42000 [04:17<17:09, 33.80it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000018.tar 400 0.2 GB 7200


 18%|████████████████▊                                                                            | 7606/42000 [04:29<17:16, 33.17it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000019.tar 400 0.2 GB 7600


 19%|█████████████████▋                                                                           | 8006/42000 [04:41<16:57, 33.41it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000020.tar 400 0.2 GB 8000


 20%|██████████████████▌                                                                          | 8406/42000 [04:53<17:52, 31.33it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000021.tar 400 0.2 GB 8400


 21%|███████████████████▍                                                                         | 8804/42000 [05:06<19:23, 28.52it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000022.tar 400 0.2 GB 8800


 22%|████████████████████▍                                                                        | 9206/42000 [05:22<16:28, 33.16it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000023.tar 400 0.2 GB 9200


 23%|█████████████████████▎                                                                       | 9606/42000 [05:35<16:50, 32.06it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000024.tar 400 0.2 GB 9600


 24%|█████████████████████▉                                                                      | 10003/42000 [05:49<29:50, 17.88it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000025.tar 400 0.2 GB 10000


 25%|██████████████████████▊                                                                     | 10403/42000 [06:10<31:03, 16.96it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000026.tar 400 0.2 GB 10400


 26%|███████████████████████▋                                                                    | 10806/42000 [06:29<15:33, 33.42it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000027.tar 400 0.2 GB 10800


 27%|████████████████████████▌                                                                   | 11206/42000 [06:42<15:50, 32.41it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000028.tar 400 0.2 GB 11200


 28%|█████████████████████████▍                                                                  | 11606/42000 [06:54<15:06, 33.51it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000029.tar 400 0.2 GB 11600


 29%|██████████████████████████▎                                                                 | 12006/42000 [07:06<15:09, 32.98it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000030.tar 400 0.2 GB 12000


 30%|███████████████████████████▏                                                                | 12406/42000 [07:18<14:33, 33.87it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000031.tar 400 0.2 GB 12400


 30%|████████████████████████████                                                                | 12806/42000 [07:30<14:18, 33.99it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000032.tar 400 0.2 GB 12800


 31%|████████████████████████████▉                                                               | 13206/42000 [07:42<14:18, 33.56it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000033.tar 400 0.2 GB 13200


 32%|█████████████████████████████▊                                                              | 13606/42000 [07:53<13:44, 34.42it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000034.tar 400 0.2 GB 13600


 33%|██████████████████████████████▋                                                             | 14007/42000 [08:09<14:13, 32.80it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000035.tar 400 0.2 GB 14000


 34%|███████████████████████████████▌                                                            | 14403/42000 [08:21<13:49, 33.27it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000036.tar 400 0.2 GB 14400


 35%|████████████████████████████████▍                                                           | 14807/42000 [08:33<13:25, 33.75it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000037.tar 400 0.2 GB 14800


 36%|█████████████████████████████████▎                                                          | 15204/42000 [08:51<24:50, 17.98it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000038.tar 400 0.2 GB 15200


 37%|██████████████████████████████████▏                                                         | 15603/42000 [09:06<13:39, 32.22it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000039.tar 400 0.2 GB 15600


 38%|███████████████████████████████████                                                         | 16007/42000 [09:18<13:05, 33.09it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000040.tar 400 0.2 GB 16000


 39%|███████████████████████████████████▉                                                        | 16407/42000 [09:30<13:00, 32.79it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000041.tar 400 0.2 GB 16400


 40%|████████████████████████████████████▊                                                       | 16807/42000 [09:42<12:38, 33.22it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000042.tar 400 0.2 GB 16800


 41%|█████████████████████████████████████▋                                                      | 17203/42000 [09:54<12:48, 32.27it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000043.tar 400 0.2 GB 17200


 42%|██████████████████████████████████████▌                                                     | 17607/42000 [10:06<12:17, 33.08it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000044.tar 400 0.2 GB 17600


 43%|███████████████████████████████████████▍                                                    | 18007/42000 [10:18<12:03, 33.15it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000045.tar 400 0.2 GB 18000


 44%|████████████████████████████████████████▎                                                   | 18407/42000 [10:30<12:25, 31.66it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000046.tar 400 0.2 GB 18400


 45%|█████████████████████████████████████████▏                                                  | 18807/42000 [10:42<11:31, 33.52it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000047.tar 400 0.2 GB 18800


 46%|██████████████████████████████████████████                                                  | 19203/42000 [11:03<21:42, 17.50it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000048.tar 400 0.2 GB 19200


 47%|██████████████████████████████████████████▉                                                 | 19607/42000 [11:16<11:42, 31.90it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000049.tar 400 0.2 GB 19600


 48%|███████████████████████████████████████████▊                                                | 20007/42000 [11:34<12:05, 30.32it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000050.tar 400 0.2 GB 20000


 49%|████████████████████████████████████████████▋                                               | 20406/42000 [11:52<11:43, 30.70it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000051.tar 400 0.2 GB 20400


 50%|█████████████████████████████████████████████▌                                              | 20806/42000 [12:04<10:23, 33.99it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000052.tar 400 0.2 GB 20800


 50%|██████████████████████████████████████████████▍                                             | 21203/42000 [12:17<18:42, 18.53it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000053.tar 400 0.2 GB 21200


 51%|███████████████████████████████████████████████▎                                            | 21607/42000 [12:30<09:43, 34.93it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000054.tar 400 0.2 GB 21600


 52%|████████████████████████████████████████████████▏                                           | 22007/42000 [12:42<09:47, 34.04it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000055.tar 400 0.2 GB 22000


 53%|█████████████████████████████████████████████████                                           | 22407/42000 [12:56<10:13, 31.94it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000056.tar 400 0.2 GB 22400


 54%|█████████████████████████████████████████████████▉                                          | 22804/42000 [13:10<17:43, 18.05it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000057.tar 400 0.2 GB 22800


 55%|██████████████████████████████████████████████████▊                                         | 23204/42000 [13:32<14:27, 21.67it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000058.tar 400 0.2 GB 23200


 56%|███████████████████████████████████████████████████▋                                        | 23604/42000 [13:52<14:42, 20.85it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000059.tar 400 0.2 GB 23600


 57%|████████████████████████████████████████████████████▌                                       | 24007/42000 [14:05<09:23, 31.94it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000060.tar 400 0.2 GB 24000


 58%|█████████████████████████████████████████████████████▍                                      | 24403/42000 [14:20<16:20, 17.94it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000061.tar 400 0.2 GB 24400


 59%|██████████████████████████████████████████████████████▎                                     | 24807/42000 [14:40<08:36, 33.30it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000062.tar 400 0.2 GB 24800


 60%|███████████████████████████████████████████████████████▏                                    | 25204/42000 [14:53<12:51, 21.76it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000063.tar 400 0.2 GB 25200


 61%|████████████████████████████████████████████████████████                                    | 25606/42000 [15:13<08:57, 30.51it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000064.tar 400 0.2 GB 25600


 62%|████████████████████████████████████████████████████████▉                                   | 26006/42000 [15:25<07:51, 33.95it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000065.tar 400 0.2 GB 26000


 63%|█████████████████████████████████████████████████████████▊                                  | 26406/42000 [15:37<07:59, 32.51it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000066.tar 400 0.2 GB 26400


 64%|██████████████████████████████████████████████████████████▋                                 | 26806/42000 [15:50<07:30, 33.71it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000067.tar 400 0.2 GB 26800


 65%|███████████████████████████████████████████████████████████▌                                | 27206/42000 [16:02<07:20, 33.60it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000068.tar 400 0.2 GB 27200


 66%|████████████████████████████████████████████████████████████▍                               | 27605/42000 [16:14<07:27, 32.19it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000069.tar 400 0.2 GB 27600


 67%|█████████████████████████████████████████████████████████████▎                              | 28003/42000 [16:28<11:18, 20.64it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000070.tar 400 0.2 GB 28000


 68%|██████████████████████████████████████████████████████████████▏                             | 28404/42000 [16:47<07:27, 30.36it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000071.tar 400 0.2 GB 28400


 69%|███████████████████████████████████████████████████████████████                             | 28807/42000 [16:59<06:41, 32.84it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000072.tar 400 0.2 GB 28800


 70%|███████████████████████████████████████████████████████████████▉                            | 29207/42000 [17:12<06:43, 31.70it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000073.tar 400 0.2 GB 29200


 70%|████████████████████████████████████████████████████████████████▊                           | 29607/42000 [17:30<06:33, 31.51it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000074.tar 400 0.2 GB 29600


 71%|█████████████████████████████████████████████████████████████████▋                          | 30006/42000 [17:42<06:05, 32.79it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000075.tar 400 0.2 GB 30000


 72%|██████████████████████████████████████████████████████████████████▌                         | 30406/42000 [17:54<05:42, 33.86it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000076.tar 400 0.2 GB 30400


 73%|███████████████████████████████████████████████████████████████████▍                        | 30806/42000 [18:06<05:28, 34.04it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000077.tar 400 0.2 GB 30800


 74%|████████████████████████████████████████████████████████████████████▎                       | 31206/42000 [18:18<05:16, 34.08it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000078.tar 400 0.2 GB 31200


 75%|█████████████████████████████████████████████████████████████████████▏                      | 31606/42000 [18:31<05:49, 29.77it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000079.tar 400 0.2 GB 31600


 76%|██████████████████████████████████████████████████████████████████████                      | 32003/42000 [18:50<08:00, 20.82it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000080.tar 400 0.2 GB 32000


 77%|██████████████████████████████████████████████████████████████████████▉                     | 32404/42000 [19:02<04:52, 32.83it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000081.tar 400 0.2 GB 32400


 78%|███████████████████████████████████████████████████████████████████████▊                    | 32804/42000 [19:16<08:46, 17.45it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000082.tar 400 0.2 GB 32800


 79%|████████████████████████████████████████████████████████████████████████▋                   | 33204/42000 [19:33<08:24, 17.42it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000083.tar 400 0.2 GB 33200


 80%|█████████████████████████████████████████████████████████████████████████▌                  | 33607/42000 [19:55<04:54, 28.50it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000084.tar 400 0.2 GB 33600


 81%|██████████████████████████████████████████████████████████████████████████▍                 | 34006/42000 [20:07<03:57, 33.62it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000085.tar 400 0.2 GB 34000


 82%|███████████████████████████████████████████████████████████████████████████▎                | 34406/42000 [20:19<03:45, 33.72it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000086.tar 400 0.2 GB 34400


 83%|████████████████████████████████████████████████████████████████████████████▏               | 34806/42000 [20:31<03:41, 32.45it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000087.tar 400 0.2 GB 34800


 84%|█████████████████████████████████████████████████████████████████████████████               | 35206/42000 [20:44<03:31, 32.19it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000088.tar 400 0.2 GB 35200


 85%|█████████████████████████████████████████████████████████████████████████████▉              | 35604/42000 [21:05<06:00, 17.76it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000089.tar 400 0.2 GB 35600


 86%|██████████████████████████████████████████████████████████████████████████████▊             | 36004/42000 [21:28<05:43, 17.46it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000090.tar 400 0.2 GB 36000


 87%|███████████████████████████████████████████████████████████████████████████████▋            | 36407/42000 [21:44<02:55, 31.84it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000091.tar 400 0.2 GB 36400


 88%|████████████████████████████████████████████████████████████████████████████████▌           | 36807/42000 [21:57<02:38, 32.77it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000092.tar 400 0.2 GB 36800


 89%|█████████████████████████████████████████████████████████████████████████████████▍          | 37203/42000 [22:09<02:38, 30.31it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000093.tar 400 0.2 GB 37200


 90%|██████████████████████████████████████████████████████████████████████████████████▎         | 37602/42000 [22:30<04:28, 16.37it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000094.tar 400 0.2 GB 37600


 90%|███████████████████████████████████████████████████████████████████████████████████▎        | 38006/42000 [22:46<02:05, 31.75it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000095.tar 400 0.2 GB 38000


 91%|████████████████████████████████████████████████████████████████████████████████████▏       | 38406/42000 [22:58<01:51, 32.38it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000096.tar 400 0.2 GB 38400


 92%|█████████████████████████████████████████████████████████████████████████████████████       | 38806/42000 [23:11<01:42, 31.29it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000097.tar 400 0.2 GB 38800


 93%|█████████████████████████████████████████████████████████████████████████████████████▉      | 39206/42000 [23:23<01:26, 32.38it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000098.tar 400 0.2 GB 39200


 94%|██████████████████████████████████████████████████████████████████████████████████████▊     | 39605/42000 [23:35<01:14, 32.34it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000099.tar 400 0.2 GB 39600


 95%|███████████████████████████████████████████████████████████████████████████████████████▋    | 40005/42000 [23:48<01:02, 31.99it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000100.tar 400 0.2 GB 40000


 96%|████████████████████████████████████████████████████████████████████████████████████████▌   | 40403/42000 [24:04<01:32, 17.25it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000101.tar 400 0.2 GB 40400


 97%|█████████████████████████████████████████████████████████████████████████████████████████▍  | 40804/42000 [24:18<00:43, 27.44it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000102.tar 400 0.2 GB 40800


 98%|██████████████████████████████████████████████████████████████████████████████████████████▎ | 41203/42000 [24:36<00:38, 20.71it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000103.tar 400 0.2 GB 41200


 99%|███████████████████████████████████████████████████████████████████████████████████████████▏| 41604/42000 [24:51<00:12, 31.27it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTrain//dump-000104.tar 400 0.2 GB 41600


100%|████████████████████████████████████████████████████████████████████████████████████████████| 42000/42000 [25:03<00:00, 27.93it/s]

Done exp_v2.2/YandexCup2024v2/YaCupTrain/





In [25]:
for d in ['YandexCup2024v2/YaCupTest/']:
    out_d = f'exp_v2.2/{d}'
    os.makedirs(out_d, exist_ok=True)
    max_elements_per_shard=50
    num_e = len(os.listdir(d))
    with wds.ShardWriter(f'{out_d}/dump-%06d.tar', maxcount=400) as sink:
        for e in tqdm(
                DataLoader(
                    DirLoader(d, req_bname='requested_stamps-quant20mc_v2.csv', seed=None), 
                    batch_size=None, 
                    num_workers=6,
                    sampler=None
                
                ), 
                total=num_e
        ):
            sink.write(e)
    print(f'Done {out_d}')

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000000.tar 0 0.0 GB 0


  0%|                                                                                                         | 0/8000 [00:00<?, ?it/s]2024-11-01 22:23:49,883 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 3 worker contains 1333/8000 annotations
2024-11-01 22:23:49,882 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 0 worker contains 1334/8000 annotations
2024-11-01 22:23:49,883 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 2 worker contains 1333/8000 annotations
2024-11-01 22:23:49,883 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 1 worker contains 1334/8000 annotations
2024-11-01 22:23:49,885 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 4 worker contains 1333/8000 annotations
2024-11-01 22:23:49,923 root /tmp/ipykernel_3026947/3204215380.py:31 - INFO - Subset for 5 worker contains 1333/8000 annotations
  5%|████▊                                                                                 

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000001.tar 400 0.1 GB 400


 10%|█████████▌                                                                                     | 809/8000 [00:17<02:12, 54.45it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000002.tar 400 0.1 GB 800


 15%|██████████████▏                                                                               | 1208/8000 [00:23<02:24, 46.94it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000003.tar 400 0.1 GB 1200


 20%|██████████████████▉                                                                           | 1612/8000 [00:28<01:22, 77.33it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000004.tar 400 0.1 GB 1600


 25%|███████████████████████▌                                                                      | 2006/8000 [00:36<02:10, 46.05it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000005.tar 400 0.1 GB 2000


 30%|████████████████████████████▏                                                                 | 2403/8000 [00:45<02:53, 32.22it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000006.tar 400 0.1 GB 2400


 35%|████████████████████████████████▉                                                             | 2808/8000 [00:55<01:20, 64.82it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000007.tar 400 0.1 GB 2800


 40%|█████████████████████████████████████▋                                                        | 3210/8000 [01:02<01:08, 70.25it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000008.tar 400 0.1 GB 3200


 45%|██████████████████████████████████████████▍                                                   | 3608/8000 [01:11<01:03, 69.70it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000009.tar 400 0.1 GB 3600


 50%|███████████████████████████████████████████████▏                                              | 4017/8000 [01:16<00:48, 81.82it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000010.tar 400 0.1 GB 4000


 55%|███████████████████████████████████████████████████▊                                          | 4414/8000 [01:21<00:44, 81.44it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000011.tar 400 0.1 GB 4400


 60%|████████████████████████████████████████████████████████▌                                     | 4814/8000 [01:27<00:41, 76.38it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000012.tar 400 0.1 GB 4800


 65%|█████████████████████████████████████████████████████████████▏                                | 5205/8000 [01:34<00:51, 54.43it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000013.tar 400 0.1 GB 5200


 70%|█████████████████████████████████████████████████████████████████▉                            | 5611/8000 [01:40<00:38, 61.57it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000014.tar 400 0.1 GB 5600


 75%|██████████████████████████████████████████████████████████████████████▌                       | 6006/8000 [01:46<00:29, 67.55it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000015.tar 400 0.1 GB 6000


 80%|███████████████████████████████████████████████████████████████████████████▎                  | 6407/8000 [01:53<00:34, 46.29it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000016.tar 400 0.1 GB 6400


 85%|███████████████████████████████████████████████████████████████████████████████▉              | 6808/8000 [01:58<00:16, 73.82it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000017.tar 400 0.1 GB 6800


 90%|████████████████████████████████████████████████████████████████████████████████████▋         | 7207/8000 [02:05<00:12, 65.57it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000018.tar 400 0.1 GB 7200


 95%|█████████████████████████████████████████████████████████████████████████████████████████▍    | 7616/8000 [02:11<00:04, 78.73it/s]

# writing exp_v2.2/YandexCup2024v2/YaCupTest//dump-000019.tar 400 0.1 GB 7600


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 8000/8000 [02:19<00:00, 57.23it/s]

Done exp_v2.2/YandexCup2024v2/YaCupTest/





In [65]:
df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
                   'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']})
other = pd.DataFrame({'key': ['K0', 'K1', 'K2'],
                      'B': ['B0', 'B1', 'B2']})
df.set_index('key').join(other.set_index('key'))

Unnamed: 0_level_0,A,B
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,B2
K3,A3,
K4,A4,
K5,A5,


In [66]:
other.set_index('key').join(df.set_index('key'))

Unnamed: 0_level_0,B,A
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,B0,A0
K1,B1,A1
K2,B2,A2


In [77]:
a = torch.nn.functional.pad(torch.arange(30).view(2,3,5), (0,0, 1, 0), value=-1000)
print(a.shape)
a

torch.Size([2, 4, 5])


tensor([[[-1000, -1000, -1000, -1000, -1000],
         [    0,     1,     2,     3,     4],
         [    5,     6,     7,     8,     9],
         [   10,    11,    12,    13,    14]],

        [[-1000, -1000, -1000, -1000, -1000],
         [   15,    16,    17,    18,    19],
         [   20,    21,    22,    23,    24],
         [   25,    26,    27,    28,    29]]])

In [122]:
data = torch.load('tmp/YaCupTrain__11687.pth')

  data = torch.load('tmp/YaCupTrain__11687.pth')


In [123]:
data.keys()

dict_keys(['batch', 'attention_mask', 'mask', 'inputs_embeds', 'labels_w_mask', 'outputs', 'logits_w_mask', 'loss'])

In [124]:
data['outputs']['loc.pth'][0]

tensor([[ 2.2097e+04, -6.5113e+04,  3.4293e+01,  1.0708e-01, -1.7448e-01,
         -2.8186e+00],
        [ 2.2097e+04, -6.5113e+04,  3.4290e+01, -2.9861e-02,  4.5324e-02,
         -2.7439e+00],
        [ 2.2096e+04, -6.5111e+04,  3.4629e+01,  8.5670e-02, -2.4974e-02,
         -2.5952e+00],
        ...,
        [ 2.2154e+04, -6.5281e+04,  3.1793e+01, -1.6713e+00,  2.4100e+00,
         -4.7205e+00],
        [ 2.2163e+04, -6.5283e+04,  3.6307e+01, -2.5581e+00, -1.8996e+00,
         -4.8193e+00],
        [ 2.2160e+04, -6.5283e+04,  3.5243e+01, -4.0093e+00, -2.4420e-01,
         -1.3853e+00]], device='cuda:0', grad_fn=<SelectBackward0>)

In [125]:
data['outputs']['logits.pth'][0]

tensor([[-7.4201e-01, -2.1288e-01,  2.4525e-01,  9.6820e-02, -1.6709e-01,
          4.4357e-02],
        [-1.0309e+00, -1.6344e-02,  2.4240e-01, -4.0124e-02,  5.2717e-02,
          1.1906e-01],
        [-1.5823e+00,  1.2993e+00,  5.8091e-01,  7.5407e-02, -1.7581e-02,
          2.6778e-01],
        ...,
        [ 5.5628e+01, -1.6816e+02, -2.2544e+00, -1.6816e+00,  2.4174e+00,
         -1.8575e+00],
        [ 6.5307e+01, -1.7079e+02,  2.2589e+00, -2.5684e+00, -1.8922e+00,
         -1.9563e+00],
        [ 6.2129e+01, -1.7051e+02,  1.1957e+00, -4.0196e+00, -2.3681e-01,
          1.4776e+00]], device='cuda:0', grad_fn=<SelectBackward0>)

In [126]:
data['batch'].keys()

dict_keys(['__key__', 'loc.pth', 'control_feats.pth'])

In [127]:
data['labels_w_mask'][0]

tensor([-0.1309, -0.0352,  0.0010,  0.0003, -0.0003,  0.0027], device='cuda:0')

In [128]:
data['logits_w_mask'][0]

tensor([-0.7420, -0.2129,  0.2453,  0.0968, -0.1671,  0.0444], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [129]:
data['batch']['loc.pth'][0]

tensor([[ 2.2098e+04, -6.5113e+04,  3.4048e+01,  1.0263e-02, -7.3928e-03,
         -2.8629e+00],
        [ 2.2098e+04, -6.5113e+04,  3.4049e+01,  1.0582e-02, -7.7387e-03,
         -2.8603e+00],
        [ 2.2098e+04, -6.5113e+04,  3.4051e+01,  1.0162e-02, -7.9580e-03,
         -2.8575e+00],
        ...,
        [ 2.2183e+04, -6.5354e+04,  3.5068e+01,  1.4019e-02,  2.0624e-03,
         -7.9009e-01],
        [ 2.2183e+04, -6.5354e+04,  3.5063e+01,  1.6044e-02,  1.6661e-03,
         -7.8786e-01],
        [-1.0000e+06, -1.0000e+06, -1.0000e+06, -1.0000e+06, -1.0000e+06,
         -1.0000e+06]], device='cuda:0')

In [113]:
data['inputs_embeds'][0]

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -8.1063e-02,
          1.0000e+00,  2.0000e+00],
        [-1.5625e-01, -4.8340e-02,  4.4851e-03,  ..., -8.2834e-02,
          1.0000e+00,  2.0000e+00],
        [-3.1152e-01, -9.6191e-02,  8.5697e-03,  ..., -8.4605e-02,
          1.0000e+00,  2.0000e+00],
        ...,
        [ 2.3125e+02, -7.4085e+02,  1.0253e+01,  ..., -1.7573e-02,
          1.0000e+00,  2.0000e+00],
        [ 2.3135e+02, -7.4114e+02,  1.0262e+01,  ..., -1.7573e-02,
          1.0000e+00,  2.0000e+00],
        [ 2.3144e+02, -7.4142e+02,  1.0270e+01,  ..., -1.7573e-02,
          1.0000e+00,  2.0000e+00]], device='cuda:0')