In [25]:
import numpy as np
import polars as pl
from tqdm import tqdm
from polimi.utils._polars import reduce_polars_df_memory_size

In [26]:
from pathlib import Path


dpath = Path('../../dataset')
emb_dir = dpath
dtype = 'small'
articles = pl.read_parquet(f'{dpath}/ebnerd_{dtype}/articles.parquet')

behaviors_train = pl.read_parquet(f'{dpath}/ebnerd_{dtype}/train/behaviors.parquet')
history_train = pl.read_parquet(f'{dpath}/ebnerd_{dtype}/train/history.parquet')

# Test

In [3]:
embeddings = pl.read_parquet(emb_dir / 'Ekstra_Bladet_image_embeddings' / 'image_embeddings.parquet').sort('article_id')
embeddings.columns = ['article_id', 'embedding']
emb_size = len(embeddings['embedding'][0])
missing_articles_in_embedding = list(set(articles['article_id'].to_numpy()) - set(embeddings['article_id'].to_numpy()))
null_vector = np.zeros(emb_size, dtype=np.float32)
embeddings = embeddings.vstack(pl.DataFrame({'article_id': missing_articles_in_embedding, 'embedding': [null_vector] * len(missing_articles_in_embedding)}))
embeddings = embeddings.with_row_index()
embeddings.head(2)

index,article_id,embedding
u32,i32,list[f32]
0,3000022,"[-0.033208, -0.013787, … -0.036042]"
1,3000063,"[-0.047797, -0.025657, … 0.018883]"


In [4]:
all_zero_embeddings = embeddings.with_columns(pl.col('embedding').list.eval(pl.element() == 0.0).list.all().alias('check'))
are_all_zero_embeddings_present = len(all_zero_embeddings.filter(pl.col('check') == True)) > 0
are_all_zero_embeddings_present

True

In [5]:
m_non_norm = np.array([np.array(x) for x in embeddings['embedding'].to_numpy()])
row_norms = np.linalg.norm(m_non_norm, axis=1, keepdims=True)
m = m_non_norm / (row_norms + 1e-6)
m.shape

(106346, 1024)

In [6]:
article_emb_mapping = embeddings.select('index', 'article_id')
article_emb_mapping.head(1)

index,article_id
u32,i32
0,3000022


In [7]:
history_m = history_train.select('user_id', pl.col('article_id_fixed').list.eval(pl.element().replace(article_emb_mapping['article_id'], article_emb_mapping['index'], default=None).drop_nulls())).with_row_index('user_index')
user_history_map = history_m.select('user_id', 'user_index')
history_m = history_m['article_id_fixed'].to_numpy()
history_m.shape

(15143,)

In [8]:
df = behaviors_train.select('impression_id', 'user_id', pl.col('article_ids_inview').alias('article'))\
    .join(user_history_map, on='user_id')\
    .with_columns(
        pl.col('article').list.eval(pl.element().replace(article_emb_mapping['article_id'], article_emb_mapping['index'], default=None)).name.suffix('_index'),
    ).drop('impression_time_fixed', 'scroll_percentage_fixed', 'read_time_fixed')

df = reduce_polars_df_memory_size(df)
df.head(2)

Memory usage of dataframe is 25.95 MB
Memory usage after optimization is: 25.95 MB
Decreased by 0.0%


impression_id,user_id,article,user_index,article_index
u32,u32,list[i32],u32,list[u32]
149474,139836,"[9778623, 9778682, … 9778728]",11894,"[100868, 100874, … 100879]"
150528,143471,"[9778718, 9778728, … 9778682]",7016,"[105241, 100879, … 100874]"


In [9]:
scores_df = pl.concat([
    slice.explode(['article_index', 'article']).with_columns(scores = np.dot(
        m[slice['article_index'].explode().to_numpy()], 
        m[history_m[key[0]]].T))\
    .group_by(['impression_id', 'user_id', 'user_index'])\
    .agg(pl.all())
    for key, slice in tqdm(df[:1000].partition_by(by=['user_index'], as_dict=True).items(), total=df['user_index'].n_unique())
]).drop('article_index')
scores_df

  3%|▎         | 466/15143 [00:01<00:43, 335.16it/s]


impression_id,user_id,user_index,article,scores
u32,u32,u32,list[i32],list[list[f32]]
149474,139836,11894,"[9778623, 9778682, … 9778728]","[[0.546279, 0.456443, … 0.068054], [0.108021, -0.134935, … 0.308029], … [0.789861, 0.399746, … -0.11546]]"
150528,143471,7016,"[9778718, 9778728, … 9778682]","[[0.0, 0.0, … 0.0], [0.766936, 0.0, … 0.0], … [0.028365, 0.0, … 0.0]]"
153070,151570,7074,"[9020783, 9778444, … 9778628]","[[0.552781, 0.0, … 0.519307], [0.0, 0.0, … 0.0], … [-0.077273, 0.0, … 0.123942]]"
153078,151570,7074,"[9778021, 9778627, … 7213923]","[[0.427206, 0.0, … 0.154384], [0.239121, 0.0, … 0.156372], … [-0.068958, 0.0, … -0.38356]]"
153075,151570,7074,"[9778500, 9776420, … 9020783]","[[0.290248, 0.0, … -0.126946], [-0.170831, 0.0, … 0.305366], … [0.552781, 0.0, … 0.519307]]"
…,…,…,…,…
2433248,1606050,14460,"[9552181, 9779263, … 9547869]","[[0.0, 0.0, … 0.0], [0.37884, 0.271457, … -0.0275], … [0.62122, 0.070636, … 0.213934]]"
2433256,1606050,14460,"[9483850, 9779648, … 9779777]","[[0.337382, -0.065383, … 0.307274], [0.0, 0.0, … 0.0], … [0.0, 0.0, … 0.0]]"
2435848,1692081,10750,"[9779263, 9779205, … 9779577]","[[-0.135781, 0.201273, … 0.452139], [0.197592, 0.566438, … 0.604189], … [0.629099, 0.220339, … 0.12199]]"
2435885,1695195,10254,"[9658252, 9569934, … 9775885]","[[0.59537, 0.322892, … 0.576517], [-0.082606, -0.137399, … -0.313506], … [0.004153, 0.27604, … 0.32076]]"


In [10]:
simple_agg_df = scores_df.with_columns(
    pl.col('scores').list.eval(pl.element().list.mean()).name.suffix('_mean'),
    pl.col('scores').list.eval(pl.element().list.max()).name.suffix('_max'),
    pl.col('scores').list.eval(pl.element().list.max()).name.suffix('_min'),
    pl.col('scores').list.eval(pl.element().list.std()).name.suffix('_std'),
)
simple_agg_df.head(2)

impression_id,user_id,user_index,article,scores,scores_mean,scores_max,scores_min,scores_std
u32,u32,u32,list[i32],list[list[f32]],list[f32],list[f32],list[f32],list[f32]
149474,139836,11894,"[9778623, 9778682, … 9778728]","[[0.546279, 0.456443, … 0.068054], [0.108021, -0.134935, … 0.308029], … [0.789861, 0.399746, … -0.11546]]","[0.108393, 0.125424, … 0.097202]","[0.619155, 0.729108, … 0.789861]","[0.619155, 0.729108, … 0.789861]","[0.241325, 0.239725, … 0.221128]"
150528,143471,7016,"[9778718, 9778728, … 9778682]","[[0.0, 0.0, … 0.0], [0.766936, 0.0, … 0.0], … [0.028365, 0.0, … 0.0]]","[0.0, 0.170874, … 0.119192]","[0.0, 0.880629, … 0.856975]","[0.0, 0.880629, … 0.856975]","[0.0, 0.240824, … 0.22315]"


In [11]:
explode_cols = ['article'] + [col for col in simple_agg_df.columns if col.startswith('scores_')]
res = simple_agg_df.drop('user_index', 'scores')\
    .explode(explode_cols)\
    .sort('user_id', 'impression_id', 'article')
res.head(2)

impression_id,user_id,article,scores_mean,scores_max,scores_min,scores_std
u32,u32,i32,f32,f32,f32,f32
2097252,63123,9761926,0.194606,0.911962,0.911962,0.247309
2097252,63123,9769370,0.0,0.0,0.0,0.0


# Weightening

### Scroll Percentage weight

In [12]:
history_w = history_train.select('user_id', 'scroll_percentage_fixed').with_columns(
    pl.col('scroll_percentage_fixed').list.eval(pl.element().fill_null(0.0))\
        .list.eval(pl.element().sqrt()).alias('scroll_percentage_fixed_norm'),
    pl.col('scroll_percentage_fixed').list.eval(pl.element().fill_null(0.0))\
        .list.eval((pl.element() - pl.element().min()).truediv(pl.element().max() - pl.element().min())).alias('scroll_percentage_fixed_mmnorm')
    ).with_columns(
        pl.col('scroll_percentage_fixed_norm').list.eval(pl.element().truediv(pl.element().sum())).alias('scroll_percentage_fixed_norm_l1_w'),
        pl.col('scroll_percentage_fixed_mmnorm').list.eval(pl.element().truediv(pl.element().sum())).alias('scroll_percentage_fixed_mmnorm_l1_w'),
    )
history_w.head(2)

user_id,scroll_percentage_fixed,scroll_percentage_fixed_norm,scroll_percentage_fixed_mmnorm,scroll_percentage_fixed_norm_l1_w,scroll_percentage_fixed_mmnorm_l1_w
u32,list[f32],list[f32],list[f32],list[f32],list[f32]
13538,"[100.0, 35.0, … 100.0]","[10.0, 5.91608, … 10.0]","[1.0, 0.35, … 1.0]","[0.003138, 0.001856, … 0.003138]","[0.004735, 0.001657, … 0.004735]"
14241,"[100.0, 46.0, … 100.0]","[10.0, 6.78233, … 10.0]","[1.0, 0.46, … 1.0]","[0.007106, 0.00482, … 0.007106]","[0.007959, 0.003661, … 0.007959]"


### Read time weight

In [13]:
history_w_articles = history_train.explode(pl.all().exclude('user_id')).join(
    articles.select('article_id', 
        (pl.col('body') + pl.col('title') + pl.col('subtitle')).str.len_chars().alias('article_id_fixed_article_len'),
        'last_modified_time', 'published_time'), left_on='article_id_fixed', right_on='article_id'
    )\
    .with_columns(
        (pl.col('impression_time_fixed') - pl.col('published_time')).alias('time_to_impression'),
    ).group_by('user_id').agg(pl.all())

In [14]:
history_w = history_w_articles.select('user_id', 'read_time_fixed', 'article_id_fixed_article_len')\
    .explode(pl.all().exclude('user_id'))\
    .with_columns(
        pl.col('read_time_fixed').truediv('article_id_fixed_article_len').fill_nan(0.0).alias('read_time_fixed_article_len_ratio'),
    ).with_columns(
        pl.when(pl.col('read_time_fixed_article_len_ratio').is_infinite()).then(0.0).otherwise(pl.col('read_time_fixed_article_len_ratio')).alias('read_time_fixed_article_len_ratio')
    ).group_by('user_id').agg(pl.all())\
    .with_columns(
        pl.col('read_time_fixed_article_len_ratio').list.eval(pl.element().truediv(pl.element().sum())).alias('read_time_fixed_article_len_ratio_l1_w'),
    )
history_w.head(2)

user_id,read_time_fixed,article_id_fixed_article_len,read_time_fixed_article_len_ratio,read_time_fixed_article_len_ratio_l1_w
u32,list[f32],list[u32],list[f64],list[f64]
1532472,"[41.0, 34.0, … 85.0]","[2385, 1692, … 2626]","[0.017191, 0.020095, … 0.032369]","[0.002094, 0.002447, … 0.003942]"
219986,"[25.0, 0.0, … 1077.0]","[1846, 1846, … 2672]","[0.013543, 0.0, … 0.403069]","[0.004822, 0.0, … 0.143524]"


### Impression time

In [15]:
history_w = history_w_articles.select('user_id', 'time_to_impression')\
    .explode(pl.all().exclude('user_id'))\
    .with_columns(
        pl.col('time_to_impression').dt.total_minutes().sqrt().alias('time_to_impression_sqrt'),
    ).group_by('user_id').agg(pl.all())\
    .with_columns(
        pl.col('time_to_impression_sqrt').list.eval(pl.element().truediv(pl.element().sum())).alias('time_to_impression_l1_w')
    )
history_w.head(2)

user_id,time_to_impression,time_to_impression_sqrt,time_to_impression_l1_w
u32,list[duration[μs]],list[f64],list[f64]
2155094,"[14m 34s, 10m 53s, … 3h 20m 4s]","[3.741657, 3.162278, … 14.142136]","[0.002097, 0.001772, … 0.007926]"
1324866,"[9m 44s, 10h 5m 59s, … 1h 6m 56s]","[3.0, 24.596748, … 8.124038]","[0.001583, 0.012977, … 0.004286]"


### Last k

In [16]:
history_len = history_w_articles['read_time_fixed'].list.len().to_list()
history_w = history_w_articles.select('user_id').with_columns(
    *[pl.Series([[1] * min(k, l) + [0] * max(0, l - k) for l in history_len], dtype=pl.List(pl.Int8)).alias(f'mask_w_{k}') for k in [5, 10, 15]]
)
history_w.head(2)

user_id,mask_w_5,mask_w_10,mask_w_15
u32,list[i8],list[i8],list[i8]
2424634,"[1, 1, … 0]","[1, 1, … 0]","[1, 1, … 0]"
1495533,"[1, 1, … 0]","[1, 1, … 0]","[1, 1, … 0]"


### Last k hours

In [17]:
behaviors_w = behaviors_train.select('impression_id', 'user_id', 'impression_time')\
    .join(history_w_articles.select('user_id', 'impression_time_fixed'), on='user_id')\
    .explode(pl.all().exclude('impression_id', 'user_id', 'impression_time'))\
    .with_columns(
        *[(pl.col('impression_time').sub(pl.col('impression_time_fixed')).dt.total_hours() <= k).cast(pl.Int8).alias(f'impression_time_last_{k}_hours_mask') for k in [24, 24*2, 24*3, 24*7, 24*14]]
    ).group_by('impression_id', 'user_id', 'impression_time').agg(pl.all())
behaviors_w.head(2)

impression_id,user_id,impression_time,impression_time_fixed,impression_time_last_24_hours_mask,impression_time_last_48_hours_mask,impression_time_last_72_hours_mask,impression_time_last_168_hours_mask,impression_time_last_336_hours_mask
u32,u32,datetime[μs],list[datetime[μs]],list[i8],list[i8],list[i8],list[i8],list[i8]
168444639,565751,2023-05-23 20:11:26,"[2023-04-27 15:13:10, 2023-04-28 11:01:07, … 2023-05-17 20:12:57]","[0, 0, … 0]","[0, 0, … 0]","[0, 0, … 0]","[0, 0, … 1]","[0, 0, … 1]"
223152368,2273080,2023-05-23 07:29:32,"[2023-05-05 06:10:16, 2023-05-05 06:10:55, … 2023-05-18 03:35:06]","[0, 0, … 0]","[0, 0, … 0]","[0, 0, … 0]","[0, 0, … 1]","[0, 0, … 1]"


In [18]:
history_w = history_w_articles.select('user_id', 'time_to_impression')\
    .explode(pl.all().exclude('user_id'))\
    .with_columns(
        pl.col('time_to_impression').dt.total_minutes().sqrt().alias('time_to_impression_sqrt'),
    ).group_by('user_id').agg(pl.all())\
    .with_columns(
        pl.col('time_to_impression_sqrt').list.eval(pl.element().truediv(pl.element().sum())).alias('time_to_impression_l1_w')
    )
history_w.head(2)

user_id,time_to_impression,time_to_impression_sqrt,time_to_impression_l1_w
u32,list[duration[μs]],list[f64],list[f64]
1743910,"[7h 21m 16s, 12h 1m 8s, … 7h 21m 4s]","[21.0, 26.851443, … 21.0]","[0.021262, 0.027187, … 0.021262]"
1101084,"[1h 44m 41s, 53m 20s, … 26m 14s]","[10.198039, 7.28011, … 5.09902]","[0.023936, 0.017087, … 0.011968]"


# Add all

In [19]:
history_w_articles = history_train.explode(pl.all().exclude('user_id')).join(
    articles.select('article_id', 
        (pl.col('body') + pl.col('title') + pl.col('subtitle')).str.len_chars().alias('article_id_fixed_article_len'),
        'last_modified_time', 'published_time'), left_on='article_id_fixed', right_on='article_id'
    )\
    .with_columns(
        (pl.col('impression_time_fixed') - pl.col('published_time')).alias('time_to_impression'),
    ).group_by('user_id').agg(pl.all())

In [20]:
history_w_articles.head(1)

user_id,impression_time_fixed,scroll_percentage_fixed,article_id_fixed,read_time_fixed,article_id_fixed_article_len,last_modified_time,published_time,time_to_impression
u32,list[datetime[μs]],list[f32],list[i32],list[f32],list[u32],list[datetime[μs]],list[datetime[μs]],list[duration[μs]]
959776,"[2023-04-27 10:14:57, 2023-04-27 10:15:24, … 2023-05-18 05:57:30]","[100.0, 100.0, … 100.0]","[9738663, 9738569, … 9770798]","[26.0, 48.0, … 1149.0]","[2091, 1608, … 1813]","[2023-06-29 06:48:22, 2023-06-29 06:48:22, … 2023-06-29 06:48:53]","[2023-04-27 10:08:17, 2023-04-27 09:33:16, … 2023-05-17 21:38:09]","[6m 40s, 42m 8s, … 8h 19m 21s]"


In [21]:
history_all_w = history_w_articles.select('user_id', 'time_to_impression', 'impression_time_fixed', 'scroll_percentage_fixed', 'read_time_fixed', 'article_id_fixed_article_len')\
    .explode(pl.all().exclude('user_id'))\
    .with_columns(pl.col('scroll_percentage_fixed').fill_null(0.0))\
    .with_columns(
        pl.col('read_time_fixed').truediv('article_id_fixed_article_len').fill_nan(0.0).alias('read_time_fixed_article_len_ratio'),
        # scroll_percentage
        (pl.col('scroll_percentage_fixed') - pl.col('scroll_percentage_fixed').min()).truediv(pl.col('scroll_percentage_fixed').max() - pl.col('scroll_percentage_fixed').min()).over('user_id').alias('scroll_percentage_fixed_mmnorm'),
        # time_to_impression
        pl.col('time_to_impression').dt.total_minutes().sqrt().alias('time_to_impression_minutes_sqrt'),
    ).with_columns(
        pl.when(pl.col('read_time_fixed_article_len_ratio').is_infinite()).then(0.0).otherwise(pl.col('read_time_fixed_article_len_ratio')).alias('read_time_fixed_article_len_ratio')
    ).group_by('user_id').agg(pl.all())\
    .with_columns(
        pl.col('read_time_fixed_article_len_ratio').list.eval(pl.element().truediv(pl.element().sum())).alias('read_time_fixed_article_len_ratio_l1_w'),
        pl.col('scroll_percentage_fixed_mmnorm').list.eval(pl.element().truediv(pl.element().sum())).alias('scroll_percentage_fixed_mmnorm_l1_w'),
        pl.col('time_to_impression_minutes_sqrt').list.eval(pl.element().truediv(pl.element().sum())).alias('time_to_impression_minutes_sqrt_l1_w'),
    ).select('user_id', 'read_time_fixed_article_len_ratio_l1_w', 'scroll_percentage_fixed_mmnorm_l1_w', 'time_to_impression_minutes_sqrt_l1_w')
history_all_w.head(1)

user_id,read_time_fixed_article_len_ratio_l1_w,scroll_percentage_fixed_mmnorm_l1_w,time_to_impression_minutes_sqrt_l1_w
u32,list[f64],list[f32],list[f64]
1718475,"[0.467089, 0.0, … 0.0]","[0.5, 0.0, … 0.0]","[0.053021, 0.117523, … 0.300137]"


# Multiple embeddings

In [22]:
emb_name_list = {'Ekstra_Bladet_contrastive_vector': 'contrastive_vector',
                 'FacebookAI_xlm_roberta_base': 'xlm_roberta_base',
                 'Ekstra_Bladet_image_embeddings': 'image_embeddings',
                 'google_bert_base_multilingual_cased': 'bert_base_multilingual_cased'}

In [98]:
def build_emb_scores(df: pl.DataFrame, history_m: np.ndarray, m_dict:dict[str, np.ndarray]):
    df = reduce_polars_df_memory_size(df)
    print(f'Starting to build embeddings scores for {m_dict.keys()}...')
    df = pl.concat([
        slice.explode(['article_index', 'article']).with_columns(
            *[pl.lit(np.dot(m[slice['article_index'].explode().to_numpy()], m[history_m[key[0]]].T)).alias(f'{emb_name}_scores') for emb_name, m in m_dict.items()]
        )\
        .group_by(['impression_id', 'user_id', 'user_index'])\
        .agg(pl.all())
        for key, slice in tqdm(df.partition_by(by=['user_index'], as_dict=True).items(), total=df['user_index'].n_unique()) # keep only 1000 for testing
    ]).drop('article_index', 'user_index')
    return df

def build_agg_scores(df: pl.DataFrame, emb_names: list[str] = None):
    df = reduce_polars_df_memory_size(df)
    if emb_names is None:
        emb_names = [col for col in df.columns if 'scores' in col]
    print(f'Starting to build aggregated scores for {emb_names}...')
    df = df.with_columns(
        *[pl.col(col).list.eval(pl.element().list.mean()).name.suffix('_mean') for col in emb_names],
        *[pl.col(col).list.eval(pl.element().list.max()).name.suffix('_max') for col in emb_names],
        *[pl.col(col).list.eval(pl.element().list.min()).name.suffix('_min') for col in emb_names],
        *[pl.col(col).list.eval(pl.element().list.std()).name.suffix('_std') for col in emb_names],
        *[pl.col(col).list.eval(pl.element().list.median()).name.suffix('_median') for col in emb_names],
    )
    return df


In [29]:
norm_m_dict = {}
article_emb_mapping = articles.select('article_id').unique().with_row_index()
for dir, file_name in emb_name_list.items():
    print(f'Processing {file_name} embedding matrix...')
    emb_df = pl.read_parquet(emb_dir / dir / f'{file_name}.parquet')
    emb_df.columns = ['article_id', 'embedding']
    
    emb_size = len(emb_df['embedding'][0])
    missing_articles_in_embedding = list(set(articles['article_id'].to_numpy()) - set(emb_df['article_id'].to_numpy()))
    if len(missing_articles_in_embedding) > 0:
        print(f'[Warning... {len(missing_articles_in_embedding)} missing articles in embedding matrix]')
        null_vector = np.zeros(emb_size, dtype=np.float32)
        emb_df = emb_df.vstack(pl.DataFrame({'article_id': missing_articles_in_embedding, 'embedding': [null_vector] * len(missing_articles_in_embedding)}))
        
    emb_df = article_emb_mapping.join(emb_df, on='article_id', how='left')
    m = np.array([np.array(row) for row in emb_df['embedding'].to_numpy()])
    row_norms = np.linalg.norm(m, axis=1, keepdims=True)
    m = m / (row_norms + 1e-6)
    norm_m_dict[file_name] = m

Processing contrastive_vector embedding matrix...
Processing xlm_roberta_base embedding matrix...
Processing image_embeddings embedding matrix...
Processing bert_base_multilingual_cased embedding matrix...


In [36]:
history_m = history_train\
    .select('user_id', pl.col('article_id_fixed').list.eval(
                pl.element().replace(article_emb_mapping['article_id'], article_emb_mapping['index'], default=None)))\
    .with_row_index('user_index')

user_history_map = history_m.select('user_id', 'user_index')
history_m = history_m['article_id_fixed'].to_numpy()
train_ds = behaviors_train[:1000].select('impression_id', 'user_id', pl.col('article_ids_inview').alias('article'))\
    .join(user_history_map, on='user_id')\
    .with_columns(
        pl.col('article').list.eval(pl.element().replace(article_emb_mapping['article_id'], article_emb_mapping['index'], default=None)).name.suffix('_index'),
    ).drop('impression_time_fixed', 'scroll_percentage_fixed', 'read_time_fixed')

train_ds = build_emb_scores(train_ds, history_m, m_dict=norm_m_dict)
train_ds_agg = build_agg_scores(train_ds, history_train)
# agg_scores_col = [col for col in train_ds.columns if '_scores_' in col]
# train_ds = train_ds.drop([f'{emb_name}_scores' for emb_name in list(norm_m_dict.keys())]).explode(['article'] + agg_scores_col)
train_ds.head()

Memory usage of dataframe is 0.11 MB
Memory usage after optimization is: 0.09 MB
Decreased by 20.6%
Starting to build embeddings scores for dict_keys(['contrastive_vector', 'xlm_roberta_base', 'image_embeddings', 'bert_base_multilingual_cased'])...


100%|██████████| 466/466 [00:07<00:00, 63.80it/s] 


impression_id,user_id,article,contrastive_vector_scores,xlm_roberta_base_scores,image_embeddings_scores,bert_base_multilingual_cased_scores
u32,u32,list[i32],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f32]]
2097255,63123,"[9771916, 9771938, … 9771855]","[[0.557273, 0.317587, … 0.145003], [0.182505, 0.066342, … 0.131874], … [0.116064, 0.052735, … 0.095286]]","[[0.999381, 0.998612, … 0.999212], [0.998721, 0.997995, … 0.998756], … [0.998922, 0.998184, … 0.998931]]","[[0.618652, 0.237785, … 0.0], [0.022949, -0.205564, … 0.0], … [0.395846, 0.083482, … 0.0]]","[[0.986466, 0.981622, … 0.985318], [0.969257, 0.968638, … 0.966765], … [0.977267, 0.980927, … 0.981054]]"
2097252,63123,"[9761926, 9771896, … 9769370]","[[0.107697, 0.240556, … 0.207324], [0.269079, 0.082241, … 0.149119], … [0.188112, 0.083346, … 0.402493]]","[[0.998851, 0.998298, … 0.998933], [0.999403, 0.999, … 0.999468], … [0.999225, 0.998372, … 0.999208]]","[[0.339977, 0.162691, … 0.0], [0.174351, 0.361142, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.981906, 0.986131, … 0.984856], [0.985936, 0.985265, … 0.98395], … [0.985635, 0.984928, … 0.988126]]"
2099252,84383,"[9771187, 9771919, … 9769370]","[[0.335738, 0.274959, … 0.292364], [0.154554, 0.258899, … 0.204154], … [0.122367, 0.401185, … 0.173453]]","[[0.999364, 0.99929, … 0.999399], [0.999168, 0.999351, … 0.999357], … [0.999324, 0.999554, … 0.999526]]","[[0.439715, 0.233114, … 0.336275], [0.442196, -0.204846, … 0.343981], … [0.0, 0.0, … 0.0]]","[[0.951342, 0.942027, … 0.947812], [0.989138, 0.987074, … 0.988399], … [0.988274, 0.987833, … 0.985349]]"
2099250,84383,"[9686860, 9702964, … 9771919]","[[0.182729, 0.550608, … 0.136484], [0.168673, 0.700092, … 0.182869], … [0.154554, 0.258899, … 0.204154]]","[[0.999335, 0.999449, … 0.99938], [0.999257, 0.999564, … 0.999384], … [0.999168, 0.999351, … 0.999357]]","[[0.179768, 0.514603, … 0.060329], [0.092108, -0.099428, … 0.083508], … [0.442196, -0.204846, … 0.343981]]","[[0.988752, 0.983094, … 0.984562], [0.986144, 0.984657, … 0.983357], … [0.989138, 0.987074, … 0.988399]]"
2099253,84383,"[9771916, 9771187, … 9769348]","[[0.235649, 0.25459, … 0.14069], [0.335738, 0.274959, … 0.292364], … [0.133124, 0.523986, … 0.167435]]","[[0.999306, 0.999261, … 0.999192], [0.999364, 0.99929, … 0.999399], … [0.999006, 0.999353, … 0.999236]]","[[0.465422, 0.067244, … 0.059784], [0.439715, 0.233114, … 0.336275], … [0.350681, 0.199099, … 0.166773]]","[[0.98583, 0.978367, … 0.978577], [0.951342, 0.942027, … 0.947812], … [0.981203, 0.978487, … 0.983893]]"


## Apply weight

In [99]:
x = train_ds.join(
    history_all_w, on='user_id', how='left'
)
x.head(2)

impression_id,user_id,article,contrastive_vector_scores,xlm_roberta_base_scores,image_embeddings_scores,bert_base_multilingual_cased_scores,read_time_fixed_article_len_ratio_l1_w,scroll_percentage_fixed_mmnorm_l1_w,time_to_impression_minutes_sqrt_l1_w
u32,u32,list[i32],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f32]],list[f64],list[f32],list[f64]
2097255,63123,"[9771916, 9771938, … 9771855]","[[0.557273, 0.317587, … 0.145003], [0.182505, 0.066342, … 0.131874], … [0.116064, 0.052735, … 0.095286]]","[[0.999381, 0.998612, … 0.999212], [0.998721, 0.997995, … 0.998756], … [0.998922, 0.998184, … 0.998931]]","[[0.618652, 0.237785, … 0.0], [0.022949, -0.205564, … 0.0], … [0.395846, 0.083482, … 0.0]]","[[0.986466, 0.981622, … 0.985318], [0.969257, 0.968638, … 0.966765], … [0.977267, 0.980927, … 0.981054]]","[0.00012, 0.000011, … 0.0]","[0.000822, 0.00069, … 0.0]","[0.000552, 0.000264, … 0.000329]"
2097252,63123,"[9761926, 9771896, … 9769370]","[[0.107697, 0.240556, … 0.207324], [0.269079, 0.082241, … 0.149119], … [0.188112, 0.083346, … 0.402493]]","[[0.998851, 0.998298, … 0.998933], [0.999403, 0.999, … 0.999468], … [0.999225, 0.998372, … 0.999208]]","[[0.339977, 0.162691, … 0.0], [0.174351, 0.361142, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.981906, 0.986131, … 0.984856], [0.985936, 0.985265, … 0.98395], … [0.985635, 0.984928, … 0.988126]]","[0.00012, 0.000011, … 0.0]","[0.000822, 0.00069, … 0.0]","[0.000552, 0.000264, … 0.000329]"


In [100]:
l1_w_cols = [col for col in x.columns if col.endswith('_l1_w')]
scores_cols = [col for col in x.columns if col.endswith('_scores')]
train_ds_w = pl.concat([
    slice.explode(['article'] + scores_cols).with_columns(
        *[pl.lit(
            np.array([np.array(i) for i in slice[col_score].explode().to_numpy()]) * slice[col_w][0].to_numpy()
        ).alias(f'{col_score}_weighted_{col_w}')
        for col_w in l1_w_cols for col_score in scores_cols]
    ).drop(l1_w_cols).group_by('impression_id', 'user_id').agg(pl.all())
    for key, slice in tqdm(x.partition_by(by=['user_id'], as_dict=True).items(), total=x['user_id'].n_unique())    
])
train_ds_w.head(2)



[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 466/466 [00:00<00:00, 502.65it/s]


impression_id,user_id,article,contrastive_vector_scores,xlm_roberta_base_scores,image_embeddings_scores,bert_base_multilingual_cased_scores,contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w,xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w,image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w,bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w,contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w,xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w,image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w
u32,u32,list[i32],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f64]],list[list[f64]],list[list[f64]],list[list[f64]],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f32]],list[list[f64]],list[list[f64]],list[list[f64]],list[list[f64]]
2097252,63123,"[9761926, 9771896, … 9769370]","[[0.107697, 0.240556, … 0.207324], [0.269079, 0.082241, … 0.149119], … [0.188112, 0.083346, … 0.402493]]","[[0.998851, 0.998298, … 0.998933], [0.999403, 0.999, … 0.999468], … [0.999225, 0.998372, … 0.999208]]","[[0.339977, 0.162691, … 0.0], [0.174351, 0.361142, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.981906, 0.986131, … 0.984856], [0.985936, 0.985265, … 0.98395], … [0.985635, 0.984928, … 0.988126]]","[[0.000013, 0.000003, … 0.0], [0.000032, 8.8333e-7, … 0.0], … [0.000023, 8.9520e-7, … 0.0]]","[[0.00012, 0.000011, … 0.0], [0.00012, 0.000011, … 0.0], … [0.00012, 0.000011, … 0.0]]","[[0.000041, 0.000002, … 0.0], [0.000021, 0.000004, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.000118, 0.000011, … 0.0], [0.000118, 0.000011, … 0.0], … [0.000118, 0.000011, … 0.0]]","[[0.000089, 0.000166, … 0.0], [0.000221, 0.000057, … 0.0], … [0.000155, 0.000058, … 0.0]]","[[0.000821, 0.000689, … 0.0], [0.000821, 0.00069, … 0.0], … [0.000821, 0.000689, … 0.0]]","[[0.000279, 0.000112, … 0.0], [0.000143, 0.000249, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.000807, 0.000681, … 0.0], [0.00081, 0.00068, … 0.0], … [0.00081, 0.00068, … 0.0]]","[[0.000059, 0.000064, … 0.000068], [0.000149, 0.000022, … 0.000049], … [0.000104, 0.000022, … 0.000132]]","[[0.000552, 0.000264, … 0.000328], [0.000552, 0.000264, … 0.000329], … [0.000552, 0.000264, … 0.000328]]","[[0.000188, 0.000043, … 0.0], [0.000096, 0.000095, … 0.0], … [0.0, 0.0, … 0.0]]","[[0.000542, 0.000261, … 0.000324], [0.000545, 0.000261, … 0.000323], … [0.000544, 0.00026, … 0.000325]]"
2097255,63123,"[9771916, 9771938, … 9771855]","[[0.557273, 0.317587, … 0.145003], [0.182505, 0.066342, … 0.131874], … [0.116064, 0.052735, … 0.095286]]","[[0.999381, 0.998612, … 0.999212], [0.998721, 0.997995, … 0.998756], … [0.998922, 0.998184, … 0.998931]]","[[0.618652, 0.237785, … 0.0], [0.022949, -0.205564, … 0.0], … [0.395846, 0.083482, … 0.0]]","[[0.986466, 0.981622, … 0.985318], [0.969257, 0.968638, … 0.966765], … [0.977267, 0.980927, … 0.981054]]","[[0.000067, 0.000003, … 0.0], [0.000022, 7.1257e-7, … 0.0], … [0.000014, 5.6641e-7, … 0.0]]","[[0.00012, 0.000011, … 0.0], [0.00012, 0.000011, … 0.0], … [0.00012, 0.000011, … 0.0]]","[[0.000074, 0.000003, … 0.0], [0.000003, -0.000002, … 0.0], … [0.000048, 8.9666e-7, … 0.0]]","[[0.000118, 0.000011, … 0.0], [0.000116, 0.00001, … 0.0], … [0.000117, 0.000011, … 0.0]]","[[0.000458, 0.000219, … 0.0], [0.00015, 0.000046, … 0.0], … [0.000095, 0.000036, … 0.0]]","[[0.000821, 0.000689, … 0.0], [0.000821, 0.000689, … 0.0], … [0.000821, 0.000689, … 0.0]]","[[0.000508, 0.000164, … 0.0], [0.000019, -0.000142, … 0.0], … [0.000325, 0.000058, … 0.0]]","[[0.000811, 0.000678, … 0.0], [0.000797, 0.000669, … 0.0], … [0.000803, 0.000677, … 0.0]]","[[0.000308, 0.000084, … 0.000048], [0.000101, 0.000018, … 0.000043], … [0.000064, 0.000014, … 0.000031]]","[[0.000552, 0.000264, … 0.000328], [0.000552, 0.000264, … 0.000328], … [0.000552, 0.000264, … 0.000328]]","[[0.000342, 0.000063, … 0.0], [0.000013, -0.000054, … 0.0], … [0.000219, 0.000022, … 0.0]]","[[0.000545, 0.00026, … 0.000324], [0.000535, 0.000256, … 0.000318], … [0.00054, 0.000259, … 0.000323]]"


In [103]:
scores_col = [col for col in train_ds_w.columns if '_scores' in col]
train_ds_w_aggs = build_agg_scores(train_ds_w).drop(scores_col)
train_ds_w_aggs.head(2)

Memory usage of dataframe is 302.64 MB
Memory usage after optimization is: 302.64 MB
Decreased by 0.0%
Starting to build aggregated scores for ['contrastive_vector_scores', 'xlm_roberta_base_scores', 'image_embeddings_scores', 'bert_base_multilingual_cased_scores', 'contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w', 'xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w', 'image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w', 'bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w', 'contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w', 'xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w', 'image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w', 'bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w', 'contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w', 'xlm_roberta_base_scores_weighted_time_to_i

impression_id,user_id,article,contrastive_vector_scores_mean,xlm_roberta_base_scores_mean,image_embeddings_scores_mean,bert_base_multilingual_cased_scores_mean,contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w_mean,xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w_mean,image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w_mean,bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w_mean,contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_mean,xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_mean,image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_mean,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_mean,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w_mean,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w_mean,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w_mean,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w_mean,contrastive_vector_scores_max,xlm_roberta_base_scores_max,image_embeddings_scores_max,bert_base_multilingual_cased_scores_max,contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w_max,xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w_max,image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w_max,bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w_max,contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_max,xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_max,image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_max,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_max,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w_max,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w_max,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w_max,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w_max,contrastive_vector_scores_min,xlm_roberta_base_scores_min,…,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_min,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w_min,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w_min,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w_min,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w_min,contrastive_vector_scores_std,xlm_roberta_base_scores_std,image_embeddings_scores_std,bert_base_multilingual_cased_scores_std,contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w_std,xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w_std,image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w_std,bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w_std,contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_std,xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_std,image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_std,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_std,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w_std,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w_std,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w_std,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w_std,contrastive_vector_scores_median,xlm_roberta_base_scores_median,image_embeddings_scores_median,bert_base_multilingual_cased_scores_median,contrastive_vector_scores_weighted_read_time_fixed_article_len_ratio_l1_w_median,xlm_roberta_base_scores_weighted_read_time_fixed_article_len_ratio_l1_w_median,image_embeddings_scores_weighted_read_time_fixed_article_len_ratio_l1_w_median,bert_base_multilingual_cased_scores_weighted_read_time_fixed_article_len_ratio_l1_w_median,contrastive_vector_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_median,xlm_roberta_base_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_median,image_embeddings_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_median,bert_base_multilingual_cased_scores_weighted_scroll_percentage_fixed_mmnorm_l1_w_median,contrastive_vector_scores_weighted_time_to_impression_minutes_sqrt_l1_w_median,xlm_roberta_base_scores_weighted_time_to_impression_minutes_sqrt_l1_w_median,image_embeddings_scores_weighted_time_to_impression_minutes_sqrt_l1_w_median,bert_base_multilingual_cased_scores_weighted_time_to_impression_minutes_sqrt_l1_w_median
u32,u32,list[i32],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],…,list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64],list[f32],list[f32],list[f32],list[f32],list[f64],list[f64],list[f64],list[f64]
2097252,63123,"[9761926, 9771896, … 9769370]","[0.175282, 0.2935, … 0.229902]","[0.999008, 0.999115, … 0.99925]","[0.194606, 0.14915, … 0.0]","[0.966587, 0.965852, … 0.9677]","[0.00015, 0.000195, … 0.000106]","[0.001376, 0.001376, … 0.001376]","[0.00033, 0.000317, … 0.0]","[0.00127, 0.001265, … 0.001271]","[0.000255, 0.000428, … 0.000342]","[0.001376, 0.001376, … 0.001376]","[0.000278, 0.000226, … 0.0]","[0.001337, 0.001336, … 0.001338]","[0.000312, 0.000492, … 0.000326]","[0.001376, 0.001376, … 0.001376]","[0.000259, 0.00022, … 0.0]","[0.001336, 0.001335, … 0.001336]","[0.797017, 0.771882, … 0.716651]","[0.999568, 0.999626, … 0.999731]","[0.911962, 0.80103, … 0.0]","[0.99214, 0.993042, … 0.993572]","[0.006662, 0.014161, … 0.005872]","[0.1747, 0.174706, … 0.174748]","[0.090131, 0.11578, … 0.0]","[0.163513, 0.161841, … 0.163951]","[0.002532, 0.002505, … 0.002356]","[0.003286, 0.003286, … 0.003286]","[0.002998, 0.002599, … 0.0]","[0.003258, 0.003264, … 0.003264]","[0.011884, 0.01939, … 0.00858]","[0.059949, 0.059941, … 0.059956]","[0.013416, 0.007883, … 0.0]","[0.058689, 0.059149, … 0.058883]","[-0.0649, -0.109783, … -0.180447]","[0.992307, 0.994077, … 0.992055]",…,"[-0.0, -0.0, … -0.0]","[-0.000047, -0.000131, … -0.00023]","[0.000113, 0.000113, … 0.000113]","[-0.003801, -0.00301, … 0.0]","[-0.000153, -0.00015, … -0.000147]","[0.103359, 0.186201, … 0.143388]","[0.000528, 0.000401, … 0.000559]","[0.247309, 0.217667, … 0.0]","[0.074262, 0.07411, … 0.074224]","[0.000592, 0.001078, … 0.000829]","[0.008901, 0.008901, … 0.008903]","[0.003747, 0.004454, … 0.0]","[0.007923, 0.007866, … 0.007937]","[0.000298, 0.000482, … 0.000362]","[0.001106, 0.001106, … 0.001106]","[0.000472, 0.000448, … 0.0]","[0.001072, 0.001071, … 0.001073]","[0.001201, 0.00166, … 0.000819]","[0.003389, 0.003388, … 0.003389]","[0.000912, 0.000793, … 0.0]","[0.003331, 0.003339, … 0.003331]","[0.163243, 0.262363, … 0.222947]","[0.999121, 0.99918, … 0.99936]","[0.123727, 0.079192, … 0.0]","[0.982949, 0.98281, … 0.983546]","[0.000013, 0.000019, … 0.000016]","[0.000089, 0.000089, … 0.000089]","[0.000005, 0.000003, … 0.0]","[0.000086, 0.000085, … 0.000086]","[0.000176, 0.000265, … 0.000227]","[0.001084, 0.001084, … 0.001084]","[0.00002, 0.0, … 0.0]","[0.001044, 0.001046, … 0.001051]","[0.000116, 0.000185, … 0.000154]","[0.000726, 0.000726, … 0.000726]","[0.00008, 0.000057, … 0.0]","[0.000698, 0.000694, … 0.000698]"
2097255,63123,"[9771916, 9771938, … 9771855]","[0.200907, 0.154063, … 0.251488]","[0.999051, 0.998986, … 0.999161]","[0.198911, 0.193671, … 0.162336]","[0.961375, 0.951313, … 0.965217]","[0.000126, 0.000107, … 0.000175]","[0.001376, 0.001376, … 0.001376]","[0.000226, 0.00029, … 0.000337]","[0.001262, 0.001249, … 0.001273]","[0.000277, 0.000225, … 0.000368]","[0.001376, 0.001376, … 0.001376]","[0.000282, 0.000275, … 0.00024]","[0.00133, 0.001316, … 0.001335]","[0.000268, 0.000226, … 0.000474]","[0.001376, 0.001376, … 0.001376]","[0.000345, 0.000259, … 0.000233]","[0.001327, 0.001313, … 0.001333]","[0.909731, 0.94831, … 0.87319]","[0.999661, 0.999873, … 0.999793]","[0.791619, 0.880052, … 0.867757]","[0.990057, 0.993238, … 0.99174]","[0.008613, 0.00695, … 0.012875]","[0.174697, 0.174734, … 0.174727]","[0.019199, 0.066946, … 0.088744]","[0.162056, 0.163121, … 0.16432]","[0.002857, 0.002695, … 0.00283]","[0.003286, 0.003286, … 0.003287]","[0.002522, 0.002729, … 0.002853]","[0.003238, 0.003249, … 0.003249]","[0.007098, 0.007963, … 0.021301]","[0.059936, 0.059946, … 0.05997]","[0.025202, 0.009532, … 0.007328]","[0.058207, 0.058001, … 0.058468]","[-0.117726, -0.097062, … -0.063647]","[0.992668, 0.992133, … 0.991547]",…,"[-0.0, -0.0, … -0.0]","[-0.00014, -0.000057, … -0.000071]","[0.000113, 0.000113, … 0.000113]","[-0.00125, -0.011343, … -0.001147]","[-0.000137, -0.000145, … -0.000143]","[0.176701, 0.171519, … 0.187725]","[0.000481, 0.00058, … 0.000631]","[0.237967, 0.261545, … 0.235213]","[0.073118, 0.073395, … 0.072741]","[0.000605, 0.000469, … 0.000957]","[0.0089, 0.008902, … 0.008903]","[0.00142, 0.003239, … 0.003578]","[0.00787, 0.007832, … 0.007961]","[0.000348, 0.000362, … 0.000473]","[0.001106, 0.001106, … 0.001106]","[0.000463, 0.000523, … 0.000473]","[0.001066, 0.001054, … 0.001071]","[0.000639, 0.000605, … 0.001857]","[0.003388, 0.003388, … 0.00339]","[0.001371, 0.001098, … 0.000711]","[0.003303, 0.003267, … 0.003316]","[0.159288, 0.101472, … 0.206196]","[0.999138, 0.999105, … 0.999317]","[0.163326, 0.129527, … 0.091122]","[0.977341, 0.966508, … 0.979482]","[0.000012, 0.000008, … 0.000015]","[0.000089, 0.000089, … 0.000089]","[0.000007, 0.000004, … 0.000003]","[0.000085, 0.000084, … 0.000085]","[0.000166, 0.00011, … 0.000211]","[0.001084, 0.001084, … 0.001084]","[0.000085, 0.0, … 0.0]","[0.00104, 0.001038, … 0.00105]","[0.000118, 0.000073, … 0.000144]","[0.000726, 0.000726, … 0.000726]","[0.000091, 0.000084, … 0.000057]","[0.000694, 0.000686, … 0.000699]"
