## Setup


In [None]:
import os
import sys

from datasets import load_dataset
from loguru import logger
from pydantic import BaseModel

sys.path.insert(0, "..")

from src.sampling_raw_data import InteractionDataSampler

In [2]:
class Args(BaseModel):
    run_name: str = "00-prep-data"
    testing: bool = True
    notebook_persist_dp: str = None
    random_seed: int = 42

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    sample_users: int = 1000
    min_user_interactions: int = 5
    min_item_interactions: int = 10

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        if not self.testing:
            os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

{
  "run_name": "00-prep-data",
  "testing": true,
  "notebook_persist_dp": "/home/duong/Documents/datn1/notebooks/data/00-prep-data",
  "random_seed": 42,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "sample_users": 1000,
  "min_user_interactions": 5,
  "min_item_interactions": 10
}


## Load data


In [3]:
dataset = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    "5core_timestamp_Toys_and_Games",
    trust_remote_code=True,
)


def parse_dtype(df):
    return df.assign(
        rating=lambda df: df[args.rating_col].astype(float),
        timestamp=lambda df: df[args.timestamp_col].astype(int),
    )


train_raw = dataset["train"].to_pandas().pipe(parse_dtype)
val_raw = dataset["valid"].to_pandas().pipe(parse_dtype)
test_raw = dataset["test"].to_pandas().pipe(parse_dtype)

In [4]:
import pandas as pd


def print_min_max_time(df, df_name):
    # Lấy min và max timestamp
    min_time = df["timestamp"].min()
    max_time = df["timestamp"].max()

    # Chuyển đổi từ milliseconds sang seconds và thành datetime (UTC)
    min_datetime = pd.to_datetime(min_time, unit="ms").tz_localize("UTC")
    max_datetime = pd.to_datetime(max_time, unit="ms").tz_localize("UTC")

    # Chuyển đổi sang timezone khác (ví dụ: 'US/Pacific')
    min_datetime_pst = min_datetime.tz_convert("US/Pacific")
    max_datetime_pst = max_datetime.tz_convert("US/Pacific")

    print(f"\n{df_name}:")
    print(
        f"- Min timestamp (raw): {min_time} → Datetime (UTC): {min_datetime} → PST: {min_datetime_pst}"
    )
    print(
        f"- Max timestamp (raw): {max_time} → Datetime (UTC): {max_datetime} → PST: {max_datetime_pst}"
    )


# Áp dụng cho từng tập dữ liệu
print_min_max_time(train_raw, "Train")
print_min_max_time(val_raw, "Validation")
print_min_max_time(test_raw, "Test")


Train:
- Min timestamp (raw): 952575870000 → Datetime (UTC): 2000-03-09 04:24:30+00:00 → PST: 2000-03-08 20:24:30-08:00
- Max timestamp (raw): 1628643348297 → Datetime (UTC): 2021-08-11 00:55:48.297000+00:00 → PST: 2021-08-10 17:55:48.297000-07:00

Validation:
- Min timestamp (raw): 1628643494904 → Datetime (UTC): 2021-08-11 00:58:14.904000+00:00 → PST: 2021-08-10 17:58:14.904000-07:00
- Max timestamp (raw): 1658002608846 → Datetime (UTC): 2022-07-16 20:16:48.846000+00:00 → PST: 2022-07-16 13:16:48.846000-07:00

Test:
- Min timestamp (raw): 1658002857227 → Datetime (UTC): 2022-07-16 20:20:57.227000+00:00 → PST: 2022-07-16 13:20:57.227000-07:00
- Max timestamp (raw): 1694444496685 → Datetime (UTC): 2023-09-11 15:01:36.685000+00:00 → PST: 2023-09-11 08:01:36.685000-07:00


In [5]:
train_raw

Unnamed: 0,user_id,parent_asin,rating,timestamp
0,AGKASBHYZPGTEPO6LWZPVJWB2BVA,B006GBITXC,3.0,1452647382000
1,AGKASBHYZPGTEPO6LWZPVJWB2BVA,B00TLEMSVK,4.0,1454675785000
2,AGKASBHYZPGTEPO6LWZPVJWB2BVA,B00SO7HF6I,3.0,1454676014000
3,AGKASBHYZPGTEPO6LWZPVJWB2BVA,B00MZG6OO8,3.0,1471541996000
4,AGKASBHYZPGTEPO6LWZPVJWB2BVA,B007JWWUDW,5.0,1471542588000
...,...,...,...,...
3114942,AHMDS2PYZIJWE6SBXGDTBSJ4SRLA,B00BY2ER66,1.0,1379154911000
3114943,AHMDS2PYZIJWE6SBXGDTBSJ4SRLA,B00CVDMCH8,3.0,1379155054000
3114944,AHMDS2PYZIJWE6SBXGDTBSJ4SRLA,B00BY2ER6G,5.0,1379155144000
3114945,AHMDS2PYZIJWE6SBXGDTBSJ4SRLA,B00I8Z6GAM,5.0,1420664276000


In [6]:
import datetime

max_timestamp = train_raw["timestamp"].max()
max_datetime = datetime.datetime.fromtimestamp(max_timestamp / 1000)
print("Max datetime:", max_datetime)

Max datetime: 2021-08-11 07:55:48.297000


In [7]:
import datetime

max_timestamp = val_raw["timestamp"].max()
max_datetime = datetime.datetime.fromtimestamp(max_timestamp / 1000)
print("Max datetime:", max_datetime)

Max datetime: 2022-07-17 03:16:48.846000


## Sample

In [8]:
data_sampler = InteractionDataSampler(
    user_col=args.user_col,
    item_col=args.item_col,
    sample_users=args.sample_users,
    min_val_records=1000,
    random_seed=args.random_seed,
    min_item_interactions=args.min_item_interactions,
    min_user_interactions=args.min_user_interactions,
    debug=False,
)
sample_df, val_sample_df = data_sampler.sample(train_raw, val_raw)

[32m2025-06-19 02:02:40.177[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 1: num_users=407,842[0m
[32m2025-06-19 02:02:40.178[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 407,842 are still greater than expected, keep removing...[0m




Randomly removing 2529 users - Round 2 started


[32m2025-06-19 02:02:45.250[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 2: num_users=252,964[0m
[32m2025-06-19 02:02:45.250[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 252,964 are still greater than expected, keep removing...[0m




Randomly removing 2494 users - Round 3 started


[32m2025-06-19 02:02:51.290[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 3: num_users=249,424[0m
[32m2025-06-19 02:02:51.291[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 249,424 are still greater than expected, keep removing...[0m




Randomly removing 2459 users - Round 4 started


[32m2025-06-19 02:02:55.484[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 4: num_users=245,935[0m
[32m2025-06-19 02:02:55.484[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 245,935 are still greater than expected, keep removing...[0m




Randomly removing 2424 users - Round 5 started


[32m2025-06-19 02:02:59.629[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 5: num_users=242,418[0m
[32m2025-06-19 02:02:59.630[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 242,418 are still greater than expected, keep removing...[0m




Randomly removing 2391 users - Round 6 started


[32m2025-06-19 02:03:05.255[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 6: num_users=239,120[0m
[32m2025-06-19 02:03:05.255[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 239,120 are still greater than expected, keep removing...[0m




Randomly removing 2357 users - Round 7 started


[32m2025-06-19 02:03:09.363[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 7: num_users=235,739[0m
[32m2025-06-19 02:03:09.363[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 235,739 are still greater than expected, keep removing...[0m




Randomly removing 2325 users - Round 8 started


[32m2025-06-19 02:03:14.130[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 8: num_users=232,584[0m
[32m2025-06-19 02:03:14.131[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 232,584 are still greater than expected, keep removing...[0m




Randomly removing 2293 users - Round 9 started


[32m2025-06-19 02:03:18.175[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 9: num_users=229,365[0m
[32m2025-06-19 02:03:18.176[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 229,365 are still greater than expected, keep removing...[0m




Randomly removing 2261 users - Round 10 started


[32m2025-06-19 02:03:23.547[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 10: num_users=226,131[0m
[32m2025-06-19 02:03:23.547[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 226,131 are still greater than expected, keep removing...[0m




Randomly removing 2229 users - Round 11 started


[32m2025-06-19 02:03:27.265[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 11: num_users=222,920[0m
[32m2025-06-19 02:03:27.265[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 222,920 are still greater than expected, keep removing...[0m




Randomly removing 2197 users - Round 12 started


[32m2025-06-19 02:03:35.523[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 12: num_users=219,741[0m
[32m2025-06-19 02:03:35.523[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 219,741 are still greater than expected, keep removing...[0m




Randomly removing 2165 users - Round 13 started


[32m2025-06-19 02:03:41.044[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 13: num_users=216,543[0m
[32m2025-06-19 02:03:41.045[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 216,543 are still greater than expected, keep removing...[0m




Randomly removing 2133 users - Round 14 started


[32m2025-06-19 02:03:45.954[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 14: num_users=213,372[0m
[32m2025-06-19 02:03:45.954[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 213,372 are still greater than expected, keep removing...[0m




Randomly removing 2102 users - Round 15 started


[32m2025-06-19 02:03:51.327[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 15: num_users=210,253[0m
[32m2025-06-19 02:03:51.328[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 210,253 are still greater than expected, keep removing...[0m




Randomly removing 2072 users - Round 16 started


[32m2025-06-19 02:03:55.442[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 16: num_users=207,247[0m
[32m2025-06-19 02:03:55.442[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 207,247 are still greater than expected, keep removing...[0m




Randomly removing 2042 users - Round 17 started


[32m2025-06-19 02:03:59.409[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 17: num_users=204,211[0m
[32m2025-06-19 02:03:59.410[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 204,211 are still greater than expected, keep removing...[0m




Randomly removing 2013 users - Round 18 started


[32m2025-06-19 02:04:05.890[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 18: num_users=201,316[0m
[32m2025-06-19 02:04:05.890[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 201,316 are still greater than expected, keep removing...[0m




Randomly removing 1984 users - Round 19 started


[32m2025-06-19 02:04:12.070[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 19: num_users=198,417[0m
[32m2025-06-19 02:04:12.070[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 198,417 are still greater than expected, keep removing...[0m




Randomly removing 1954 users - Round 20 started


[32m2025-06-19 02:04:14.579[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 20: num_users=195,431[0m
[32m2025-06-19 02:04:14.580[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 195,431 are still greater than expected, keep removing...[0m




Randomly removing 1925 users - Round 21 started


[32m2025-06-19 02:04:18.440[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 21: num_users=192,575[0m
[32m2025-06-19 02:04:18.441[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 192,575 are still greater than expected, keep removing...[0m




Randomly removing 1898 users - Round 22 started


[32m2025-06-19 02:04:22.111[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 22: num_users=189,812[0m
[32m2025-06-19 02:04:22.111[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 189,812 are still greater than expected, keep removing...[0m




Randomly removing 1870 users - Round 23 started


[32m2025-06-19 02:04:26.207[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 23: num_users=187,042[0m
[32m2025-06-19 02:04:26.208[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 187,042 are still greater than expected, keep removing...[0m




Randomly removing 1841 users - Round 24 started


[32m2025-06-19 02:04:29.290[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 24: num_users=184,139[0m
[32m2025-06-19 02:04:29.290[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 184,139 are still greater than expected, keep removing...[0m




Randomly removing 1814 users - Round 25 started


[32m2025-06-19 02:04:32.267[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 25: num_users=181,403[0m
[32m2025-06-19 02:04:32.268[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 181,403 are still greater than expected, keep removing...[0m




Randomly removing 1787 users - Round 26 started


[32m2025-06-19 02:04:35.766[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 26: num_users=178,749[0m
[32m2025-06-19 02:04:35.767[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 178,749 are still greater than expected, keep removing...[0m




Randomly removing 1761 users - Round 27 started


[32m2025-06-19 02:04:38.631[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 27: num_users=176,185[0m
[32m2025-06-19 02:04:38.631[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 176,185 are still greater than expected, keep removing...[0m




Randomly removing 1735 users - Round 28 started


[32m2025-06-19 02:04:43.543[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 28: num_users=173,578[0m
[32m2025-06-19 02:04:43.544[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 173,578 are still greater than expected, keep removing...[0m




Randomly removing 1707 users - Round 29 started


[32m2025-06-19 02:04:46.817[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 29: num_users=170,747[0m
[32m2025-06-19 02:04:46.817[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 170,747 are still greater than expected, keep removing...[0m




Randomly removing 1682 users - Round 30 started


[32m2025-06-19 02:04:50.139[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 30: num_users=168,285[0m
[32m2025-06-19 02:04:50.139[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 168,285 are still greater than expected, keep removing...[0m




Randomly removing 1656 users - Round 31 started


[32m2025-06-19 02:04:53.330[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 31: num_users=165,654[0m
[32m2025-06-19 02:04:53.330[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 165,654 are still greater than expected, keep removing...[0m




Randomly removing 1631 users - Round 32 started


[32m2025-06-19 02:04:56.037[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 32: num_users=163,171[0m
[32m2025-06-19 02:04:56.037[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 163,171 are still greater than expected, keep removing...[0m




Randomly removing 1608 users - Round 33 started


[32m2025-06-19 02:05:00.003[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 33: num_users=160,823[0m
[32m2025-06-19 02:05:00.004[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 160,823 are still greater than expected, keep removing...[0m




Randomly removing 1584 users - Round 34 started


[32m2025-06-19 02:05:03.487[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 34: num_users=158,495[0m
[32m2025-06-19 02:05:03.488[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 158,495 are still greater than expected, keep removing...[0m




Randomly removing 1560 users - Round 35 started


[32m2025-06-19 02:05:06.491[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 35: num_users=156,058[0m
[32m2025-06-19 02:05:06.491[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 156,058 are still greater than expected, keep removing...[0m




Randomly removing 1536 users - Round 36 started


[32m2025-06-19 02:05:09.641[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 36: num_users=153,630[0m
[32m2025-06-19 02:05:09.642[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 153,630 are still greater than expected, keep removing...[0m




Randomly removing 1512 users - Round 37 started


[32m2025-06-19 02:05:12.130[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 37: num_users=151,201[0m
[32m2025-06-19 02:05:12.131[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 151,201 are still greater than expected, keep removing...[0m




Randomly removing 1489 users - Round 38 started


[32m2025-06-19 02:05:15.437[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 38: num_users=148,939[0m
[32m2025-06-19 02:05:15.437[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 148,939 are still greater than expected, keep removing...[0m




Randomly removing 1466 users - Round 39 started


[32m2025-06-19 02:05:18.718[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 39: num_users=146,627[0m
[32m2025-06-19 02:05:18.718[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 146,627 are still greater than expected, keep removing...[0m




Randomly removing 1442 users - Round 40 started


[32m2025-06-19 02:05:21.041[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 40: num_users=144,265[0m
[32m2025-06-19 02:05:21.041[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 144,265 are still greater than expected, keep removing...[0m




Randomly removing 1421 users - Round 41 started


[32m2025-06-19 02:05:23.785[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 41: num_users=142,145[0m
[32m2025-06-19 02:05:23.786[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 142,145 are still greater than expected, keep removing...[0m




Randomly removing 1399 users - Round 42 started


[32m2025-06-19 02:05:26.882[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 42: num_users=139,931[0m
[32m2025-06-19 02:05:26.882[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 139,931 are still greater than expected, keep removing...[0m




Randomly removing 1377 users - Round 43 started


[32m2025-06-19 02:05:31.559[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 43: num_users=137,773[0m
[32m2025-06-19 02:05:31.559[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 137,773 are still greater than expected, keep removing...[0m




Randomly removing 1355 users - Round 44 started


[32m2025-06-19 02:05:33.738[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 44: num_users=135,549[0m
[32m2025-06-19 02:05:33.738[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 135,549 are still greater than expected, keep removing...[0m




Randomly removing 1334 users - Round 45 started


[32m2025-06-19 02:05:36.319[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 45: num_users=133,488[0m
[32m2025-06-19 02:05:36.320[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 133,488 are still greater than expected, keep removing...[0m




Randomly removing 1313 users - Round 46 started


[32m2025-06-19 02:05:39.597[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 46: num_users=131,359[0m
[32m2025-06-19 02:05:39.598[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 131,359 are still greater than expected, keep removing...[0m




Randomly removing 1290 users - Round 47 started


[32m2025-06-19 02:05:42.332[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 47: num_users=129,077[0m
[32m2025-06-19 02:05:42.333[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 129,077 are still greater than expected, keep removing...[0m




Randomly removing 1270 users - Round 48 started


[32m2025-06-19 02:05:44.757[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 48: num_users=127,031[0m
[32m2025-06-19 02:05:44.757[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 127,031 are still greater than expected, keep removing...[0m




Randomly removing 1249 users - Round 49 started


[32m2025-06-19 02:05:47.957[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 49: num_users=124,941[0m
[32m2025-06-19 02:05:47.958[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 124,941 are still greater than expected, keep removing...[0m




Randomly removing 1229 users - Round 50 started


[32m2025-06-19 02:05:50.391[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 50: num_users=122,950[0m
[32m2025-06-19 02:05:50.392[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 122,950 are still greater than expected, keep removing...[0m




Randomly removing 1209 users - Round 51 started


[32m2025-06-19 02:05:53.023[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 51: num_users=120,922[0m
[32m2025-06-19 02:05:53.024[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 120,922 are still greater than expected, keep removing...[0m




Randomly removing 1188 users - Round 52 started


[32m2025-06-19 02:05:54.920[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 52: num_users=118,871[0m
[32m2025-06-19 02:05:54.920[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 118,871 are still greater than expected, keep removing...[0m




Randomly removing 1170 users - Round 53 started


[32m2025-06-19 02:05:56.792[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 53: num_users=117,000[0m
[32m2025-06-19 02:05:56.792[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 117,000 are still greater than expected, keep removing...[0m




Randomly removing 1151 users - Round 54 started


[32m2025-06-19 02:06:00.900[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 54: num_users=115,120[0m
[32m2025-06-19 02:06:00.901[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 115,120 are still greater than expected, keep removing...[0m




Randomly removing 1132 users - Round 55 started


[32m2025-06-19 02:06:03.034[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 55: num_users=113,204[0m
[32m2025-06-19 02:06:03.035[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 113,204 are still greater than expected, keep removing...[0m




Randomly removing 1113 users - Round 56 started


[32m2025-06-19 02:06:06.374[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 56: num_users=111,326[0m
[32m2025-06-19 02:06:06.375[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 111,326 are still greater than expected, keep removing...[0m




Randomly removing 1093 users - Round 57 started


[32m2025-06-19 02:06:08.812[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 57: num_users=109,353[0m
[32m2025-06-19 02:06:08.813[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 109,353 are still greater than expected, keep removing...[0m




Randomly removing 1074 users - Round 58 started


[32m2025-06-19 02:06:11.511[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 58: num_users=107,462[0m
[32m2025-06-19 02:06:11.511[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 107,462 are still greater than expected, keep removing...[0m




Randomly removing 1055 users - Round 59 started


[32m2025-06-19 02:06:13.783[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 59: num_users=105,583[0m
[32m2025-06-19 02:06:13.784[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 105,583 are still greater than expected, keep removing...[0m




Randomly removing 1038 users - Round 60 started


[32m2025-06-19 02:06:15.763[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 60: num_users=103,886[0m
[32m2025-06-19 02:06:15.763[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 103,886 are still greater than expected, keep removing...[0m




Randomly removing 1022 users - Round 61 started


[32m2025-06-19 02:06:17.361[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 61: num_users=102,204[0m
[32m2025-06-19 02:06:17.362[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 102,204 are still greater than expected, keep removing...[0m




Randomly removing 1004 users - Round 62 started


[32m2025-06-19 02:06:19.799[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 62: num_users=100,461[0m
[32m2025-06-19 02:06:19.800[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 100,461 are still greater than expected, keep removing...[0m




Randomly removing 988 users - Round 63 started


[32m2025-06-19 02:06:21.934[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 63: num_users=98,821[0m
[32m2025-06-19 02:06:21.935[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 98,821 are still greater than expected, keep removing...[0m




Randomly removing 970 users - Round 64 started


[32m2025-06-19 02:06:23.405[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 64: num_users=97,095[0m
[32m2025-06-19 02:06:23.405[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 97,095 are still greater than expected, keep removing...[0m




Randomly removing 954 users - Round 65 started


[32m2025-06-19 02:06:25.395[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 65: num_users=95,473[0m
[32m2025-06-19 02:06:25.396[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 95,473 are still greater than expected, keep removing...[0m




Randomly removing 938 users - Round 66 started


[32m2025-06-19 02:06:27.618[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 66: num_users=93,849[0m
[32m2025-06-19 02:06:27.619[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 93,849 are still greater than expected, keep removing...[0m




Randomly removing 922 users - Round 67 started


[32m2025-06-19 02:06:29.090[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 67: num_users=92,200[0m
[32m2025-06-19 02:06:29.090[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 92,200 are still greater than expected, keep removing...[0m




Randomly removing 907 users - Round 68 started


[32m2025-06-19 02:06:30.985[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 68: num_users=90,735[0m
[32m2025-06-19 02:06:30.985[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 90,735 are still greater than expected, keep removing...[0m




Randomly removing 891 users - Round 69 started


[32m2025-06-19 02:06:32.389[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 69: num_users=89,142[0m
[32m2025-06-19 02:06:32.389[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 89,142 are still greater than expected, keep removing...[0m




Randomly removing 876 users - Round 70 started


[32m2025-06-19 02:06:33.785[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 70: num_users=87,656[0m
[32m2025-06-19 02:06:33.785[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 87,656 are still greater than expected, keep removing...[0m




Randomly removing 861 users - Round 71 started


[32m2025-06-19 02:06:36.050[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 71: num_users=86,186[0m
[32m2025-06-19 02:06:36.051[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 86,186 are still greater than expected, keep removing...[0m




Randomly removing 846 users - Round 72 started


[32m2025-06-19 02:06:38.287[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 72: num_users=84,642[0m
[32m2025-06-19 02:06:38.288[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 84,642 are still greater than expected, keep removing...[0m




Randomly removing 830 users - Round 73 started


[32m2025-06-19 02:06:39.838[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 73: num_users=83,059[0m
[32m2025-06-19 02:06:39.838[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 83,059 are still greater than expected, keep removing...[0m




Randomly removing 816 users - Round 74 started


[32m2025-06-19 02:06:42.895[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 74: num_users=81,694[0m
[32m2025-06-19 02:06:42.896[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 81,694 are still greater than expected, keep removing...[0m




Randomly removing 801 users - Round 75 started


[32m2025-06-19 02:06:44.150[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 75: num_users=80,106[0m
[32m2025-06-19 02:06:44.151[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 80,106 are still greater than expected, keep removing...[0m




Randomly removing 786 users - Round 76 started


[32m2025-06-19 02:06:46.742[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 76: num_users=78,670[0m
[32m2025-06-19 02:06:46.743[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 78,670 are still greater than expected, keep removing...[0m




Randomly removing 772 users - Round 77 started


[32m2025-06-19 02:06:48.356[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 77: num_users=77,234[0m
[32m2025-06-19 02:06:48.356[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 77,234 are still greater than expected, keep removing...[0m




Randomly removing 758 users - Round 78 started


[32m2025-06-19 02:06:50.286[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 78: num_users=75,886[0m
[32m2025-06-19 02:06:50.287[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 75,886 are still greater than expected, keep removing...[0m




Randomly removing 744 users - Round 79 started


[32m2025-06-19 02:06:51.667[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 79: num_users=74,497[0m
[32m2025-06-19 02:06:51.667[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 74,497 are still greater than expected, keep removing...[0m




Randomly removing 731 users - Round 80 started


[32m2025-06-19 02:06:52.987[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 80: num_users=73,135[0m
[32m2025-06-19 02:06:52.987[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 73,135 are still greater than expected, keep removing...[0m




Randomly removing 717 users - Round 81 started


[32m2025-06-19 02:06:54.272[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 81: num_users=71,748[0m
[32m2025-06-19 02:06:54.272[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 71,748 are still greater than expected, keep removing...[0m




Randomly removing 705 users - Round 82 started


[32m2025-06-19 02:06:55.714[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 82: num_users=70,547[0m
[32m2025-06-19 02:06:55.714[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 70,547 are still greater than expected, keep removing...[0m




Randomly removing 692 users - Round 83 started


[32m2025-06-19 02:06:56.742[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 83: num_users=69,236[0m
[32m2025-06-19 02:06:56.742[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 69,236 are still greater than expected, keep removing...[0m




Randomly removing 679 users - Round 84 started


[32m2025-06-19 02:06:58.151[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 84: num_users=67,959[0m
[32m2025-06-19 02:06:58.152[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 67,959 are still greater than expected, keep removing...[0m




Randomly removing 666 users - Round 85 started


[32m2025-06-19 02:07:00.167[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 85: num_users=66,676[0m
[32m2025-06-19 02:07:00.168[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 66,676 are still greater than expected, keep removing...[0m




Randomly removing 655 users - Round 86 started


[32m2025-06-19 02:07:01.226[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 86: num_users=65,527[0m
[32m2025-06-19 02:07:01.226[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 65,527 are still greater than expected, keep removing...[0m




Randomly removing 644 users - Round 87 started


[32m2025-06-19 02:07:02.982[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 87: num_users=64,408[0m
[32m2025-06-19 02:07:02.983[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 64,408 are still greater than expected, keep removing...[0m




Randomly removing 631 users - Round 88 started


[32m2025-06-19 02:07:04.451[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 88: num_users=63,148[0m
[32m2025-06-19 02:07:04.451[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 63,148 are still greater than expected, keep removing...[0m




Randomly removing 619 users - Round 89 started


[32m2025-06-19 02:07:05.902[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 89: num_users=61,922[0m
[32m2025-06-19 02:07:05.903[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 61,922 are still greater than expected, keep removing...[0m




Randomly removing 607 users - Round 90 started


[32m2025-06-19 02:07:08.407[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 90: num_users=60,744[0m
[32m2025-06-19 02:07:08.408[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 60,744 are still greater than expected, keep removing...[0m




Randomly removing 593 users - Round 91 started


[32m2025-06-19 02:07:09.658[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 91: num_users=59,376[0m
[32m2025-06-19 02:07:09.659[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 59,376 are still greater than expected, keep removing...[0m




Randomly removing 581 users - Round 92 started


[32m2025-06-19 02:07:10.870[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 92: num_users=58,188[0m
[32m2025-06-19 02:07:10.870[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 58,188 are still greater than expected, keep removing...[0m




Randomly removing 569 users - Round 93 started


[32m2025-06-19 02:07:12.420[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 93: num_users=56,971[0m
[32m2025-06-19 02:07:12.420[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 56,971 are still greater than expected, keep removing...[0m




Randomly removing 557 users - Round 94 started


[32m2025-06-19 02:07:13.601[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 94: num_users=55,740[0m
[32m2025-06-19 02:07:13.601[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 55,740 are still greater than expected, keep removing...[0m




Randomly removing 545 users - Round 95 started


[32m2025-06-19 02:07:14.711[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 95: num_users=54,564[0m
[32m2025-06-19 02:07:14.712[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 54,564 are still greater than expected, keep removing...[0m




Randomly removing 534 users - Round 96 started


[32m2025-06-19 02:07:15.682[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 96: num_users=53,490[0m
[32m2025-06-19 02:07:15.682[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 53,490 are still greater than expected, keep removing...[0m




Randomly removing 525 users - Round 97 started


[32m2025-06-19 02:07:17.138[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 97: num_users=52,580[0m
[32m2025-06-19 02:07:17.138[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 52,580 are still greater than expected, keep removing...[0m




Randomly removing 514 users - Round 98 started


[32m2025-06-19 02:07:18.063[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 98: num_users=51,493[0m
[32m2025-06-19 02:07:18.063[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 51,493 are still greater than expected, keep removing...[0m




Randomly removing 504 users - Round 99 started


[32m2025-06-19 02:07:19.070[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 99: num_users=50,474[0m
[32m2025-06-19 02:07:19.071[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 50,474 are still greater than expected, keep removing...[0m




Randomly removing 495 users - Round 100 started


[32m2025-06-19 02:07:20.543[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 100: num_users=49,525[0m
[32m2025-06-19 02:07:20.544[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 49,525 are still greater than expected, keep removing...[0m




Randomly removing 484 users - Round 101 started


[32m2025-06-19 02:07:21.521[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 101: num_users=48,454[0m
[32m2025-06-19 02:07:21.522[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 48,454 are still greater than expected, keep removing...[0m




Randomly removing 475 users - Round 102 started


[32m2025-06-19 02:07:22.384[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 102: num_users=47,500[0m
[32m2025-06-19 02:07:22.384[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 47,500 are still greater than expected, keep removing...[0m




Randomly removing 465 users - Round 103 started


[32m2025-06-19 02:07:23.076[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 103: num_users=46,560[0m
[32m2025-06-19 02:07:23.076[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 46,560 are still greater than expected, keep removing...[0m




Randomly removing 457 users - Round 104 started


[32m2025-06-19 02:07:24.124[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 104: num_users=45,706[0m
[32m2025-06-19 02:07:24.125[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 45,706 are still greater than expected, keep removing...[0m




Randomly removing 448 users - Round 105 started


[32m2025-06-19 02:07:25.278[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 105: num_users=44,860[0m
[32m2025-06-19 02:07:25.279[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 44,860 are still greater than expected, keep removing...[0m




Randomly removing 439 users - Round 106 started


[32m2025-06-19 02:07:26.125[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 106: num_users=43,906[0m
[32m2025-06-19 02:07:26.126[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 43,906 are still greater than expected, keep removing...[0m




Randomly removing 429 users - Round 107 started


[32m2025-06-19 02:07:27.047[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 107: num_users=42,941[0m
[32m2025-06-19 02:07:27.047[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 42,941 are still greater than expected, keep removing...[0m




Randomly removing 420 users - Round 108 started


[32m2025-06-19 02:07:27.639[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 108: num_users=42,034[0m
[32m2025-06-19 02:07:27.639[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 42,034 are still greater than expected, keep removing...[0m




Randomly removing 411 users - Round 109 started


[32m2025-06-19 02:07:28.434[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 109: num_users=41,189[0m
[32m2025-06-19 02:07:28.435[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 41,189 are still greater than expected, keep removing...[0m




Randomly removing 403 users - Round 110 started


[32m2025-06-19 02:07:29.209[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 110: num_users=40,372[0m
[32m2025-06-19 02:07:29.209[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 40,372 are still greater than expected, keep removing...[0m




Randomly removing 394 users - Round 111 started


[32m2025-06-19 02:07:29.880[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 111: num_users=39,480[0m
[32m2025-06-19 02:07:29.880[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 39,480 are still greater than expected, keep removing...[0m




Randomly removing 386 users - Round 112 started


[32m2025-06-19 02:07:30.633[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 112: num_users=38,620[0m
[32m2025-06-19 02:07:30.633[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 38,620 are still greater than expected, keep removing...[0m




Randomly removing 378 users - Round 113 started


[32m2025-06-19 02:07:31.289[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 113: num_users=37,805[0m
[32m2025-06-19 02:07:31.289[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 37,805 are still greater than expected, keep removing...[0m




Randomly removing 370 users - Round 114 started


[32m2025-06-19 02:07:32.371[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 114: num_users=37,014[0m
[32m2025-06-19 02:07:32.371[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 37,014 are still greater than expected, keep removing...[0m




Randomly removing 362 users - Round 115 started


[32m2025-06-19 02:07:33.000[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 115: num_users=36,249[0m
[32m2025-06-19 02:07:33.001[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 36,249 are still greater than expected, keep removing...[0m




Randomly removing 354 users - Round 116 started


[32m2025-06-19 02:07:33.997[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 116: num_users=35,474[0m
[32m2025-06-19 02:07:33.998[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 35,474 are still greater than expected, keep removing...[0m




Randomly removing 345 users - Round 117 started


[32m2025-06-19 02:07:34.920[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 117: num_users=34,518[0m
[32m2025-06-19 02:07:34.921[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 34,518 are still greater than expected, keep removing...[0m




Randomly removing 337 users - Round 118 started


[32m2025-06-19 02:07:35.474[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 118: num_users=33,761[0m
[32m2025-06-19 02:07:35.474[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 33,761 are still greater than expected, keep removing...[0m




Randomly removing 329 users - Round 119 started


[32m2025-06-19 02:07:36.031[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 119: num_users=32,962[0m
[32m2025-06-19 02:07:36.032[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 32,962 are still greater than expected, keep removing...[0m




Randomly removing 322 users - Round 120 started


[32m2025-06-19 02:07:36.476[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 120: num_users=32,214[0m
[32m2025-06-19 02:07:36.477[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 32,214 are still greater than expected, keep removing...[0m




Randomly removing 316 users - Round 121 started


[32m2025-06-19 02:07:36.995[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 121: num_users=31,605[0m
[32m2025-06-19 02:07:36.996[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 31,605 are still greater than expected, keep removing...[0m




Randomly removing 310 users - Round 122 started


[32m2025-06-19 02:07:37.415[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 122: num_users=31,015[0m
[32m2025-06-19 02:07:37.416[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 31,015 are still greater than expected, keep removing...[0m




Randomly removing 303 users - Round 123 started


[32m2025-06-19 02:07:37.998[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 123: num_users=30,391[0m
[32m2025-06-19 02:07:37.999[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 30,391 are still greater than expected, keep removing...[0m




Randomly removing 296 users - Round 124 started


[32m2025-06-19 02:07:38.688[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 124: num_users=29,698[0m
[32m2025-06-19 02:07:38.689[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 29,698 are still greater than expected, keep removing...[0m




Randomly removing 290 users - Round 125 started


[32m2025-06-19 02:07:39.536[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 125: num_users=29,016[0m
[32m2025-06-19 02:07:39.537[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 29,016 are still greater than expected, keep removing...[0m




Randomly removing 282 users - Round 126 started


[32m2025-06-19 02:07:40.820[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 126: num_users=28,200[0m
[32m2025-06-19 02:07:40.820[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 28,200 are still greater than expected, keep removing...[0m




Randomly removing 273 users - Round 127 started


[32m2025-06-19 02:07:41.622[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 127: num_users=27,305[0m
[32m2025-06-19 02:07:41.622[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 27,305 are still greater than expected, keep removing...[0m




Randomly removing 265 users - Round 128 started


[32m2025-06-19 02:07:42.411[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 128: num_users=26,563[0m
[32m2025-06-19 02:07:42.411[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 26,563 are still greater than expected, keep removing...[0m




Randomly removing 258 users - Round 129 started


[32m2025-06-19 02:07:42.924[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 129: num_users=25,832[0m
[32m2025-06-19 02:07:42.925[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 25,832 are still greater than expected, keep removing...[0m




Randomly removing 250 users - Round 130 started


[32m2025-06-19 02:07:43.601[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 130: num_users=25,085[0m
[32m2025-06-19 02:07:43.602[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 25,085 are still greater than expected, keep removing...[0m




Randomly removing 242 users - Round 131 started


[32m2025-06-19 02:07:44.107[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 131: num_users=24,265[0m
[32m2025-06-19 02:07:44.108[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 24,265 are still greater than expected, keep removing...[0m




Randomly removing 235 users - Round 132 started


[32m2025-06-19 02:07:44.608[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 132: num_users=23,520[0m
[32m2025-06-19 02:07:44.608[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 23,520 are still greater than expected, keep removing...[0m




Randomly removing 228 users - Round 133 started


[32m2025-06-19 02:07:45.084[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 133: num_users=22,812[0m
[32m2025-06-19 02:07:45.085[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 22,812 are still greater than expected, keep removing...[0m




Randomly removing 220 users - Round 134 started


[32m2025-06-19 02:07:45.648[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 134: num_users=22,081[0m
[32m2025-06-19 02:07:45.648[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 22,081 are still greater than expected, keep removing...[0m




Randomly removing 213 users - Round 135 started


[32m2025-06-19 02:07:46.189[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 135: num_users=21,398[0m
[32m2025-06-19 02:07:46.190[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 21,398 are still greater than expected, keep removing...[0m




Randomly removing 208 users - Round 136 started


[32m2025-06-19 02:07:46.764[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 136: num_users=20,895[0m
[32m2025-06-19 02:07:46.765[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 20,895 are still greater than expected, keep removing...[0m




Randomly removing 202 users - Round 137 started


[32m2025-06-19 02:07:47.131[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 137: num_users=20,246[0m
[32m2025-06-19 02:07:47.131[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 20,246 are still greater than expected, keep removing...[0m




Randomly removing 196 users - Round 138 started


[32m2025-06-19 02:07:47.860[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 138: num_users=19,695[0m
[32m2025-06-19 02:07:47.860[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 19,695 are still greater than expected, keep removing...[0m




Randomly removing 190 users - Round 139 started


[32m2025-06-19 02:07:48.160[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 139: num_users=19,020[0m
[32m2025-06-19 02:07:48.160[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 19,020 are still greater than expected, keep removing...[0m




Randomly removing 184 users - Round 140 started


[32m2025-06-19 02:07:48.571[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 140: num_users=18,475[0m
[32m2025-06-19 02:07:48.571[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 18,475 are still greater than expected, keep removing...[0m




Randomly removing 179 users - Round 141 started


[32m2025-06-19 02:07:48.892[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 141: num_users=17,953[0m
[32m2025-06-19 02:07:48.893[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 17,953 are still greater than expected, keep removing...[0m




Randomly removing 175 users - Round 142 started


[32m2025-06-19 02:07:49.199[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 142: num_users=17,507[0m
[32m2025-06-19 02:07:49.200[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 17,507 are still greater than expected, keep removing...[0m




Randomly removing 170 users - Round 143 started


[32m2025-06-19 02:07:49.622[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 143: num_users=17,028[0m
[32m2025-06-19 02:07:49.622[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 17,028 are still greater than expected, keep removing...[0m




Randomly removing 164 users - Round 144 started


[32m2025-06-19 02:07:49.859[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 144: num_users=16,499[0m
[32m2025-06-19 02:07:49.859[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 16,499 are still greater than expected, keep removing...[0m




Randomly removing 161 users - Round 145 started


[32m2025-06-19 02:07:50.249[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 145: num_users=16,139[0m
[32m2025-06-19 02:07:50.250[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 16,139 are still greater than expected, keep removing...[0m




Randomly removing 154 users - Round 146 started


[32m2025-06-19 02:07:50.676[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 146: num_users=15,485[0m
[32m2025-06-19 02:07:50.677[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 15,485 are still greater than expected, keep removing...[0m




Randomly removing 149 users - Round 147 started


[32m2025-06-19 02:07:51.238[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 147: num_users=14,963[0m
[32m2025-06-19 02:07:51.238[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 14,963 are still greater than expected, keep removing...[0m




Randomly removing 143 users - Round 148 started


[32m2025-06-19 02:07:51.592[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 148: num_users=14,384[0m
[32m2025-06-19 02:07:51.593[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 14,384 are still greater than expected, keep removing...[0m




Randomly removing 140 users - Round 149 started


[32m2025-06-19 02:07:52.077[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 149: num_users=14,037[0m
[32m2025-06-19 02:07:52.078[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 14,037 are still greater than expected, keep removing...[0m




Randomly removing 134 users - Round 150 started


[32m2025-06-19 02:07:52.466[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 150: num_users=13,433[0m
[32m2025-06-19 02:07:52.466[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 13,433 are still greater than expected, keep removing...[0m




Randomly removing 128 users - Round 151 started


[32m2025-06-19 02:07:52.759[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m119[0m - [1mAfter randomly removing users - round 151: num_users=12,873[0m
[32m2025-06-19 02:07:52.760[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m123[0m - [1mNumber of users 12,873 are still greater than expected, keep removing...[0m
[32m2025-06-19 02:07:52.775[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m140[0m - [1mNumber of val_df records 966 are falling below expected threshold, stop and use `sample_df` as final output...[0m
[32m2025-06-19 02:07:52.781[0m | [1mINFO    [0m | [36msrc.sample[0m:[36msample[0m:[36m149[0m - [1mlen(sample_users)=12,409 len(sample_items)=4,143[0m


In [9]:
train_sample_df = sample_df.copy()
sample_users = train_sample_df[args.user_col].unique()
sample_items = train_sample_df[args.item_col].unique()
logger.info(f"{len(sample_users)=} {len(sample_items)=}")

val_sample_df = val_raw.loc[
    lambda df: df[args.user_col].isin(sample_users)
    & df[args.item_col].isin(sample_items)
]

train_items = train_sample_df[args.item_col].unique()
train_users = train_sample_df[args.user_col].unique()
val_items = val_sample_df[args.item_col].unique()
val_users = val_sample_df[args.user_col].unique()
logger.info(f"{len(train_items)=}, {len(train_users)=}")
logger.info(f"{len(val_items)=}, {len(val_users)=}")
val_users_in_train = set(val_users).intersection(set(train_users))
val_items_in_train = set(val_items).intersection(set(train_items))
logger.info(
    f"Percentage of val users in train: {len(val_users_in_train) / len(val_users):,.0%}"
)
logger.info(
    f"Percentage of val items in train: {len(val_items_in_train) / len(val_items):,.0%}"
)
logger.info(
    f"Sparsity: {1 - len(train_sample_df) / (len(train_items) * len(train_users)):,.4%}"
)

[32m2025-06-19 02:07:52.793[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mlen(sample_users)=12409 len(sample_items)=4143[0m
[32m2025-06-19 02:07:52.818[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mlen(train_items)=4143, len(train_users)=12409[0m
[32m2025-06-19 02:07:52.819[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1mlen(val_items)=651, len(val_users)=644[0m
[32m2025-06-19 02:07:52.820[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mPercentage of val users in train: 100%[0m
[32m2025-06-19 02:07:52.820[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m22[0m - [1mPercentage of val items in train: 100%[0m
[32m2025-06-19 02:07:52.820[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [1mSparsity: 99.8236%[0m


In [10]:
assert train_sample_df.groupby(args.user_col).size().min() >= args.min_user_interactions
assert train_sample_df.groupby(args.item_col).size().min() >= args.min_item_interactions

In [11]:
train_sample_df.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
user_id,90700.0,12409.0,AGTM6TECFNENFVXRGNVCFUYVL22A,87.0,,,,,,,
parent_asin,90700.0,4143.0,B0BW3QTWJJ,437.0,,,,,,,
rating,90700.0,,,,4.554344,0.928609,1.0,5.0,5.0,5.0,5.0
timestamp,90700.0,,,,1490914780140.19,68577577958.46466,1036400058000.0,1442153383000.0,1485965216500.0,1546654323543.5,1628640736369.0


## Persist sample

In [13]:
train_sample_df.to_parquet("../data/train.parquet")
val_sample_df.to_parquet("../data/val.parquet")

In [14]:
from datasets import load_dataset

metadata_raw = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023", "raw_meta_Toys_and_Games", trust_remote_code=True
)
metadata_raw_df = metadata_raw["full"].to_pandas()
metadata_raw_df.to_parquet("../data/raw_meta.parquet")