In [48]:
import os

import httpx
import orjson
import pandas as pd

# Seed used as `random_state` when the notebook samples rows.
SEED = 42

# Location of the dataset. The default link carries `expirationTimestamp` and
# `signature` query parameters, so it is presumably a time-limited signed URL
# that stops working once it lapses. Set the HOMEWORK_DATA_URL environment
# variable to a fresh link (or another copy of the file) to re-run the
# notebook without editing this cell.
URL = os.environ.get(
    "HOMEWORK_DATA_URL",
    "https://file.notion.so/f/f/081b116f-dee6-47da-9e96-9ed78637864d/7e88d041-9f9c-4962-81e5-ef94cf465657/homework_data_v2.json?table=block&id=2e9a9966-b0b2-8086-ad5f-ef783de9f165&spaceId=081b116f-dee6-47da-9e96-9ed78637864d&expirationTimestamp=1769383809697&signature=KrWjCqxEcRIDDIlY4w41mYLhZzXSjljCiRunzQWk_IA&downloadName=homework_data_v2.json",
)

### Load

In [76]:
def load_json(url: str) -> list[dict]:
    """Stream a newline-delimited JSON document over HTTP and parse every line.

    The response body is consumed line by line; each non-blank line is decoded
    with ``orjson.loads`` and the results are returned in arrival order.

    Args:
        url: Address of the resource to fetch with a GET request.

    Returns:
        One parsed object per non-blank line of the response body.
        NOTE(review): annotated as ``list[dict]``, but nothing here verifies
        that each line decodes to a dict.

    Raises:
        httpx.HTTPStatusError: If the server responds with an error status.
        orjson.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with httpx.Client(timeout=30.0) as client, client.stream(method="GET", url=url) as response:
        response.raise_for_status()
        # The list is fully built before the `with` block closes the stream.
        # Lines are tested by their stripped content so that whitespace-only
        # lines are skipped instead of being handed to the JSON parser.
        return [
            orjson.loads(line)
            for line in response.iter_lines()
            if line.strip()
        ]


# Download and parse the dataset; `rows` holds one parsed record per input line.
rows = load_json(url=URL)

# Number of records loaded.
len(rows)

28830

### Prepare

In [77]:
# Flatten the nested records into a table, one row per record; nested keys
# become dot-separated column names.
orders = pd.json_normalize(rows, sep=".")

# (rows, columns) of the flattened table.
orders.shape

(28830, 20)

In [None]:
# Expand the per-order `its` list so that every element gets its own row; the
# index is renumbered from 0 so it lines up with `its_cols` below.
orders_long = orders.explode(column="its", ignore_index=True)

# Flatten each exploded `its` element into columns, namespaced as `its.*`.
its_cols = pd.json_normalize(orders_long["its"]).add_prefix("its.")

# Place the order-level columns and the item-level columns side by side.
items = pd.concat([orders_long.drop(columns="its"), its_cols], axis=1)

items.shape

(29231, 23)

In [73]:
# Divisor applied to the raw price columns.
# NOTE(review): presumably converts the raw price unit into a larger one — confirm the unit.
PRICE_DIVISOR = 1_000_000

# Composite key: experiment name concatenated with the order id (no separator).
items["_id"] = items["_id.exp"] + items["_id.o"]

# Scale prices from the untouched `orders_long` columns rather than from
# `items` itself: `items` was built from `orders_long` with the same 0..n-1
# index, so the values align row for row, and re-running this cell no longer
# divides already-scaled prices a second time.
items["im.rev.price"] = orders_long["im.rev.price"].div(PRICE_DIVISOR)
items["fm.rev.price"] = orders_long["fm.rev.price"].div(PRICE_DIVISOR)

# Parse the timestamp columns into datetimes (safe to repeat on re-run).
items["im.t"] = pd.to_datetime(items["im.t"])
items["fm.t"] = pd.to_datetime(items["fm.t"])

In [74]:
# Inspect column dtypes of the prepared frame (e.g. that `im.t` / `fm.t` were parsed as datetimes).
items.dtypes

_id.exp                               str
_id.o                                 str
fm.ch                                 str
fm.pid                                str
fm.pw                             float64
fm.rev.price                      float64
fm.st                                 str
fm.t                  datetime64[us, UTC]
fm.w                              float64
im.ch                                 str
im.pid                                str
im.rev.price                      float64
im.st                                 str
im.t                  datetime64[us, UTC]
im.w                              float64
rcv.city                              str
rcv.countryCode                       str
rcv.id                                str
rcv.zipCode                           str
its.dangerousKinds                 object
its.externalId                        str
its.qty                             int64
its.uw                            float64
_id                               

### Explore

In [None]:
# Total number of unique orders (distinct `_id.o` values).
items["_id.o"].nunique()

15029

In [None]:
# Number of unique orders in each experiment group.
items.groupby("_id.exp")["_id.o"].nunique()

_id.exp
russia-12-25-baseline-v2                15029
russia-12-25-tariff-change-no-dpx-v4    13801
Name: _id.o, dtype: int64

In [None]:
# Number of items in each experiment group. `count()` tallies non-null
# `its.externalId` values, i.e. item rows — not distinct items.
items.groupby("_id.exp")["its.externalId"].count()

_id.exp
russia-12-25-baseline-v2                15238
russia-12-25-tariff-change-no-dpx-v4    13993
Name: its.externalId, dtype: int64

In [None]:
# Total number of unique users (distinct `rcv.id` values).
items["rcv.id"].nunique()

4253

In [None]:
# Number of unique users in each experiment group.
items.groupby("_id.exp")["rcv.id"].nunique()

_id.exp
russia-12-25-baseline-v2                4253
russia-12-25-tariff-change-no-dpx-v4    3973
Name: rcv.id, dtype: int64

In [98]:
# Peek at five random item rows; `random_state=SEED` keeps the sample reproducible.
# NOTE(review): the saved output shows unscaled `fm.rev.price` values, string
# timestamps and no `_id` column, so it looks like it was rendered from a frame
# that had not gone through the price/datetime conversion cell — re-run the
# notebook top to bottom to confirm.
items.sample(5, random_state=SEED)

Unnamed: 0,_id.exp,_id.o,fm.ch,fm.pid,fm.pw,fm.rev.price,fm.st,fm.t,fm.w,im.ch,...,im.t,im.w,rcv.city,rcv.countryCode,rcv.id,rcv.zipCode,its.dangerousKinds,its.externalId,its.qty,its.uw
16641,russia-12-25-tariff-change-no-dpx-v4,9ON641582V,ChinaPost-CN-RU-RM-EP-ROAD-D,6941586a7cd01083cd976ebb,1.572,17643171.36,RM,2025-11-22T00:00:00.000Z,0.154,ChinaPost-CN-RU-RM-EP-ROAD-D,...,2025-11-15T11:35:08.609Z,0.147,Верхняя Тура,RU,66e2460e39ac8a3531a774bb,624320,[0],38XQDV7D,1,0.147
963,russia-12-25-baseline-v2,9X4MWM4WO9,DPX-CN-RU-RM-epacket-PS,6941184a0b883dcc76c2a700,1.268,9701566.56,RM,2025-11-22T00:00:00.000Z,0.039,DPX-CN-RU-RM-PS,...,2025-11-20T21:53:31.801Z,0.041,Уфа.,RU,666d3fe104f01a355d0c0f07,450112,[0],WDW3JLD8,1,0.041
1087,russia-12-25-baseline-v2,LZ1OPZOOZV,ChinaPost-CN-RU-RM-EP-ROAD-D,69411898bd21de0068b6e950,1.05,33820707.36,RM,2025-11-22T00:00:00.000Z,0.29,ChinaPost-CN-RU-RM-EP-ROAD-D,...,2025-11-21T07:25:31.661Z,0.291,село Горячие Ключи,RU,672cbd362f02a035f2f51152,694534,[0],38L8P5XL,1,0.281
23272,russia-12-25-tariff-change-no-dpx-v4,V8M8055R7V,ChinaPost-CN-RU-RM-EP-ROAD-D,694168265345d6f755fd18ab,0.193,9519539.52,RM,2025-10-25T06:32:40.188Z,0.03,ChinaPost-CN-RU-RM-EP-ROAD-D,...,2025-10-23T12:28:59.493Z,0.032,Челябинск,RU,66c4e1131e1f4c353a1e82a5,454119,[0],LNV2VYGE,1,0.032
21948,russia-12-25-tariff-change-no-dpx-v4,L1PZ25W42V,ChinaPost-CN-RU-RM-EP-ROAD-D,694164d3756c15589d33cd93,0.769,28260676.32,RM,2025-11-01T05:23:53.616Z,0.218,ChinaPost-CN-RU-RM-EP-ROAD-D,...,2025-10-26T09:19:12.580Z,0.17,Москва,RU,6803941023f751354f4b4567,115372,[0],6JNJ67L5,1,0.17
