In [38]:
import pandas as pd
import os
import json
import xgboost as xgb
import numpy as np
from sklearn import metrics

In [39]:
def read_parquet(data_dir):
    df = []
    for file in os.listdir(data_dir):
        if not file.endswith(".parquet"):
            continue
        df.append(pd.read_parquet(data_dir + "/" + file, engine="pyarrow", use_threads=True))
    df = pd.concat(df).reset_index(drop=True)
    return df

In [40]:
embs = read_parquet(data_dir="emb_dump")
embs["user_embedding2"] = embs.apply(lambda row: json.loads(row["user_embedding"]) if row["user_embedding"] else [0.0] * 32, axis=1)
embs["creator_embedding2"] = embs.apply(lambda row: json.loads(row["creator_embedding"]) if row["creator_embedding"] else [0.0] * 32, axis=1)
embs, embs.dtypes

(             user_id                                     user_embedding   
 0        31463714121  [-0.9922972917556763, 0.6494506597518921, 0.74...  \
 1        33111718131  [-0.35345447063446045, 0.22865581512451172, 0....   
 2        77592817701  [-0.12684990465641022, -0.6259598731994629, -0...   
 3        79749557531  [0.6071574687957764, 0.4647164046764374, 0.164...   
 4        16036107631  [-0.22589842975139618, 0.5515395998954773, 0.3...   
 ...              ...                                                ...   
 1695693  30464369601                                               None   
 1695694  19742278011                                               None   
 1695695  38700470401                                               None   
 1695696  57688015001                                               None   
 1695697  70927656911                                               None   
 
                                          creator_embedding version   
 0             

In [67]:
dt = read_parquet(data_dir ="sample_data2")
dt = dt[dt["hostId"] != ""]
dt['userId'] = dt['userId'].astype(int)
dt['hostId'] = dt['hostId'].astype(int)

In [68]:
dt["variant"]

0           control
1           control
2         variant-5
3         variant-5
4         variant-2
            ...    
281553    variant-5
281554    variant-5
281555    variant-5
281556    variant-5
281557    variant-5
Name: variant, Length: 281553, dtype: object

In [69]:
dt = dt[dt["variant"] == "variant-5"]
dt["variant"]

2         variant-5
3         variant-5
8         variant-5
24        variant-5
26        variant-5
            ...    
281553    variant-5
281554    variant-5
281555    variant-5
281556    variant-5
281557    variant-5
Name: variant, Length: 57116, dtype: object

In [70]:
dt2 = dt.merge(embs[["user_id", "user_embedding", "user_embedding2"]], left_on='userId', right_on="user_id", how='left')
dt3 = dt2.merge(embs[["user_id", "creator_embedding", "creator_embedding2"]], left_on='hostId', right_on="user_id", how='left')

In [71]:
dt3['v3_dot'] = dt3.apply(lambda row: np.dot(row['user_embedding2'] if not np.isnan(row['user_embedding2']).any() else [0.0] * 32,
                                            row['creator_embedding2'] if not np.isnan(row['creator_embedding2']).any() else [0.0] * 32),
                         axis=1)

In [72]:
df = dt3

In [73]:
np.mean(df["live-popularity-embeddings-affinity-v3"] != 0)

0.27885356117375165

In [74]:
np.mean(df["v3_dot"] != 0)

0.3145178233769872

In [75]:
eval_df = df[df["time"] >= "2023-05-21"]
train_df = df[df["time"] < "2023-05-21"]
eval_df.shape, train_df.shape

((27859, 218), (29257, 218))

In [76]:
list(df.columns)

['livestreamId',
 'userId',
 'time',
 'name',
 'tenant',
 'eventsPlatformSource',
 'eventUuid',
 'ntp_eventRecordTime_ms',
 'ntp_eventDispatchTime_ms',
 'serverTime_ms',
 'eventIngestionTime_ms',
 'requestId',
 'score',
 'meta',
 'like_2m',
 'like_4m',
 'like_8m',
 'like_16m',
 'like_30m',
 'like_1d',
 'dt_liker_2m',
 'dt_liker_4m',
 'dt_liker_8m',
 'dt_liker_16m',
 'dt_liker_30m',
 'dt_liker_1d',
 'cheer_spent_2m',
 'cheer_spent_4m',
 'cheer_spent_8m',
 'cheer_spent_16m',
 'cheer_spent_30m',
 'cheer_spent_1d',
 'comment_2m',
 'comment_4m',
 'comment_8m',
 'comment_16m',
 'comment_30m',
 'comment_1d',
 'dt_commenter_2m',
 'dt_commenter_4m',
 'dt_commenter_8m',
 'dt_commenter_16m',
 'dt_commenter_30m',
 'dt_commenter_1d',
 'cohost_req_2m',
 'cohost_req_4m',
 'cohost_req_8m',
 'cohost_req_16m',
 'cohost_req_30m',
 'cohost_req_1d',
 'dt_cohost_req_2m',
 'dt_cohost_req_4m',
 'dt_cohost_req_8m',
 'dt_cohost_req_16m',
 'dt_cohost_req_30m',
 'dt_cohost_req_1d',
 'gifts_2m',
 'gifts_4m',
 'gif

In [60]:
label_cols = ["label"]
input_cols = [
    "like_2m",
    "like_4m",
    "like_8m",
    "like_16m",
    "like_30m",
    "like_1d",
    "dt_liker_2m",
    "dt_liker_4m",
    "dt_liker_8m",
    "dt_liker_16m",
    "dt_liker_30m",
    "dt_liker_1d",
    "cheer_spent_2m",
    "cheer_spent_4m",
    "cheer_spent_8m",
    "cheer_spent_16m",
    "cheer_spent_30m",
    "cheer_spent_1d",
    "comment_2m",
    "comment_4m",
    "comment_8m",
    "comment_16m",
    "comment_30m",
    "comment_1d",
    "dt_commenter_2m",
    "dt_commenter_4m",
    "dt_commenter_8m",
    "dt_commenter_16m",
    "dt_commenter_30m",
    "dt_commenter_1d",
    "cohost_req_2m",
    "cohost_req_4m",
    "cohost_req_8m",
    "cohost_req_16m",
    "cohost_req_30m",
    "cohost_req_1d",
    "dt_cohost_req_2m",
    "dt_cohost_req_4m",
    "dt_cohost_req_8m",
    "dt_cohost_req_16m",
    "dt_cohost_req_30m",
    "dt_cohost_req_1d",
    "gifts_2m",
    "gifts_4m",
    "gifts_8m",
    "gifts_16m",
    "gifts_30m",
    "gifts_1d",
    "dt_gifter_2m",
    "dt_gifter_4m",
    "dt_gifter_8m",
    "dt_gifter_16m",
    "dt_gifter_30m",
    "dt_gifter_1d",
    "share_2m",
    "share_4m",
    "share_8m",
    "share_16m",
    "share_30m",
    "share_1d",
    "dt_shares_2m",
    "dt_shares_4m",
    "dt_shares_8m",
    "dt_shares_16m",
    "dt_shares_30m",
    "dt_shares_1d",
    "follow_2m",
    "follow_4m",
    "follow_8m",
    "follow_16m",
    "follow_30m",
    "follow_1d",
    "act_vc",
    "agg_vc",
    "avg_ts",
    "live-cross-feature-user-timespent-gifting-affinity-model-v1",
    "live-promotion_retool_Planned_Live",
    "live-cross-feature-affinity-model-v2",
    "live-similar-creator-affinity",
    "live-short-video-creator-affinity",
    "live-promotion_retool_Celeb_Live",
    "live-min-views-agency",
    "live-promotion_backend_super_power_creator",
    "live-promotion_retool_Creator_Push",
    "live-promotion_retool_Good_Content",
    "live-min-views",
    "live-popular",
    "live-popularity-embeddings-affinity-v3",
    'userHost_likes_15m',
 'userHost_likes_1h',
 'userHost_likes_6h',
 'userHost_likes_1d',
 'userHost_likes_7d',
 'userHost_likes_30d',
 'userHost_cheersSpent_15m',
 'userHost_cheersSpent_1h',
 'userHost_cheersSpent_6h',
 'userHost_cheersSpent_1d',
 'userHost_cheersSpent_7d',
 'userHost_cheersSpent_30d',
 'userHost_gifts_15m',
 'userHost_gifts_1h',
 'userHost_gifts_6h',
 'userHost_gifts_1d',
 'userHost_gifts_7d',
 'userHost_gifts_30d',
 'userHost_cohostRequests_15m',
 'userHost_cohostRequests_1h',
 'userHost_cohostRequests_6h',
 'userHost_cohostRequests_1d',
 'userHost_cohostRequests_7d',
 'userHost_cohostRequests_30d',
 'userHost_comments_15m',
 'userHost_comments_1h',
 'userHost_comments_6h',
 'userHost_comments_1d',
 'userHost_comments_7d',
 'userHost_comments_30d',
 'userHost_shares_15m',
 'userHost_shares_1h',
 'userHost_shares_6h',
 'userHost_shares_1d',
 'userHost_shares_7d',
 'userHost_shares_30d',
 'userHost_follows_15m',
 'userHost_follows_1h',
 'userHost_follows_6h',
 'userHost_follows_1d',
 'userHost_follows_7d',
 'userHost_follows_30d',
 'userHost_dt_like_6h',
 'userHost_dt_like_1d',
 'userHost_dt_like_7d',
 'userHost_dt_like_30d',
 'userHost_dt_liked_live_6h',
 'userHost_dt_liked_live_1d',
 'userHost_dt_liked_live_7d',
 'userHost_dt_liked_live_30d',
 'userHost_dt_gifted_live_6h',
 'userHost_dt_gifted_live_1d',
 'userHost_dt_gifted_live_7d',
 'userHost_dt_gifted_live_30d',
 'userHost_dt_cohost_req_live_6h',
 'userHost_dt_cohost_req_live_1d',
 'userHost_dt_cohost_req_live_7d',
 'userHost_dt_cohost_req_live_30d',
 'userHost_dt_commented_live_6h',
 'userHost_dt_commented_live_1d',
 'userHost_dt_commented_live_7d',
 'userHost_dt_commented_live_30d',
 'userHost_dt_shared_live_6h',
 'userHost_dt_shared_live_1d',
 'userHost_dt_shared_live_7d',
 'userHost_dt_shared_live_30d',
  ]

In [59]:
input_cols = [
    "like_2m",
    "like_4m",
    "like_8m",
    "like_16m",
    "like_30m",
    "like_1d",
    "dt_liker_2m",
    "dt_liker_4m",
    "dt_liker_8m",
    "dt_liker_16m",
    "dt_liker_30m",
    "dt_liker_1d",
    "cheer_spent_2m",
    "cheer_spent_4m",
    "cheer_spent_8m",
    "cheer_spent_16m",
    "cheer_spent_30m",
    "cheer_spent_1d",
    "comment_2m",
    "comment_4m",
    "comment_8m",
    "comment_16m",
    "comment_30m",
    "comment_1d",
    "dt_commenter_2m",
    "dt_commenter_4m",
    "dt_commenter_8m",
    "dt_commenter_16m",
    "dt_commenter_30m",
    "dt_commenter_1d",
    "cohost_req_2m",
    "cohost_req_4m",
    "cohost_req_8m",
    "cohost_req_16m",
    "cohost_req_30m",
    "cohost_req_1d",
    "dt_cohost_req_2m",
    "dt_cohost_req_4m",
    "dt_cohost_req_8m",
    "dt_cohost_req_16m",
    "dt_cohost_req_30m",
    "dt_cohost_req_1d",
    "gifts_2m",
    "gifts_4m",
    "gifts_8m",
    "gifts_16m",
    "gifts_30m",
    "gifts_1d",
    "dt_gifter_2m",
    "dt_gifter_4m",
    "dt_gifter_8m",
    "dt_gifter_16m",
    "dt_gifter_30m",
    "dt_gifter_1d",
    "share_2m",
    "share_4m",
    "share_8m",
    "share_16m",
    "share_30m",
    "share_1d",
    "dt_shares_2m",
    "dt_shares_4m",
    "dt_shares_8m",
    "dt_shares_16m",
    "dt_shares_30m",
    "dt_shares_1d",
    "follow_2m",
    "follow_4m",
    "follow_8m",
    "follow_16m",
    "follow_30m",
    "follow_1d",
    "act_vc",
    "agg_vc",
    "avg_ts",
    "live-cross-feature-user-timespent-gifting-affinity-model-v1",
    "live-promotion_retool_Planned_Live",
    "live-cross-feature-affinity-model-v2",
    "live-similar-creator-affinity",
    "live-short-video-creator-affinity",
    "live-promotion_retool_Celeb_Live",
    "live-min-views-agency",
    "live-promotion_backend_super_power_creator",
    "live-promotion_retool_Creator_Push",
    "live-promotion_retool_Good_Content",
    "live-min-views",
    "live-popular",
    "live-popularity-embeddings-affinity-v3"
  ]

In [61]:
train_data = train_df[input_cols]
train_labels = train_df[label_cols]
val_data = eval_df[input_cols]
val_labels = eval_df[label_cols]
dtrain = xgb.DMatrix(train_data, train_labels)
dval = xgb.DMatrix(val_data, val_labels)

evallist = [(dtrain, 'train'), (dval, 'eval')]

params = {
    'max_depth': 2,
    'eval_metric': ['auc', 'error', 'logloss'],
    'eta': 0.01,
    'objective': 'binary:logistic',
    }

In [62]:
model = xgb.train(params=params,
                dtrain=dtrain,
                evals = evallist,
                num_boost_round=2000,
                verbose_eval=50000,
                early_stopping_rounds=100,
                # xgb_model = pretrained_model
                )

print(f"model best iteration at {model.best_iteration}, best_score: {model.best_score} and best_ntree_limit: {model.best_ntree_limit} ")
preds = model.predict(dval)
y_true = val_labels["label"].to_numpy()
fpr, tpr, thresholds = metrics.roc_curve(y_true, preds, pos_label=1)
metrics.auc(fpr, tpr)

[0]	train-auc:0.60944	train-error:0.09382	train-logloss:0.68645	eval-auc:0.60201	eval-error:0.10818	eval-logloss:0.68670
[1524]	train-auc:0.73975	train-error:0.09050	train-logloss:0.27833	eval-auc:0.66521	eval-error:0.10735	eval-logloss:0.32261
model best iteration at 1425, best_score: 0.32259583647494205 and best_ntree_limit: 1426 


0.665208013670315

In [36]:
eval_df

Unnamed: 0,livestreamId,userId,time,name,tenant,eventsPlatformSource,eventUuid,ntp_eventRecordTime_ms,ntp_eventDispatchTime_ms,serverTime_ms,...,variant,live-popularity-embeddings-affinity-v3,label,user_id_x,user_embedding,user_embedding2,user_id_y,creator_embedding,creator_embedding2,v3_dot
1,4b965386-1ef0-44f6-bf8f-59dbeadaa3d7,17438212511,2023-05-17 00:56:47,LivestreamFeatures,moj,event-sink-loader,,,,1684285006354,...,control,0.000000,0,,,,3.750642e+10,"[1.3749712705612183, -0.9212243556976318, -1.3...","[1.3749712705612183, -0.9212243556976318, -1.3...",0.000000
8,3d247fc6-0b17-4181-87ab-c6eb9113ae83,4619021905,2023-05-17 13:26:28,LivestreamFeatures,moj,event-sink-loader,,,,1684329987395,...,control,-3.098605,0,4.619022e+09,"[0.4718713164329529, 0.3389163315296173, -0.28...","[0.4718713164329529, 0.3389163315296173, -0.28...",7.168669e+10,"[-0.9237340688705444, 0.22875450551509857, 0.1...","[-0.9237340688705444, 0.22875450551509857, 0.1...",-3.098605
13,9cffd26e-5e0d-4ecf-933c-9635208086b6,50065311201,2023-05-17 11:07:55,LivestreamFeatures,moj,event-sink-loader,,,,1684321675644,...,control,0.000000,0,5.006531e+10,"[-0.6726199388504028, 0.36651548743247986, 0.1...","[-0.6726199388504028, 0.36651548743247986, 0.1...",,,,0.000000
15,2d5de340-6849-446f-be9a-5bbdb1642c43,79982919631,2023-05-16 12:58:46,LivestreamFeatures,moj,event-sink-loader,,,,1684241926108,...,control,4.014164,1,7.998292e+10,"[0.08599864691495895, -0.16756461560726166, 0....","[0.08599864691495895, -0.16756461560726166, 0....",3.832568e+10,"[0.13086552917957306, -0.24801531434059143, 0....","[0.13086552917957306, -0.24801531434059143, 0....",4.014164
16,3dd5d2ca-d86e-4039-9075-16077d70989b,14392882831,2023-05-16 15:26:46,LivestreamFeatures,moj,event-sink-loader,,,,1684250796433,...,control,0.000000,0,,,,9.394881e+10,"[-0.19630391895771027, -0.3512236475944519, -1...","[-0.19630391895771027, -0.3512236475944519, -1...",0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258800,19009324-8f19-452e-b0ba-08cc8e179555,75496845621,2023-05-17 09:47:05,LivestreamFeatures,moj,event-sink-loader,,,,1684316824333,...,control,0.000000,0,,,,9.393477e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000
258823,df2c02d6-47ce-4bac-b700-8204ba2efdfe,35652050731,2023-05-16 09:37:06,LivestreamFeatures,moj,event-sink-loader,,,,1684229824719,...,control,0.000000,1,,,,9.393477e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000
258824,df2c02d6-47ce-4bac-b700-8204ba2efdfe,15728738431,2023-05-16 09:36:51,LivestreamFeatures,moj,event-sink-loader,,,,1684229811260,...,control,0.000000,1,,,,9.393477e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000
258825,df2c02d6-47ce-4bac-b700-8204ba2efdfe,30090900101,2023-05-16 09:36:12,LivestreamFeatures,moj,event-sink-loader,,,,1684229770914,...,control,0.000000,1,3.009090e+10,"[0.16216345131397247, 0.4492245614528656, 0.93...","[0.16216345131397247, 0.4492245614528656, 0.93...",9.393477e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000


In [37]:
np.mean(eval_df["live-cross-feature-affinity-model-v2"] > 0)

0.106765076639202

In [386]:
np.mean(eval_df["v3_dot"] > 0)

0.026070117105431428

In [382]:
# v3 auc
fpr, tpr, thresholds = metrics.roc_curve(y_true, eval_df["live-popularity-embeddings-affinity-v3"], pos_label=1)
metrics.auc(fpr, tpr)

0.5351080210558218

In [391]:
eval_df["v3_dot"]

1         0.000000
8        -3.098605
13        0.000000
15        4.014164
16        0.000000
            ...   
258800    0.000000
258823    0.000000
258824    0.000000
258825    0.000000
258849    0.000000
Name: v3_dot, Length: 109474, dtype: float64

In [390]:
# v3 auc
fpr, tpr, thresholds = metrics.roc_curve(y_true, eval_df["v3_dot"], pos_label=1)
metrics.auc(fpr, tpr)

0.534258366329391

In [385]:
eval_df2 = eval_df[eval_df["live-popularity-embeddings-affinity-v3"] != 0]
val_data2 = eval_df2[input_cols]
val_labels2 = eval_df2[label_cols]
dval2 = xgb.DMatrix(val_data2, val_labels2)
preds = model.predict(dval2)
fpr, tpr, thresholds = metrics.roc_curve(val_labels2["label"], eval_df2["live-popularity-embeddings-affinity-v3"], pos_label=1)
metrics.auc(fpr, tpr)

0.6229589461477802

In [None]:
fpr, tpr, thresholds = metrics.roc_curve(val_labels2["label"], preds, pos_label=1)
metrics.auc(fpr, tpr)

In [None]:
np.mean(eval_df["live-popularity-embeddings-affinity-v3"] != 0)

In [348]:
np.mean(eval_df["live-popular"] != 0)

0.9056122915030053

In [349]:
eval_df3 = eval_df[eval_df["live-popular"] != 0]
np.mean(eval_df3["live-popularity-embeddings-affinity-v3"] != 0)

0.26923270897005275

In [362]:
fpr, tpr, thresholds = metrics.roc_curve(val_labels2["label"], eval_df2["v3_dot"], pos_label=1)
metrics.auc(fpr, tpr)

0.6229589461477802

In [358]:
eval_df2["live-popularity-embeddings-affinity-v3"]

8        -3.098605
15        4.014164
30       -4.325688
37       -2.537180
41       -2.811369
            ...   
258448   -5.757068
258449   -5.757068
258534   -2.528187
258562   -5.693274
258682   -1.615251
Name: live-popularity-embeddings-affinity-v3, Length: 26786, dtype: float64

In [359]:
eval_df2["v3_dot"]

8        -3.098605
15        4.014164
30       -4.325688
37       -2.537180
41       -2.811369
            ...   
258448   -5.757068
258449   -5.757068
258534   -2.528187
258562   -5.693274
258682   -1.615251
Name: v3_dot, Length: 26786, dtype: float64

In [363]:
fpr, tpr, thresholds = metrics.roc_curve(val_labels2["label"], eval_df2["live-popularity-embeddings-affinity-v3"], pos_label=1)
metrics.auc(fpr, tpr)

0.6229589461477802

In [83]:
dt = df[df["meta"] == "live-fine-sort"]
dt["score"] = dt["score"].apply(float)
dt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt["score"] = dt["score"].apply(float)


Unnamed: 0,livestreamId,userId,time,name,tenant,eventsPlatformSource,eventUuid,ntp_eventRecordTime_ms,ntp_eventDispatchTime_ms,serverTime_ms,...,sv_user_creator_notinterested_7d,sv_user_creator_notinterested_unbounded,label,user_id_x,user_embedding,user_embedding2,user_id_y,creator_embedding,creator_embedding2,v3_dot
0,95e048b1-effb-49da-aa9b-060a2aa69475,12987661601,2023-05-20 03:22:32,LivestreamFeatures,moj,event-sink-loader,,,,1684552951416,...,,,0,1.298766e+10,"[0.6703124046325684, -0.1444208174943924, -0.8...","[0.6703124046325684, -0.1444208174943924, -0.8...",4.580289e+08,"[-0.31363025307655334, -0.03835073485970497, 0...","[-0.31363025307655334, -0.03835073485970497, 0...",-0.942957
1,51c4d820-da05-42e8-9427-70dab754ddc6,12987661601,2023-05-20 10:31:22,LivestreamFeatures,moj,event-sink-loader,,,,1684578682109,...,,,0,1.298766e+10,"[0.6703124046325684, -0.1444208174943924, -0.8...","[0.6703124046325684, -0.1444208174943924, -0.8...",9.905922e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000
3,b097560d-a76b-4a0e-855c-3022120c68b6,97797766821,2023-05-21 06:39:22,LivestreamFeatures,moj,event-sink-loader,,,,1684651161667,...,,,0,,,,9.429336e+10,"[-0.27564990520477295, -1.8270213603973389, 0....","[-0.27564990520477295, -1.8270213603973389, 0....",0.000000
8,d6b023ff-34b3-46b9-8eb0-8f804981e559,93263497731,2023-05-21 18:48:54,LivestreamFeatures,moj,event-sink-loader,,,,1684694933288,...,,,0,9.326350e+10,"[-0.4969100058078766, -0.05652187764644623, 0....","[-0.4969100058078766, -0.05652187764644623, 0....",7.058556e+10,,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000
10,f6c8e9fc-ae23-4f53-9ad4-cfd291d69e7c,13843581531,2023-05-21 02:29:54,LivestreamFeatures,moj,event-sink-loader,,,,1684636193941,...,,,0,,,,9.759070e+10,"[0.33914563059806824, -0.44347870349884033, -0...","[0.33914563059806824, -0.44347870349884033, -0...",0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57111,cd08607c-572c-4ffd-a1fc-c7d1216a3e7a,54845094001,2023-05-20 06:11:34,LivestreamFeatures,moj,event-sink-loader,,,,1684563094372,...,,,0,,,,1.084528e+10,"[0.5981948375701904, -0.2964537739753723, 0.82...","[0.5981948375701904, -0.2964537739753723, 0.82...",0.000000
57112,444fc634-463a-468d-b60e-192562155669,4600331585,2023-05-20 02:06:30,LivestreamFeatures,moj,event-sink-loader,,,,1684548389288,...,,,0,,,,7.339085e+10,"[0.3958539068698883, -0.8178473114967346, 0.51...","[0.3958539068698883, -0.8178473114967346, 0.51...",0.000000
57113,444fc634-463a-468d-b60e-192562155669,14521477401,2023-05-20 02:07:41,LivestreamFeatures,moj,event-sink-loader,,,,1684548460719,...,,,0,1.452148e+10,"[-0.3460037112236023, 0.8110354542732239, 0.08...","[-0.3460037112236023, 0.8110354542732239, 0.08...",7.339085e+10,"[0.3958539068698883, -0.8178473114967346, 0.51...","[0.3958539068698883, -0.8178473114967346, 0.51...",-1.608339
57114,444fc634-463a-468d-b60e-192562155669,14521477401,2023-05-20 02:07:54,LivestreamFeatures,moj,event-sink-loader,,,,1684548473583,...,,,0,1.452148e+10,"[-0.3460037112236023, 0.8110354542732239, 0.08...","[-0.3460037112236023, 0.8110354542732239, 0.08...",7.339085e+10,"[0.3958539068698883, -0.8178473114967346, 0.51...","[0.3958539068698883, -0.8178473114967346, 0.51...",-1.608339


In [85]:
fpr, tpr, thresholds = metrics.roc_curve(dt["label"], dt["score"], pos_label=1)
metrics.auc(fpr, tpr)

0.6807917950835125

In [84]:
dt["score"][0]

0.07605046778917313