In [None]:
import sys

from pyspark.sql import Window, functions as F
from pyspark.ml.feature import QuantileDiscretizer
import pandas as pd

# Render every row and every column when a pandas DataFrame is displayed.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [None]:
import metaspore as ms

# Extra Spark settings handed to the MetaSpore session factory.
spark_confs = {
    "spark.network.timeout": "500",
    "spark.sql.codegen.wholeStage": "false",
}

# Options of the local MetaSpore Spark session used for the feature-generation cells.
session_options = dict(
    local=True,
    app_name='soc-pokec Demo',
    batch_size=256,
    worker_count=2,
    server_count=2,
    worker_memory='10G',
    server_memory='10G',
    coordinator_memory='10G',
)
spark_session = ms.spark.get_session(spark_confs=spark_confs, **session_options)

In [None]:
# Shell escape: list the contents of the soc-pokec dataset prefix in the demo bucket.
!aws s3 ls s3://dmetasoul-bucket/demo/datasets/soc-pokec/

In [None]:
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, LongType, StringType
import pyspark.sql.functions as F

# Column names of the header-less, tab-separated profile file, in file order.
profile_colunm_names = ['user_id', 'public', 'completion_percentage', 'gender', 'region', 'last_login', 'registration',
                'AGE', 'body', 'I_am_working_in_field', 'spoken_languages', 'hobbies', 'I_most_enjoy_good_food',
                'pets', 'body_type', 'my_eyesight', 'eye_color', 'hair_color', 'hair_type', 'completed_level_of_education',
                'favourite_color', 'relation_to_smoking', 'relation_to_alcohol', 'sign_in_zodiac',
                'on_pokec_i_am_looking_for', 'love_is_for_me', 'relation_to_casual_sex', 'my_partner_should_be',
                'marital_status', 'children', 'relation_to_children', 'I_like_movies', 'I_like_watching_movie',
                'I_like_music', 'I_mostly_like_listening_to_music', 'the_idea_of_good_evening', 'I_like_specialties_from_kitchen',
                'fun', 'I_am_going_to_concerts', 'my_active_sports', 'my_passive_sports', 'profession', 'I_like_books',
                'life_style', 'music', 'cars', 'politics', 'relationships', 'art_culture', 'hobbies_interests',
                'science_technologies', 'computers_internet', 'education', 'sport', 'movies', 'travelling', 'health',
                'companies_brands', 'more']
# Column names of the relationship (edge list) file.
relationship_colunm_names = ['user_id', 'friend_id']

# Profiles are read entirely as strings; relationship ids are read as longs.
profile_schema = StructType([
    StructField(name, StringType(), nullable=True) for name in profile_colunm_names
])
relationship_schema = StructType([
    StructField(name, LongType(), nullable=True) for name in relationship_colunm_names
])

# Reader options shared by both tab-separated, header-less files.
csv_options = dict(sep='\t', header=False, inferSchema=False)
profile_dataset = spark_session.read.csv(
    's3://dmetasoul-bucket/demo/datasets/soc-pokec/soc-pokec-profiles.txt',
    schema=profile_schema, **csv_options)
relationship_dataset = spark_session.read.csv(
    's3://dmetasoul-bucket/demo/datasets/soc-pokec/soc-pokec-relationships.txt',
    schema=relationship_schema, **csv_options)

# Keep the 16000 profiles with the smallest numeric user_id ...
profile_dataset = (
    profile_dataset
    .withColumn('user_id', F.col('user_id').cast(LongType()))
    .orderBy(F.col('user_id'))
    .limit(16000)
)
# ... and only the edges whose two endpoints do not exceed the largest kept id.
max_user_id = profile_dataset.agg(F.max('user_id')).first()[0]
within_sample = (F.col('user_id') <= max_user_id) & (F.col('friend_id') <= max_user_id)
relationship_dataset = relationship_dataset.filter(within_sample)

# Ids are handled as strings from here on.
profile_dataset = profile_dataset.withColumn('user_id', F.col('user_id').cast(StringType()))
relationship_dataset = relationship_dataset.select(
    F.col('user_id').cast(StringType()).alias('user_id'),
    F.col('friend_id').cast(StringType()).alias('friend_id'),
)

profile_dataset.cache()
relationship_dataset.cache()


In [None]:
# Preview the first 10 profile rows as a pandas DataFrame.
profile_dataset.limit(10).toPandas()

In [None]:
# Preview the first 10 relationship (user_id, friend_id) rows as a pandas DataFrame.
relationship_dataset.limit(10).toPandas()

In [None]:
# relationship_df = relationship_dataset.groupby(F.col('user_id')).agg(F.collect_set(F.col('friend_id')).alias('friends'))
# relationship_df.limit(10).toPandas()

# profile_df = profile_dataset.join(relationship_df, on=profile_dataset.user_id==relationship_df.user_id, how='leftouter').drop(relationship_df.user_id)
# profile_df.limit(10).toPandas()


In [None]:
# Profiles are joined twice under the alias 't2': once for the user, once for the friend.
profiles_t2 = profile_dataset.alias('t2')

# Attach the user's gender, age and profile completion to every edge.
user_joined = relationship_dataset.alias('t1').join(
    profiles_t2,
    on=F.col('t1.user_id') == F.col('t2.user_id'),
    how='leftouter',
)
relationship_df = user_joined.select(
    F.col('t1.*'),
    F.col('t2.gender').alias('user_gender'),
    F.col('t2.AGE').alias('user_age'),
    F.col('t2.completion_percentage').alias('user_completion_percentage'),
)

# Attach the same three attributes for the friend on the other end of the edge.
friend_joined = relationship_df.alias('t1').join(
    profiles_t2,
    on=F.col('t1.friend_id') == F.col('t2.user_id'),
    how='leftouter',
)
relationship_df = friend_joined.select(
    F.col('t1.*'),
    F.col('t2.gender').alias('friend_gender'),
    F.col('t2.AGE').alias('friend_age'),
    F.col('t2.completion_percentage').alias('friend_completion_percentage'),
)


In [None]:
# Print the schema of the joined relationship frame and show its first 5 rows.
relationship_df.printSchema()
relationship_df.show(5)

In [None]:
# Prepend a constant string label '1' to every relationship row.
# Any existing `label` column is dropped first (a no-op when it is absent), so
# re-running this cell does not produce a frame with duplicate `label` columns.
relationship_df = relationship_df.drop('label').select(F.lit('1').alias('label'), '*')
relationship_df.show(5)

In [None]:
# 90/10 random split of the labelled relationships, using seed 24.
splits = relationship_df.randomSplit(weights=[0.9, 0.1], seed=24)
train_dataset, test_dataset = splits

print('train dataset count: ', train_dataset.count())
print('test dataset count: ', test_dataset.count())

In [None]:
# One row per friend_id: within each friend, keep the row with the smallest user_id.
per_friend_by_user = Window.partitionBy('friend_id').orderBy(F.col('user_id'))

item_dataset = (
    relationship_df
    .withColumn('rn', F.row_number().over(per_friend_by_user))
    .where(F.col('rn') == 1)
    .drop('rn')
)

item_dataset.limit(10).toPandas()

In [None]:
# Persist the generated datasets as parquet, replacing any previous output.
train_dataset.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/train_dataset.parquet')
test_dataset.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/test_dataset.parquet')
item_dataset.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/item_dataset.parquet')
profile_dataset.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/profile_dataset.parquet')
relationship_dataset.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/relationship_dataset.parquet')

# ItemCF

In [None]:
# Rank each user's test friends by friend_id: rank 1 is the trigger row,
# the remaining ranks become the list of friends to be recovered.
per_user_by_friend = Window.partitionBy('user_id').orderBy(F.col('friend_id'))
rank_in_user = F.row_number().over(per_user_by_friend)

tigger_df = (
    test_dataset
    .withColumn('rn', rank_in_user)
    .where(F.col('rn') == 1)
    .drop('rn')
)

label_df = (
    test_dataset
    .withColumn('rn', rank_in_user)
    .where(F.col('rn') > 1)
    .drop('rn')
    .groupby('user_id')
    .agg(F.collect_list('friend_id').alias('label_friends'))
)

# Right outer join: one output row per user that has label friends.
trigger_with_labels = tigger_df.alias('t1').join(
    label_df.alias('t2'),
    on=F.col('t1.user_id') == F.col('t2.user_id'),
    how='rightouter',
)
test_df = trigger_with_labels.select(F.col('t1.*'), F.col('t2.label_friends'))

test_df.limit(10).toPandas()

In [None]:
# Make the MetaSpore checkout importable so that `python.algos` can be resolved.
# The membership guard keeps sys.path from growing when this cell is re-run.
# NOTE(review): hard-coded absolute path -- adjust to the local checkout location.
METASPORE_ROOT = '/home/spark/work/MetaSpore/'
if METASPORE_ROOT not in sys.path:
    sys.path.append(METASPORE_ROOT)
from python.algos.item_cf_retrieval import ItemCFEstimator

# ItemCF configuration: user_id/friend_id act as the user/item pair, rows with
# label '1' are the behaviors kept, and up to 20 recommendations are produced.
itemcf_options = dict(
    user_id_column_name='user_id',
    item_id_column_name='friend_id',
    behavior_column_name='label',
    behavior_filter_value='1',
    key_column_name='key',
    value_column_name='value',
    max_recommendation_count=20,
    debug=True,
)
estimator = ItemCFEstimator(**itemcf_options)

model = estimator.fit(train_dataset)

In [None]:
# Apply the ItemCF model to the test frame and expose its `value` column as `rec_info`.
prediction_df = model.transform(test_df).withColumnRenamed('value', 'rec_info')
prediction_df.limit(10).toPandas()

In [None]:
from pyspark.mllib.evaluation import RankingMetrics
# Build (recommended ids, label_friends) pairs for RankingMetrics;
# a missing rec_info is treated as an empty recommendation list.
prediction_label_rdd = prediction_df.rdd.map(
    lambda row: (
        [] if row.rec_info is None else [rec._1 for rec in row.rec_info],
        row.label_friends,
    )
)
recall_metrics = RankingMetrics(prediction_label_rdd)

print("Debug -- Precision@20: ", recall_metrics.precisionAt(20))
print("Debug -- Recall@20: ", recall_metrics.recallAt(20))
print("Debug -- MAP@20: ", recall_metrics.meanAveragePrecisionAt(20))

# Swing

In [None]:
# Swing configuration: same user/item/behavior columns as ItemCF,
# with non-plain weights and a smoothing coefficient of 1.0.
swing_options = dict(
    user_id_column_name='user_id',
    item_id_column_name='friend_id',
    behavior_column_name='label',
    behavior_filter_value='1',
    key_column_name='key',
    value_column_name='value',
    use_plain_weight=False,
    smoothing_coefficient=1.0,
    max_recommendation_count=20,
)
swing_estimator = ms.SwingEstimator(**swing_options)

swing_model = swing_estimator.fit(train_dataset)

In [None]:
# Apply the Swing model to the test frame and expose its `value` column as `rec_info`.
swing_prediction_df = swing_model.transform(test_df).withColumnRenamed('value', 'rec_info')
swing_prediction_df.limit(10).toPandas()

In [None]:
from pyspark.mllib.evaluation import RankingMetrics
# Build (recommended ids, label_friends) pairs for RankingMetrics;
# a missing rec_info is treated as an empty recommendation list.
swing_prediction_label_rdd = swing_prediction_df.rdd.map(
    lambda row: (
        [] if row.rec_info is None else [rec._1 for rec in row.rec_info],
        row.label_friends,
    )
)
swing_recall_metrics = RankingMetrics(swing_prediction_label_rdd)

print("Debug -- Swing Precision@20: ", swing_recall_metrics.precisionAt(20))
print("Debug -- Swing Recall@20: ", swing_recall_metrics.recallAt(20))
print("Debug -- Swing MAP@20: ", swing_recall_metrics.meanAveragePrecisionAt(20))

# TwoTowers

In [None]:
import yaml
import subprocess
import sys
import metaspore as ms

# Load the two-tower (DSSM, in-batch negatives) hyper-parameters from the YAML config.
# NOTE(review): yaml.FullLoader is kept as in the original; only load trusted config files with it.
dssm_config_path = 'conf/soc_pokec_dssm_inbatch_new.yaml'
model_params = {}
with open(dssm_config_path, 'r') as config_file:
    model_params = yaml.load(config_file, Loader=yaml.FullLoader)
print('Debug -- load config: ', model_params)

In [None]:
# Zip the `python` directory of the recommend-algos checkout into python.zip,
# the archive referenced by spark.submit.pyFiles below.
zip_command = ['zip', '-r', '../MetaSpore/solutions/recommend/offline/social_network/python.zip', 'python']
subprocess.run(zip_command, cwd='../../../../../recommend-algos')

spark_confs = {
    "spark.network.timeout": "500",
    "spark.ui.showConsoleProgress": "true",
    "spark.kubernetes.executor.deleteOnTermination": "true",
    "spark.submit.pyFiles": "python.zip",
}

# Session sizing comes straight from the loaded YAML config.
session_param_names = (
    'local', 'app_name', 'batch_size', 'worker_count', 'server_count',
    'worker_memory', 'server_memory', 'coordinator_memory',
)
spark = ms.spark.get_session(
    spark_confs=spark_confs,
    **{name: model_params[name] for name in session_param_names},
)
sc = spark.sparkContext

print('Debug -- spark init')
print('Debug -- version:', sc.version)
print('Debug -- applicaitonId:', sc.applicationId)
print('Debug -- uiWebUrl:', sc.uiWebUrl)

In [None]:
# sys.path.append('/home/spark/work/recommend-algos')
from python.dssm_net import UserModule, ItemModule, SimilarityModule
from python.training import TwoTowerBatchNegativeSamplingAgent, TwoTowerBatchNegativeSamplingModule

In [None]:
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, LongType, StringType
import pyspark.sql.functions as F

# Read the train, test and item parquet datasets from the paths given in the config.
train_dataset, test_dataset, item_dataset = [
    spark.read.parquet(model_params[path_key])
    for path_key in ('train_path', 'test_path', 'item_path')
]


In [None]:
# Preview the first 100 training rows as a pandas DataFrame.
train_dataset.limit(100).toPandas()

In [None]:
## Build the user tower, the item tower and the similarity module.
# Both towers share the same embedding size, FTRL settings and dense structure.
tower_options = dict(
    emb_size=model_params['vector_embedding_size'],
    alpha=model_params['ftrl_learning_rate'],
    beta=model_params['ftrl_smothing_rate'],
    l1=model_params['ftrl_l1_regularization'],
    l2=model_params['ftrl_l2_regularization'],
    dense_structure=model_params['dense_structure'],
)
user_module = UserModule(
    model_params['user_column_name'],
    model_params['user_combine_schema'],
    **tower_options,
)
item_module = ItemModule(
    model_params['item_column_name'],
    model_params['item_combine_schema'],
    **tower_options,
)
similarity_module = SimilarityModule(model_params['tau'])
module = TwoTowerBatchNegativeSamplingModule(user_module, item_module, similarity_module)

import importlib
## Resolve the estimator class whose module and class names are given in the config.
module_lib = importlib.import_module(model_params['two_tower_module'])
estimator_class_ = getattr(module_lib, model_params['two_tower_estimator_class'])

# Arguments fixed in this notebook; everything else is forwarded from the config.
fixed_estimator_options = dict(
    module=module,
    item_dataset=item_dataset,
    item_ids_column_indices=[2],
    retrieval_item_count=20,
    metric_update_interval=500,
    agent_class=TwoTowerBatchNegativeSamplingAgent,
)
estimator = estimator_class_(**fixed_estimator_options, **model_params)

## Learning rate of the Adam updater
estimator.updater = ms.AdamTensorUpdater(model_params['adam_learning_rate'])

## Train the model
model = estimator.fit(train_dataset)

In [None]:
# Apply the trained two-tower model to the test dataset and show the first 20 result rows.
test_result = model.transform(test_dataset)
print('Debug -- test result sample:')
test_result.show(20)

In [None]:
from pyspark.sql import functions as F
print('Debug -- test sample:')

# One output row per (position, recommendation) pair of each user's rec_info.
exploded_recs = F.posexplode('rec_info').alias('pos', 'rec_info')
test_result.select('user_id', exploded_recs).show(60)

# The same view restricted to user 100.
user_100_result = test_result.filter(test_result['user_id'] == 100)
user_100_result.select('user_id', exploded_recs).show(60)

## evaluation
from pyspark.mllib.evaluation import RankingMetrics
# Build (recommended names, [friend_id]) pairs for RankingMetrics;
# a missing rec_info is treated as an empty recommendation list.
prediction_label_rdd = test_result.rdd.map(
    lambda row: (
        [] if row.rec_info is None else [rec.name for rec in row.rec_info],
        [row.friend_id],
    )
)

recall_metrics = RankingMetrics(prediction_label_rdd)

print("Debug -- Precision@20: ", recall_metrics.precisionAt(20))
print("Debug -- Recall@20: ", recall_metrics.recallAt(20))
print("Debug -- MAP@20: ", recall_metrics.meanAveragePrecisionAt(20))

# Negative Sampling for CTR

In [None]:
import yaml
import subprocess
import sys
import metaspore as ms

# Zip the `common` directory of the demo dataset folder into python.zip,
# the archive referenced by spark.submit.pyFiles below.
zip_command = ['zip', '-r', '../../solutions/recommend/offline/social_network/python.zip', 'common']
subprocess.run(zip_command, cwd='../../../../demo/dataset')

spark_confs = {
    "spark.network.timeout": "500",
    "spark.ui.showConsoleProgress": "true",
    "spark.kubernetes.executor.deleteOnTermination": "true",
    "spark.submit.pyFiles": "python.zip",
}

# Non-local session used for the negative-sampling step.
session_options = dict(
    local=False,
    app_name='soc-pokec ng sampling',
    batch_size=128,
    worker_count=2,
    server_count=2,
    worker_memory='10G',
    server_memory='10G',
    coordinator_memory='10G',
)
spark = ms.spark.get_session(spark_confs=spark_confs, **session_options)
sc = spark.sparkContext

print('Debug -- spark init')
print('Debug -- version:', sc.version)
print('Debug -- applicaitonId:', sc.applicationId)
print('Debug -- uiWebUrl:', sc.uiWebUrl)

In [None]:
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, LongType, StringType
import pyspark.sql.functions as F

# Reload the train, test and item datasets written by the feature-generation cells.
train_dataset, test_dataset, item_dataset = [
    spark.read.parquet(parquet_path)
    for parquet_path in (
        's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/train_dataset.parquet',
        's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/test_dataset.parquet',
        's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/item_dataset.parquet',
    )
]

# All rows of both splits, used as input for negative sampling.
all_dataset = train_dataset.union(test_dataset)


In [None]:
from common.neg_sampler import negative_sampling

# Run the project's negative sampler over all rows.
# NOTE(review): presumably negative_sample=3 means three negatives per positive -- confirm in common.neg_sampler.
sampling_options = dict(
    dataset=all_dataset,
    user_column='user_id',
    item_column='friend_id',
    time_column=None,
    negative_item_column='trigger_item_id',
    negative_sample=3,
)
neg_sample_df = negative_sampling(spark, **sampling_options)
neg_sample_df.cache()

neg_sample_count = neg_sample_df.count()
print('count of neg_sample_df: ', neg_sample_count)
neg_sample_df.limit(10).toPandas()


In [None]:
# Label the sampled rows '0', then rebuild the feature columns carried by `all_dataset`:
#   1. user features come from the row matching (user_id, trigger_item_id) in all_dataset;
#   2. friend features come from item_dataset, matched on friend_id.
neg_sample_df = (
    neg_sample_df
    .withColumn('label', F.lit('0'))
    .alias('t1')
    .join(
        all_dataset.alias('t2'),
        on=(F.col('t1.user_id') == F.col('t2.user_id'))
           & (F.col('t1.trigger_item_id') == F.col('t2.friend_id')),
        how='leftouter',
    )
    .select('t1.label', 't1.user_id', 't1.friend_id',
            't2.user_gender', 't2.user_age', 't2.user_completion_percentage')
    .alias('t1')
    .join(
        item_dataset.alias('t2'),
        on=F.col('t1.friend_id') == F.col('t2.friend_id'),
        how='leftouter',
    )
    .select('t1.*', 't2.friend_gender', 't2.friend_age', 't2.friend_completion_percentage')
)

neg_sample_df.limit(10).toPandas()

In [None]:
# Combine the positive rows with the sampled negatives.
# * The positives are rebuilt from train_dataset/test_dataset rather than from the
#   previous value of `all_dataset`, so re-running this cell does not append the
#   negatives a second time.
# * unionByName matches columns by name; a positional union would silently mix up
#   features if the column order of the two frames ever differed.
positive_dataset = train_dataset.unionByName(test_dataset)
all_dataset = positive_dataset.unionByName(neg_sample_df)

# 90/10 random split of the ranking dataset, using seed 24.
splits = all_dataset.randomSplit([0.9, 0.1], 24)
train_dataset_rank, test_dataset_rank = splits[0], splits[1]

In [None]:
def _positive_fraction(dataset):
    """Return the fraction of rows in ``dataset`` whose ``label`` column equals '1'."""
    positive_count = dataset.filter(F.col('label') == '1').count()
    return positive_count / dataset.count()


train_dataset_rank.cache()
test_dataset_rank.cache()

print('Percentage of positive sample in train_dataset: ', _positive_fraction(train_dataset_rank))
print('Percentage of positive sample in test_dataset: ', _positive_fraction(test_dataset_rank))

# Persist both ranking splits as parquet, replacing any previous output.
train_dataset_rank.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/train_dataset_rank.parquet')
test_dataset_rank.write.mode("overwrite").parquet('s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/test_dataset_rank.parquet')


# DeepFM

In [1]:
import yaml
import subprocess
import sys
import metaspore as ms

# Load the DeepFM hyper-parameters from the YAML config.
# NOTE(review): yaml.FullLoader is kept as in the original; only load trusted config files with it.
deepfm_config_path = 'conf/soc_pokec_deepfm.yaml'
model_params = {}
with open(deepfm_config_path, 'r') as config_file:
    model_params = yaml.load(config_file, Loader=yaml.FullLoader)
print('Debug -- load config: ', model_params)

# Expose every config key as a notebook-level variable (at cell top level
# locals() is the notebook namespace); later cells read these names directly.
locals().update(model_params)

  from .autonotebook import tqdm as notebook_tqdm


Debug -- load config:  {'app_name': 'CTR Demo DeepFM - soc_pokec', 'local': False, 'worker_count': 2, 'server_count': 2, 'batch_size': 50, 'worker_memory': '5G', 'server_memory': '5G', 'coordinator_memory': '5G', 'train_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/train_dataset_rank.parquet', 'test_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_fg/test_dataset_rank.parquet', 'wide_column_name_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/column_name.txt', 'wide_combine_schema_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/wide_combine_schema.txt', 'deep_column_name_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/column_name.txt', 'deep_combine_schema_path': 's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/deep_combine_schema.txt', 'model_in_path': None, 'model_out_path': 's3://dmetasoul-bucket/demo/soc-pokec/deepfm/model_out/', 'model_version': '0.1', 'experiment_name': 'soc_pokec_dfm', 'inp

In [2]:
# Zip the `python` directory four levels up into python.zip,
# the archive referenced by spark.submit.pyFiles below.
zip_command = ['zip', '-r', 'solutions/recommend/offline/social_network/python.zip', 'python']
subprocess.run(zip_command, cwd='../../../../')

spark_confs = {
    "spark.network.timeout": "500",
    "spark.ui.showConsoleProgress": "true",
    "spark.kubernetes.executor.deleteOnTermination": "true",
    "spark.submit.pyFiles": "python.zip",
}

# Session sizing comes straight from the loaded YAML config.
session_param_names = (
    'local', 'app_name', 'batch_size', 'worker_count', 'server_count',
    'worker_memory', 'server_memory', 'coordinator_memory',
)
spark = ms.spark.get_session(
    spark_confs=spark_confs,
    **{name: model_params[name] for name in session_param_names},
)
sc = spark.sparkContext

print('Debug -- spark init')
print('Debug -- version:', sc.version)
print('Debug -- applicaitonId:', sc.applicationId)
print('Debug -- uiWebUrl:', sc.uiWebUrl)

updating: python/ (stored 0%)
updating: python/algos/ (stored 0%)
updating: python/algos/xdeepfm_net.py (deflated 71%)
updating: python/algos/widedeep_net.py (deflated 68%)
updating: python/algos/tuner/ (stored 0%)
updating: python/algos/tuner/base_tuner.py (deflated 70%)
updating: python/algos/multitask/ (stored 0%)
updating: python/algos/multitask/mmoe/ (stored 0%)
updating: python/algos/multitask/mmoe/mmoe_net.py (deflated 75%)
updating: python/algos/multitask/mmoe/mmoe_agent.py (deflated 70%)
updating: python/algos/multitask/mmoe/__pycache__/ (stored 0%)
updating: python/algos/multitask/mmoe/__pycache__/mmoe_net.cpython-38.pyc (deflated 43%)
updating: python/algos/multitask/mmoe/__pycache__/mmoe_agent.cpython-38.pyc (deflated 52%)
updating: python/algos/multitask/mmoe/.ipynb_checkpoints/ (stored 0%)
updating: python/algos/multitask/mmoe/.ipynb_checkpoints/mmoe_net-checkpoint.py (deflated 75%)
updating: python/algos/multitask/mmoe/.ipynb_checkpoints/mmoe_agent-checkpoint.py (deflate

22/06/21 07:33:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
22/06/21 07:33:51 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


Debug -- spark init
Debug -- version: 3.1.2
Debug -- applicaitonId: spark-application-1655796832122
Debug -- uiWebUrl: http://jupyter.my.nginx.test/hub/user-redirect/proxy/4041/jobs/


In [3]:
from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, LongType, StringType
import pyspark.sql.functions as F

# Read the ranking train and test parquet datasets from the paths given in the config.
train_dataset, test_dataset = [
    spark.read.parquet(model_params[path_key])
    for path_key in ('train_path', 'test_path')
]

                                                                                

In [4]:
from python.algos.deepfm_net import DeepFM

# DeepFM constructor arguments, each read from the config entry of the same name.
# Reading them from `model_params` explicitly (as the two-tower and session cells do)
# removes the dependency on notebook variables injected by `locals().update(...)`,
# and a missing entry now fails with a KeyError naming the absent config key.
deepfm_param_names = [
    'use_wide',
    'use_dnn',
    'use_fm',
    'wide_embedding_dim',
    'deep_embedding_dim',
    'wide_column_name_path',
    'wide_combine_schema_path',
    'deep_column_name_path',
    'deep_combine_schema_path',
    'sparse_init_var',
    'dnn_hidden_units',
    'dnn_hidden_activations',
    'use_bias',
    'batch_norm',
    'net_dropout',
    'net_regularizer',
    'ftrl_l1',
    'ftrl_l2',
    'ftrl_alpha',
    'ftrl_beta',
]
module = DeepFM(**{name: model_params[name] for name in deepfm_param_names})

# The estimator receives the full config as keyword arguments.
estimator = ms.PyTorchEstimator(module=module, **model_params)

# Learning rate of the Adam updater
estimator.updater = ms.AdamTensorUpdater(model_params['adam_learning_rate'])
model = estimator.fit(train_dataset)


Get aws endpoint from env: ks3-cn-beijing-internal.ksyuncs.com
[32mloaded combine schema from[m [32mcolumn name file [m's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/column_name.txt' [32mand combine schema file [m's3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/wide_combine_schema.txt'[WARN] 2022-06-21 07:34:13.392 STSAssumeRoleWithWebIdentityCredentialsProvider [139894355953472] Token file must be specified to use STS AssumeRole web identity creds provider.
[2022-06-21 07:34:13.393] [info] ../cpp/metaspore/s3_sdk_filesys.cpp:411 -- Try to open S3 stream: s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/column_name.txt, read_only: true

[2022-06-21 07:34:13.432] [info] ../cpp/metaspore/s3_sdk_filesys.cpp:437 -- Opened read-only stream for object: s3://dmetasoul-bucket/demo/datasets/soc-pokec/demo_schema/column_name.txt with total length: 125

[2022-06-21 07:34:13.436] [info] ../cpp/metaspore/s3_sdk_filesys.cpp:479 -- Read S3 object s3://dmetasoul-bu

[Stage 2:>                  (0 + 2) / 2][Stage 3:>                  (0 + 2) / 2]

[2022-06-21 07:34:16.585] [info] C[0]:9: The coordinator has connected to 2 servers and 2 workers.
PS Coordinator node [32mC[0]:9[m is ready.


                                                                                

shuffle df to partitions 4


[Stage 2:>                  (0 + 2) / 2][Stage 8:>                  (0 + 2) / 4]

2022-06-21 07:34:29.392 -- auc: 0.5080945506599379, Δauc: 0.5080945506599379, pcoc: 1.0211280284636062, Δpcoc: 1.0211280284636062, #instance: 5000
2022-06-21 07:34:29.782 -- auc: 0.505055990736009, Δauc: 0.5023478434132538, pcoc: 1.0398334454373273, Δpcoc: 1.0591693820843198, #instance: 10000
2022-06-21 07:34:36.210 -- auc: 0.514924039535408, Δauc: 0.5446873653583264, pcoc: 1.035013532005094, Δpcoc: 1.0252815314973018, #instance: 15000
2022-06-21 07:34:36.706 -- auc: 0.5179560721313156, Δauc: 0.528445078356796, pcoc: 1.0280001004348158, Δpcoc: 1.007026231279501, #instance: 20000
2022-06-21 07:34:43.007 -- auc: 0.5290973220947028, Δauc: 0.5826278413201091, pcoc: 1.025671064148137, Δpcoc: 1.0163916256410856, #instance: 25000
2022-06-21 07:34:43.521 -- auc: 0.5349348529730871, Δauc: 0.5681581573825804, pcoc: 1.020272798226817, Δpcoc: 0.9938250585270888, #instance: 30000
2022-06-21 07:34:49.884 -- auc: 0.5432887146082466, Δauc: 0.5970318106817718, pcoc: 1.0187785744137139, Δpcoc: 1.0095565

[Stage 2:>                  (0 + 2) / 2][Stage 8:>                  (0 + 2) / 4]

2022-06-21 07:35:24.105 -- auc: 0.5941350772696721, Δauc: 0.6542538669035285, pcoc: 1.0063156210205508, Δpcoc: 0.9948280561574505, #instance: 85000
2022-06-21 07:35:24.899 -- auc: 0.5959386932296898, Δauc: 0.6266058896119076, pcoc: 1.0067606703230665, Δpcoc: 1.0145913402730056, #instance: 90000
2022-06-21 07:35:30.782 -- auc: 0.5988791951592294, Δauc: 0.6497864178163879, pcoc: 1.0068311261010747, Δpcoc: 1.0080904673926439, #instance: 95000
2022-06-21 07:35:31.673 -- auc: 0.6017488317612341, Δauc: 0.6528520017243229, pcoc: 1.004948986975672, Δpcoc: 0.9709063841593455, #instance: 100000
2022-06-21 07:35:37.508 -- auc: 0.6042821034531655, Δauc: 0.6556666794957087, pcoc: 1.006732807397137, Δpcoc: 1.044313947925009, #instance: 105000
2022-06-21 07:35:38.470 -- auc: 0.6055982046381736, Δauc: 0.6324345453605639, pcoc: 1.0047180481393048, Δpcoc: 0.9639419803048498, #instance: 110000
2022-06-21 07:35:44.326 -- auc: 0.6074515117471537, Δauc: 0.6476282185697158, pcoc: 1.0046452908753036, Δpcoc: 1

[Stage 2:>                  (0 + 2) / 2][Stage 8:>                  (0 + 2) / 4]

2022-06-21 07:36:25.506 -- auc: 0.6220613231026784, Δauc: 0.6658605757534801, pcoc: 1.0030595327239546, Δpcoc: 1.0008644680101044, #instance: 175000
2022-06-21 07:36:26.669 -- auc: 0.6228562589622478, Δauc: 0.6498833906908688, pcoc: 1.0031064667131067, Δpcoc: 1.0047990069073531, #instance: 180000
2022-06-21 07:36:32.282 -- auc: 0.6238166947169332, Δauc: 0.6577204640538135, pcoc: 1.0021643177245472, Δpcoc: 0.9691366400722377, #instance: 185000
2022-06-21 07:36:33.508 -- auc: 0.6245825352392149, Δauc: 0.65259850378898, pcoc: 1.0023163515512452, Δpcoc: 1.0078797871875613, #instance: 190000
2022-06-21 07:36:39.297 -- auc: 0.625096077277081, Δauc: 0.6447041870485213, pcoc: 1.0031831630297314, Δpcoc: 1.0359754765768188, #instance: 195000
2022-06-21 07:36:40.363 -- auc: 0.6260315909888543, Δauc: 0.6600093573979806, pcoc: 1.002364241576281, Δpcoc: 0.9721630572914988, #instance: 200000
2022-06-21 07:36:46.175 -- auc: 0.6269802912518158, Δauc: 0.6632023173266391, pcoc: 1.0021800580468856, Δpcoc:

[Stage 2:>                  (0 + 2) / 2][Stage 8:>                  (0 + 2) / 4]

2022-06-21 07:37:27.091 -- auc: 0.6352805956674368, Δauc: 0.6791368504564979, pcoc: 1.0011703534377785, Δpcoc: 0.9753403681961048, #instance: 265000
2022-06-21 07:37:28.804 -- auc: 0.6359740531985532, Δauc: 0.6712656747094644, pcoc: 1.0011366103757793, Δpcoc: 0.9993186723215747, #instance: 270000
2022-06-21 07:37:33.879 -- auc: 0.6367081188818448, Δauc: 0.6754126773888363, pcoc: 1.0012699503988896, Δpcoc: 1.0086826760428294, #instance: 275000
2022-06-21 07:37:35.586 -- auc: 0.6373562645647424, Δauc: 0.6715662623899945, pcoc: 1.0012207680198393, Δpcoc: 0.9984974312706115, #instance: 280000
2022-06-21 07:37:40.679 -- auc: 0.6376445628522305, Δauc: 0.6535092698407894, pcoc: 1.0009446499612762, Δpcoc: 0.9856959329524511, #instance: 285000
2022-06-21 07:37:42.763 -- auc: 0.6380052941333005, Δauc: 0.6577713373655913, pcoc: 1.0009299357023633, Δpcoc: 1.0001039139926433, #instance: 290000
2022-06-21 07:37:47.492 -- auc: 0.6388102389682586, Δauc: 0.683283625958854, pcoc: 1.0011739632215966, Δpc

[Stage 2:>                  (0 + 2) / 2][Stage 8:>                  (0 + 2) / 4]

2022-06-21 07:38:24.238 -- auc: 0.6450958870792909, Δauc: 0.6820420212670022, pcoc: 1.0006927911767962, Δpcoc: 1.000626604853616, #instance: 350000
2022-06-21 07:38:28.927 -- auc: 0.6458465050860619, Δauc: 0.6949093069823544, pcoc: 1.0006900308031517, Δpcoc: 1.0004973615272135, #instance: 355000
2022-06-21 07:38:31.483 -- auc: 0.6464755440785181, Δauc: 0.6868510246099644, pcoc: 1.000330877994977, Δpcoc: 0.975871041122274, #instance: 360000
2022-06-21 07:38:35.871 -- auc: 0.64707889569172, Δauc: 0.6882848752484367, pcoc: 1.0004043365664943, Δpcoc: 1.0057066089587674, #instance: 365000
2022-06-21 07:38:38.420 -- auc: 0.647565358245718, Δauc: 0.6819824553877831, pcoc: 1.000537453199355, Δpcoc: 1.0104203970201553, #instance: 370000
2022-06-21 07:38:42.750 -- auc: 0.648086439635045, Δauc: 0.6849619219903766, pcoc: 0.9999878326600549, Δpcoc: 0.9597996305148804, #instance: 375000
2022-06-21 07:38:45.398 -- auc: 0.6486861690211987, Δauc: 0.691343796563705, pcoc: 0.99995187158282, Δpcoc: 0.9972

[Stage 2:>                  (0 + 2) / 2][Stage 8:====>              (1 + 2) / 4]

2022-06-21 07:38:52.309 -- auc: 0.6501671458126673, Δauc: 0.6979351022826732, pcoc: 1.0002096868785202, Δpcoc: 1.0366617885636695, #instance: 390000




2022-06-21 07:38:56.640 -- auc: 0.6506638550321686, Δauc: 0.6881765929687225, pcoc: 1.0001905374362177, Δpcoc: 0.9986631636298583, #instance: 394982
2022-06-21 07:38:59.285 -- auc: 0.6511791240589212, Δauc: 0.68931799568322, pcoc: 1.0001020572123707, Δpcoc: 0.9930815920886765, #instance: 399973
2022-06-21 07:39:03.596 -- auc: 0.6517567439741726, Δauc: 0.695723207130936, pcoc: 1.0000636346387644, Δpcoc: 0.9969240370986914, #instance: 404973
2022-06-21 07:39:06.338 -- auc: 0.6523757746092502, Δauc: 0.698328426586746, pcoc: 0.999992758835982, Δpcoc: 0.9943322889122045, #instance: 409973
2022-06-21 07:39:10.440 -- auc: 0.6531597457939206, Δauc: 0.7111034429875009, pcoc: 1.0000566538699622, Δpcoc: 1.0052397247386315, #instance: 414973
2022-06-21 07:39:13.248 -- auc: 0.6538158707996894, Δauc: 0.70452799177834, pcoc: 0.9999244904173499, Δpcoc: 0.9890969193532492, #instance: 419973
2022-06-21 07:39:17.467 -- auc: 0.6543585919746102, Δauc: 0.6976314069563277, pcoc: 1.0001864138131638, Δpcoc: 1.



2022-06-21 07:39:54.665 -- auc: 0.6607917466221926, Δauc: 0.713047594468216, pcoc: 0.9996193390539677, Δpcoc: 1.01016871333589, #instance: 479973
2022-06-21 07:39:58.375 -- auc: 0.6613872190278356, Δauc: 0.7125778282524848, pcoc: 0.9994016924379375, Δpcoc: 0.9789018164147864, #instance: 484973
2022-06-21 07:40:01.562 -- auc: 0.662007420578911, Δauc: 0.7169491327081527, pcoc: 0.999392426937821, Δpcoc: 0.9984879664690846, #instance: 489973
2022-06-21 07:40:05.274 -- auc: 0.6625682262309504, Δauc: 0.7136065888799832, pcoc: 0.9995288092856932, Δpcoc: 1.012893091565734, #instance: 494973
2022-06-21 07:40:08.570 -- auc: 0.6631312060038707, Δauc: 0.7130584210547393, pcoc: 0.999471334480709, Δpcoc: 0.9938265230369119, #instance: 499973
2022-06-21 07:40:12.100 -- auc: 0.6637428860353773, Δauc: 0.7187313961346128, pcoc: 0.9994716626236759, Δpcoc: 0.9995054945653798, #instance: 504973
2022-06-21 07:40:15.612 -- auc: 0.6643854076443244, Δauc: 0.7225655932220765, pcoc: 0.9994967834411249, Δpcoc: 1.



2022-06-21 07:40:56.952 -- auc: 0.6705797027930075, Δauc: 0.7163710211625125, pcoc: 0.999277565965042, Δpcoc: 1.0174403433737003, #instance: 569973
2022-06-21 07:41:00.261 -- auc: 0.6712055011239652, Δauc: 0.7353058823754934, pcoc: 0.999231785215482, Δpcoc: 0.9939688258616333, #instance: 574973
2022-06-21 07:41:03.844 -- auc: 0.67165042238899, Δauc: 0.7187128256325186, pcoc: 0.9990992522798284, Δpcoc: 0.984090078090748, #instance: 579973
2022-06-21 07:41:07.143 -- auc: 0.6721355320564888, Δauc: 0.7246141018439025, pcoc: 0.9993604742774279, Δpcoc: 1.0315912668953842, #instance: 584973
2022-06-21 07:41:10.738 -- auc: 0.6727326532568018, Δauc: 0.7351937679068491, pcoc: 0.9994023266048627, Δpcoc: 1.0043938331203752, #instance: 589973
2022-06-21 07:41:14.003 -- auc: 0.6731581508399249, Δauc: 0.7188176420897234, pcoc: 0.9993009586843818, Δpcoc: 0.9876191094229596, #instance: 594973
2022-06-21 07:41:17.557 -- auc: 0.6736211677106072, Δauc: 0.7216538939410153, pcoc: 0.999059119347104, Δpcoc: 0



2022-06-21 07:41:55.470 -- auc: 0.678838233362663, Δauc: 0.7422402254952082, pcoc: 0.9990921257087789, Δpcoc: 0.9589540578160246, #instance: 654973
2022-06-21 07:41:58.715 -- auc: 0.6792383900932693, Δauc: 0.7267640348079467, pcoc: 0.9990501799435682, Δpcoc: 0.9936419878726998, #instance: 659973
2022-06-21 07:42:02.397 -- auc: 0.6796757190028928, Δauc: 0.7315284025744824, pcoc: 0.998983983111115, Δpcoc: 0.9903962190976866, #instance: 664973
2022-06-21 07:42:05.720 -- auc: 0.6800249860911378, Δauc: 0.723897135191415, pcoc: 0.9991409678180118, Δpcoc: 1.0203913335930812, #instance: 669973
2022-06-21 07:42:09.352 -- auc: 0.6804802323479358, Δauc: 0.7345718970221923, pcoc: 0.9990190594486482, Δpcoc: 0.98293912698771, #instance: 674973
2022-06-21 07:42:12.680 -- auc: 0.6810155293872419, Δauc: 0.7447063017898514, pcoc: 0.9989622824648379, Δpcoc: 0.9913332723462741, #instance: 679973
2022-06-21 07:42:16.229 -- auc: 0.6814206560882062, Δauc: 0.7319721742970053, pcoc: 0.9990886404227426, Δpcoc: 



2022-06-21 07:42:53.806 -- auc: 0.6861228199812143, Δauc: 0.7296874821510898, pcoc: 0.9992221130486698, Δpcoc: 1.0031478155153153, #instance: 739973
2022-06-21 07:42:58.356 -- auc: 0.6865583870570676, Δauc: 0.7425362410403923, pcoc: 0.9990374056412729, Δpcoc: 0.9727693133884006, #instance: 744973
2022-06-21 07:43:00.669 -- auc: 0.6869813875980844, Δauc: 0.744829329784731, pcoc: 0.9989649353044321, Δpcoc: 0.9881154859265165, #instance: 749973
2022-06-21 07:43:05.210 -- auc: 0.6874195914769856, Δauc: 0.7478401116225516, pcoc: 0.9992063063631884, Δpcoc: 1.0380225837331878, #instance: 754973
2022-06-21 07:43:07.631 -- auc: 0.687882979646073, Δauc: 0.7506009844213668, pcoc: 0.9990430433095698, Δpcoc: 0.9744608632007858, #instance: 759973
2022-06-21 07:43:12.002 -- auc: 0.6882316193274245, Δauc: 0.736082619281425, pcoc: 0.9989076708767476, Δpcoc: 0.9786614204698119, #instance: 764973
2022-06-21 07:43:14.471 -- auc: 0.6886169741341441, Δauc: 0.7422801191450221, pcoc: 0.998997796588263, Δpcoc:



2022-06-21 07:43:21.057 -- auc: 0.689528944578734, Δauc: 0.7492324280278319, pcoc: 0.9990060877636344, Δpcoc: 0.9908285630634422, #instance: 779973


                                                                                

2022-06-21 07:43:23.344 -- auc: 0.6896542400209763, Δauc: 0.742559770591487, pcoc: 0.9989833416025428, Δpcoc: 0.9886140933743229, #instance: 781745
2022-06-21 07:43:23.355 -- auc: 0.6897089537728468, Δauc: 0.732066785763364, pcoc: 0.9990765990098607, Δpcoc: 1.0798043828261525, #instance: 782739


[Stage 2:>                  (0 + 2) / 2][Stage 10:>                 (0 + 2) / 2]

[2022-06-21 07:43:33.710] [info] C[0]:9 has stopped.
[2022-06-21 07:43:33.712] [info] PS job with coordinator address 10.0.1.219:37261 stopped.


                                                                                

In [5]:
from pyspark.ml.evaluation import BinaryClassificationEvaluator

# One evaluator per split, both built with no arguments so the library
# defaults apply (per the PySpark docs: areaUnderROC over the `rawPrediction`
# and `label` columns -- TODO confirm the transform output uses those names).
train_evaluator = BinaryClassificationEvaluator()
test_evaluator = BinaryClassificationEvaluator()

# Score each split with the trained model. The order is kept deliberately:
# the train split is transformed first, then the test split, which is the
# order the PS job logs appear in this cell's recorded output.
train_result = model.transform(train_dataset)
test_result = model.transform(test_dataset)

# Offline metric for each split, computed from the scored results.
train_auc = train_evaluator.evaluate(train_result)
test_auc = test_evaluator.evaluate(test_result)

print('Debug -- Train AUC: ', train_auc)
print('Debug -- Test AUC: ', test_auc)

[2022-06-21 07:43:33.889] [info] PS job with coordinator address 10.0.1.219:55951 started.
[2022-06-21 07:43:33.889] [info] PSRunner::RunPS: pid: 1658, tid: 1968, thread: 0x7f3b17fff700
[2022-06-21 07:43:33.889] [info] PSRunner::RunPSCoordinator: pid: 1658, tid: 1968, thread: 0x7f3b17fff700
[2022-06-21 07:43:33.891] [info] ActorProcess::Receiving: Coordinator pid: 1658, tid: 1971, thread: 0x7f3b2f7fe700
[2022-06-21 07:43:33.946] [info] C[0]:9: The coordinator has connected to 2 servers and 2 workers.
PS Coordinator node [32mC[0]:9[m is ready.


[Stage 11:>                 (0 + 2) / 2][Stage 16:>                 (0 + 2) / 4]

2022-06-21 07:43:42.601 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 5000
2022-06-21 07:43:43.120 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 10000
2022-06-21 07:43:43.642 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 15000
2022-06-21 07:43:44.175 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 20000
2022-06-21 07:43:44.660 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 25000
2022-06-21 07:43:44.700 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 30000
2022-06-21 07:43:45.166 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 35000
2022-06-21 07:43:45.234 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 40000
2022-06-21 07:43:45.700 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 45000
2022-06-21 07:43:45.751 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 50000
2022-06-21 07:43:46.210 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 55000
2022-06-21 07:43:46.267 -- auc: 0

[Stage 11:>                 (0 + 2) / 2][Stage 16:====>             (1 + 2) / 4]

2022-06-21 07:44:12.495 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 575000
2022-06-21 07:44:12.992 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 580000
2022-06-21 07:44:13.286 -- auc: 0.7897888812218206, Δauc: 0.7866160309132161, pcoc: 36.6443653345108, Δpcoc: 0.4815572526305914, #instance: 584963




2022-06-21 07:44:13.814 -- auc: 0.8083030375960238, Δauc: 1.0, pcoc: 14.544621480150921, Δpcoc: 0.4007854133605957, #instance: 589963
2022-06-21 07:44:14.337 -- auc: 0.7989510443282146, Δauc: 1.0, pcoc: 9.175348828778123, Δpcoc: 0.3697416805267334, #instance: 594963
2022-06-21 07:44:14.345 -- auc: 0.798331591847015, Δauc: 0.7949611801242236, pcoc: 8.269908396070068, Δpcoc: 0.8000248262286186, #instance: 599944
2022-06-21 07:44:14.830 -- auc: 0.7972782119366422, Δauc: 1.0, pcoc: 6.277054507395234, Δpcoc: 0.37820699691772464, #instance: 604944
2022-06-21 07:44:14.849 -- auc: 0.7938277537445428, Δauc: 1.0, pcoc: 5.0844831059248214, Δpcoc: 0.36190035610198973, #instance: 609944
2022-06-21 07:44:15.331 -- auc: 0.7919632248843789, Δauc: 1.0, pcoc: 4.292259539997818, Δpcoc: 0.36283065299987793, #instance: 614944
2022-06-21 07:44:15.365 -- auc: 0.7919632431867444, Δauc: 1.0, pcoc: 3.728755970371181, Δpcoc: 0.3702746953964233, #instance: 619944
2022-06-21 07:44:15.815 -- auc: 0.7911386829437921



2022-06-21 07:44:16.385 -- auc: 0.7924866185755285, Δauc: 1.0, pcoc: 2.719532797240828, Δpcoc: 0.3955651992797852, #instance: 634944
2022-06-21 07:44:16.906 -- auc: 0.7949024164187272, Δauc: 1.0, pcoc: 2.508809381444959, Δpcoc: 0.41000416011810303, #instance: 639944
2022-06-21 07:44:17.424 -- auc: 0.7946917322715337, Δauc: 1.0, pcoc: 2.330441983431877, Δpcoc: 0.3755353012084961, #instance: 644944
2022-06-21 07:44:17.944 -- auc: 0.793909774430538, Δauc: 1.0, pcoc: 2.1788930944233766, Δpcoc: 0.3663683818817139, #instance: 649944
2022-06-21 07:44:18.461 -- auc: 0.795488495874177, Δauc: 1.0, pcoc: 2.0515188887536016, Δpcoc: 0.40074918327331543, #instance: 654944
2022-06-21 07:44:18.984 -- auc: 0.7951206830088025, Δauc: 1.0, pcoc: 1.9391479756679126, Δpcoc: 0.3704500289916992, #instance: 659944
2022-06-21 07:44:19.494 -- auc: 0.7939914858093582, Δauc: 1.0, pcoc: 1.8400431846496754, Δpcoc: 0.35743551101684573, #instance: 664944
2022-06-21 07:44:20.009 -- auc: 0.7927941748764664, Δauc: 1.0, p

                                                                                

2022-06-21 07:44:31.422 -- auc: 0.7869627225760845, Δauc: 1.0, pcoc: 0.9737547807492275, Δpcoc: 0.3606997661376267, #instance: 778148
2022-06-21 07:44:31.431 -- auc: 0.7872370798946611, Δauc: 1.0, pcoc: 0.9600677069443893, Δpcoc: 0.38466801349437985, #instance: 782739
[2022-06-21 07:44:31.444] [info] C[0]:9 has stopped.
[2022-06-21 07:44:31.446] [info] PS job with coordinator address 10.0.1.219:55951 stopped.
[2022-06-21 07:44:31.548] [info] PS job with coordinator address 10.0.1.219:33515 started.
[2022-06-21 07:44:31.548] [info] PSRunner::RunPS: pid: 1658, tid: 2083, thread: 0x7f3b1e7ed700
[2022-06-21 07:44:31.548] [info] PSRunner::RunPSCoordinator: pid: 1658, tid: 2083, thread: 0x7f3b1e7ed700
[2022-06-21 07:44:31.549] [info] ActorProcess::Receiving: Coordinator pid: 1658, tid: 2086, thread: 0x7f3b2effd700
[2022-06-21 07:44:31.632] [info] C[0]:9: The coordinator has connected to 2 servers and 2 workers.
PS Coordinator node [32mC[0]:9[m is ready.


[Stage 19:>                 (0 + 2) / 2][Stage 23:>                 (0 + 2) / 4]

2022-06-21 07:44:40.790 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 5000
2022-06-21 07:44:41.042 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 10000
2022-06-21 07:44:41.300 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 15000
2022-06-21 07:44:41.554 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 20000
2022-06-21 07:44:41.795 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 25000
2022-06-21 07:44:42.063 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 30000
2022-06-21 07:44:42.291 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 35000
2022-06-21 07:44:42.575 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 40000
2022-06-21 07:44:42.787 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 45000
2022-06-21 07:44:43.082 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 50000
2022-06-21 07:44:43.288 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 55000


[Stage 19:>                 (0 + 2) / 2][Stage 23:====>             (1 + 2) / 4]

2022-06-21 07:44:43.593 -- auc: 0.0, Δauc: 0.0, pcoc: nan, Δpcoc: nan, #instance: 60000




2022-06-21 07:44:44.363 -- auc: 0.7464837129833521, Δauc: 0.7433597092928304, pcoc: 5.263976347079644, Δpcoc: 0.5343721123842092, #instance: 64955
2022-06-21 07:44:44.864 -- auc: 0.752603569755517, Δauc: 1.0, pcoc: 2.035690521629233, Δpcoc: 0.35698189239501954, #instance: 69955




2022-06-21 07:44:45.381 -- auc: 0.7480089591980379, Δauc: 1.0, pcoc: 1.3640060982250033, Δpcoc: 0.34304577465057373, #instance: 74955
2022-06-21 07:44:45.879 -- auc: 0.7473633370328252, Δauc: 1.0, pcoc: 1.0745590698448095, Δpcoc: 0.3451525583267212, #instance: 79955


                                                                                

2022-06-21 07:44:46.164 -- auc: 0.74694123336181, Δauc: 1.0, pcoc: 0.9955391984404798, Δpcoc: 0.3416841271117533, #instance: 82082
2022-06-21 07:44:46.174 -- auc: 0.7457749595692871, Δauc: 0.7271896049614788, pcoc: 0.9573654869155315, Δpcoc: 0.6048126745313295, #instance: 86848
[2022-06-21 07:44:46.185] [info] C[0]:9 has stopped.
[2022-06-21 07:44:46.185] [info] PS job with coordinator address 10.0.1.219:33515 stopped.


                                                                                

Debug -- Train AUC:  0.7872362221175573
Debug -- Test AUC:  0.7457743722105097
