In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import bz2
import csv
import io
import json
import re
import time
import random
import requests
import datetime
from pathlib import Path
from pprint import pprint
from typing import List, Dict
from dateutil.relativedelta import relativedelta
import lsde2021.csv as csvutils
import lsde2021.utils as utils
import lsde2021.download as dl
from pyspark.sql import SparkSession
import pyspark.sql.types as T
import pyspark.sql.functions as F

In [2]:
MAX_MEMORY = "30G"

spark = SparkSession \
    .builder \
    .appName("parse-wikipedia-sql-dumps") \
    .config("spark.executor.memory", MAX_MEMORY) \
    .config("spark.driver.memory", MAX_MEMORY) \
    .config('spark.driver.maxResultSize', MAX_MEMORY) \
    .config('spark.ui.showConsoleProgress', 'false') \
    .getOrCreate()
sc = spark.sparkContext

csv_loader = spark.read.format("csv").options(header='True', inferSchema='True')
parquet_reader = spark.read.format("parquet").options(inferSchema='True')

Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/10/27 12:08:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
pageview_complete_processed_src = Path("../nvme/pageview_complete_processed")
# pageview_complete_per_topic_dest = Path("../nvme/pageview_complete_per_topic")
end_date = datetime.date(2021, 10, 1)

daily_pageview_files = []
for year in [2019]: # 2019, 2020, 2021]:
    daily_range = list(dl.date_range(
        datetime.date(year, 1, 1),
        datetime.date(year, 12, 31),
    ))
    
    daily_range = [d for d in daily_range if (end_date - d).total_seconds() > 0]
    daily_pageview_files += daily_range
    
daily_pageview_files = [
    (
        pageview_complete_processed_src / Path("/".join(dl.wikimedia_pageview_complete_local_file(date, monthly=False))).with_suffix(".parquet"),
        date,
        # pageview_complete_per_topic_dest / Path("/".join(dl.wikimedia_pageview_complete_local_file(date, monthly=False))).with_suffix(".parquet"),
    )
    for date in daily_pageview_files
]
pprint(daily_pageview_files[:10])

[(PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190101-user.parquet'),
  datetime.date(2019, 1, 1)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190102-user.parquet'),
  datetime.date(2019, 1, 2)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190103-user.parquet'),
  datetime.date(2019, 1, 3)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190104-user.parquet'),
  datetime.date(2019, 1, 4)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190105-user.parquet'),
  datetime.date(2019, 1, 5)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190106-user.parquet'),
  datetime.date(2019, 1, 6)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190107-user.parquet'),
  datetime.date(2019, 1, 7)),
 (PosixPath('../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190108-user.parquet'),
  datetime.dat

In [4]:
group_cols = ["topic", "dbname", "wiki_code", "group", "language"]

def prepare(daily_processed_file, date):
    d = date.timetuple()
    day = f"{d.tm_year}-{d.tm_mon}-{d.tm_mday}"
    # print(day)
    
    df = spark.read.format("parquet").load(str(daily_processed_file))
    
    topic1_counts = df.select(*group_cols, "daily_total", F.explode("topics1").alias("topic"))
    topic1_counts = topic1_counts.groupBy(group_cols).agg(F.sum("daily_total").alias(day))
    topic1_counts = topic1_counts.withColumn("level", F.lit(1))
    # topic1_counts.limit(10).show()
    
    topic2_counts = df.select(*group_cols, "daily_total", F.explode("topics2").alias("topic"))
    topic2_counts = topic2_counts.groupBy(group_cols).agg(F.sum("daily_total").alias(day))
    topic2_counts = topic2_counts.withColumn("level", F.lit(2))
    
    topic3_counts = df.select(*group_cols, "daily_total", F.explode("topics3").alias("topic"))
    topic3_counts = topic3_counts.groupBy(group_cols).agg(F.sum("daily_total").alias(day))
    topic3_counts = topic3_counts.withColumn("level", F.lit(3))
    
    topic4_counts = df.select(*group_cols, "daily_total", F.explode("topics4").alias("topic"))
    topic4_counts = topic4_counts.groupBy(group_cols).agg(F.sum("daily_total").alias(day))
    topic4_counts = topic4_counts.withColumn("level", F.lit(4))
    
    topic_counts = topic1_counts
    topic_counts = topic_counts.union(topic2_counts)
    topic_counts = topic_counts.union(topic3_counts)
    topic_counts = topic_counts.union(topic4_counts)
    
    topic_counts = topic_counts.filter((F.col("group").isNotNull()))
    # topic_counts.limit(10).show()
    
    # test for pizza
    # topic_counts.filter(F.lower("topic") == "pizza").limit(100).show()
    
    return topic_counts

In [5]:
# start with the first date
total_start = time.time()
daily_pageview_file, date = daily_pageview_files[0]
daily_pageview_combined = prepare(daily_pageview_file, date)

# iteratively join other days
for daily_pageview_file, date in daily_pageview_files[1:]:
    start = time.time()
    daily_pageview_combined = daily_pageview_combined.join(prepare(daily_pageview_file, date).repartition("group", "level"), on=group_cols + ["level"], how="outer")
    daily_pageview_combined = daily_pageview_combined.repartition("group", "level")
    print("processed %s (%d rows) in %.2f minutes" % (daily_pageview_file, daily_pageview_combined.count(), (time.time() - start) / (60)))

# 6M * 4 * langs X 365
    
# daily_pageview_combined.limit(10).show()
daily_pageview_combined.write.format("parquet").mode("overwrite").partitionBy("group", "level").save("../nvme/pageview_complete_per_topic_combined/2019.parquet")
print("done in %.2f hours" % ((time.time() - total_start) / (60**2)))

processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190102-user.parquet (4403582 rows) in 0.80 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190103-user.parquet (4558114 rows) in 0.91 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190104-user.parquet (4646457 rows) in 1.19 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190105-user.parquet (4714468 rows) in 1.48 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190106-user.parquet (4765553 rows) in 1.74 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190107-user.parquet (4805342 rows) in 2.03 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190108-user.parquet (4842547 rows) in 2.40 minutes
processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190109-user.parquet (4878634 rows) in 2.80 minutes
processed ../nvme/pagevi

21/10/27 13:03:38 WARN DAGScheduler: Broadcasting large task binary with size 1002.4 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190118-user.parquet (5024666 rows) in 5.94 minutes


21/10/27 13:09:53 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:09:57 WARN DAGScheduler: Broadcasting large task binary with size 1057.3 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190119-user.parquet (5031106 rows) in 6.32 minutes


21/10/27 13:16:15 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:16:20 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 13:16:24 WARN DAGScheduler: Broadcasting large task binary with size 1112.2 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190120-user.parquet (5036709 rows) in 6.45 minutes


21/10/27 13:22:54 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:22:59 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 13:23:04 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:23:08 WARN DAGScheduler: Broadcasting large task binary with size 1167.1 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190121-user.parquet (5041537 rows) in 6.73 minutes


21/10/27 13:29:54 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:29:58 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 13:30:03 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:30:07 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 13:30:12 WARN DAGScheduler: Broadcasting large task binary with size 1222.0 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190122-user.parquet (5046609 rows) in 7.07 minutes


21/10/27 13:37:10 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:37:15 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/27 13:37:19 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:37:23 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 13:37:28 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 13:37:32 WARN DAGScheduler: Broadcasting large task binary with size 1277.0 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190123-user.parquet (5051402 rows) in 7.34 minutes


21/10/27 13:44:45 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:44:50 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/27 13:44:54 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:44:58 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 13:45:03 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 13:45:07 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/27 13:45:11 WARN DAGScheduler: Broadcasting large task binary with size 1331.9 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190124-user.parquet (5055787 rows) in 7.64 minutes


21/10/27 13:51:54 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:51:58 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 13:52:03 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:52:07 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 13:52:11 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 13:52:15 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 13:52:20 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 13:52:24 WARN DAGScheduler: Broadcasting large task binary with size 1386.8 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190125-user.parquet (5059571 rows) in 7.22 minutes


21/10/27 13:59:22 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 13:59:26 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 13:59:30 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 13:59:35 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 13:59:39 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 13:59:44 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 13:59:48 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 13:59:52 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 13:59:56 WARN DAGScheduler: Broadcasting large task binary with size 1441.7 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190126-user.parquet (5063048 rows) in 7.54 minutes


21/10/27 14:07:09 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:07:14 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:07:18 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 14:07:22 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 14:07:27 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 14:07:31 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 14:07:35 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 14:07:40 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 14:07:44 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 14:07:49 WARN DAGScheduler: Broadcasting large task binary with size 1496.7 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190127-user.parquet (5066233 rows) in 7.87 minutes


21/10/27 14:15:17 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:15:22 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:15:26 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 14:15:30 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 14:15:34 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 14:15:39 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 14:15:43 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 14:15:47 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 14:15:52 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 14:15:56 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 14:16:01 WARN DAGScheduler: Broadcasting large task binary with size 1551.6 KiB


processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190128-user.parquet (5069850 rows) in 8.20 minutes


21/10/27 14:23:55 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:23:59 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:24:04 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 14:24:09 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 14:24:14 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 14:24:19 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 14:24:24 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 14:24:28 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 14:24:33 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 14:24:38 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 14:24:43 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 14:24:48 WAR

processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190129-user.parquet (5073449 rows) in 8.79 minutes


21/10/27 14:33:11 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:33:16 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:33:21 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 14:33:26 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 14:33:31 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 14:33:35 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 14:33:40 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 14:33:45 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 14:33:50 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 14:33:55 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 14:34:00 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 14:34:05 WAR

processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190130-user.parquet (5076241 rows) in 9.38 minutes


21/10/27 14:42:51 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:42:56 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:43:01 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 14:43:06 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 14:43:11 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 14:43:16 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 14:43:21 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 14:43:26 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 14:43:31 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 14:43:36 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 14:43:41 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 14:43:46 WAR

processed ../nvme/pageview_complete_processed/2019/2019-01/pageviews-20190131-user.parquet (5078358 rows) in 9.77 minutes


21/10/27 14:52:43 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 14:52:48 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 14:52:53 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 14:52:58 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 14:53:03 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 14:53:07 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 14:53:12 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 14:53:17 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 14:53:22 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 14:53:27 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 14:53:32 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 14:53:37 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190201-user.parquet (5080198 rows) in 9.92 minutes


21/10/27 15:02:50 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 15:02:55 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 15:02:59 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 15:03:04 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 15:03:09 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 15:03:14 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 15:03:19 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 15:03:24 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 15:03:29 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 15:03:34 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 15:03:39 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 15:03:44 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190202-user.parquet (5082015 rows) in 10.21 minutes


21/10/27 15:13:15 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 15:13:20 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 15:13:25 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 15:13:30 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 15:13:35 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 15:13:39 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 15:13:45 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 15:13:49 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 15:13:55 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 15:14:00 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 15:14:05 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 15:14:10 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190203-user.parquet (5083843 rows) in 10.52 minutes


21/10/27 15:24:15 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 15:24:20 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 15:24:24 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 15:24:29 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 15:24:35 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 15:24:40 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/27 15:24:45 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 15:24:50 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 15:24:55 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 15:25:00 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 15:25:05 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 15:25:11 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190204-user.parquet (5085635 rows) in 11.13 minutes


21/10/27 15:35:51 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 15:35:56 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 15:36:01 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 15:36:05 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 15:36:10 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 15:36:15 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 15:36:20 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 15:36:26 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 15:36:31 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 15:36:36 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 15:36:41 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 15:36:46 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190205-user.parquet (5087242 rows) in 11.69 minutes


21/10/27 15:47:50 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 15:47:55 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 15:48:00 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 15:48:05 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 15:48:09 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 15:48:15 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 15:48:20 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 15:48:25 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 15:48:30 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 15:48:35 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 15:48:40 WARN DAGScheduler: Broadcasting large task binary with size 1555.1 KiB
21/10/27 15:48:45 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190206-user.parquet (5088645 rows) in 12.03 minutes


21/10/27 16:00:11 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 16:00:16 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 16:00:21 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 16:00:26 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 16:00:31 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 16:00:36 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 16:00:41 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 16:00:46 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 16:00:51 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 16:00:56 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 16:01:02 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 16:01:07 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190207-user.parquet (5090157 rows) in 12.53 minutes


21/10/27 16:12:58 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 16:13:03 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 16:13:08 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 16:13:13 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 16:13:18 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 16:13:23 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 16:13:28 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 16:13:33 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 16:13:38 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 16:13:44 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 16:13:49 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 16:13:54 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190208-user.parquet (5091522 rows) in 12.78 minutes


21/10/27 16:25:29 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 16:25:34 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 16:25:39 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 16:25:44 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 16:25:49 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 16:25:54 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 16:25:59 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 16:26:04 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 16:26:10 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 16:26:15 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 16:26:20 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 16:26:25 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190209-user.parquet (5093060 rows) in 12.66 minutes


21/10/27 16:38:12 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 16:38:16 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 16:38:21 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 16:38:25 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 16:38:30 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 16:38:35 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 16:38:39 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 16:38:44 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 16:38:48 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 16:38:54 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 16:38:58 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 16:39:03 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190210-user.parquet (5094702 rows) in 12.64 minutes


21/10/27 16:50:57 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 16:51:01 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 16:51:06 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 16:51:10 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 16:51:15 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 16:51:19 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 16:51:24 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 16:51:29 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 16:51:33 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 16:51:38 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 16:51:42 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 16:51:47 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190211-user.parquet (5095952 rows) in 12.75 minutes


21/10/27 17:03:57 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 17:04:02 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 17:04:07 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/27 17:04:11 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 17:04:16 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 17:04:20 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 17:04:25 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 17:04:30 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 17:04:35 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 17:04:39 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 17:04:44 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 17:04:49 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190212-user.parquet (5097299 rows) in 13.19 minutes


21/10/27 17:17:25 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 17:17:30 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 17:17:34 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 17:17:39 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 17:17:44 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 17:17:50 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 17:17:56 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 17:18:01 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 17:18:06 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 17:18:11 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 17:18:16 WARN DAGScheduler: Broadcasting large task binary with size 1555.1 KiB
21/10/27 17:18:21 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190213-user.parquet (5098880 rows) in 13.59 minutes


21/10/27 17:31:06 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 17:31:11 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 17:31:15 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 17:31:20 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 17:31:24 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 17:31:29 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 17:31:34 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 17:31:39 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 17:31:43 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 17:31:48 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 17:31:53 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 17:31:57 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190214-user.parquet (5100302 rows) in 13.75 minutes


21/10/27 17:45:17 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 17:45:22 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 17:45:27 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 17:45:33 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 17:45:38 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 17:45:43 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 17:45:48 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 17:45:53 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 17:45:57 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 17:46:03 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 17:46:09 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 17:46:14 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190215-user.parquet (5101683 rows) in 14.41 minutes


21/10/27 17:59:44 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 17:59:49 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 17:59:53 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 17:59:58 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 18:00:03 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 18:00:08 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 18:00:13 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 18:00:17 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 18:00:22 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 18:00:27 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 18:00:31 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 18:00:36 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190216-user.parquet (5103004 rows) in 14.44 minutes


21/10/27 18:14:35 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 18:14:40 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 18:14:46 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 18:14:50 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 18:14:55 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 18:15:00 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 18:15:05 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 18:15:10 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 18:15:15 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 18:15:21 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 18:15:26 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 18:15:31 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190217-user.parquet (5104837 rows) in 15.09 minutes


21/10/27 18:30:02 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 18:30:07 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/27 18:30:11 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 18:30:16 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 18:30:21 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 18:30:26 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/27 18:30:31 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 18:30:36 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 18:30:42 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 18:30:47 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 18:30:52 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 18:30:57 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190218-user.parquet (5108233 rows) in 15.36 minutes


21/10/27 18:46:00 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 18:46:05 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 18:46:10 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 18:46:15 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 18:46:20 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 18:46:25 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 18:46:30 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/27 18:46:35 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 18:46:40 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 18:46:46 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 18:46:51 WARN DAGScheduler: Broadcasting large task binary with size 1555.1 KiB
21/10/27 18:46:56 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190219-user.parquet (5109620 rows) in 16.25 minutes


21/10/27 19:02:53 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 19:02:59 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 19:03:03 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 19:03:08 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 19:03:13 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 19:03:18 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 19:03:23 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 19:03:27 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 19:03:32 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 19:03:37 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 19:03:42 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 19:03:47 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190220-user.parquet (5110984 rows) in 16.95 minutes


21/10/27 19:20:28 WARN DAGScheduler: Broadcasting large task binary with size 1006.0 KiB
21/10/27 19:20:35 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/27 19:20:40 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 19:20:45 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 19:20:50 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 19:20:56 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/27 19:21:01 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 19:21:07 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 19:21:13 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 19:21:18 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 19:21:24 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 19:21:30 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190221-user.parquet (5112107 rows) in 18.02 minutes


21/10/27 19:39:29 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 19:39:35 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 19:39:41 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 19:39:47 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 19:39:53 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 19:39:59 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 19:40:04 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 19:40:11 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 19:40:16 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 19:40:23 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 19:40:29 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 19:40:35 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190222-user.parquet (5113182 rows) in 19.26 minutes


21/10/27 19:58:23 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 19:58:28 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 19:58:33 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 19:58:39 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 19:58:44 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 19:58:50 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 19:58:55 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 19:59:01 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 19:59:06 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 19:59:11 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 19:59:18 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 19:59:23 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190223-user.parquet (5114015 rows) in 18.76 minutes


21/10/27 20:17:05 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 20:17:10 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 20:17:16 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 20:17:23 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 20:17:30 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 20:17:36 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 20:17:42 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 20:17:49 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 20:17:54 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 20:18:00 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 20:18:06 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 20:18:11 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190224-user.parquet (5114901 rows) in 18.99 minutes


21/10/27 20:36:33 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 20:36:38 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 20:36:43 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 20:36:48 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 20:36:54 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 20:36:59 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 20:37:05 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 20:37:10 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 20:37:15 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 20:37:21 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 20:37:26 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 20:37:31 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190225-user.parquet (5115889 rows) in 19.54 minutes


21/10/27 20:56:12 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 20:56:17 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 20:56:23 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/27 20:56:29 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 20:56:34 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 20:56:41 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 20:56:47 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 20:56:52 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 20:56:58 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 20:57:03 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 20:57:09 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 20:57:14 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190226-user.parquet (5116924 rows) in 19.69 minutes


21/10/27 21:16:18 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 21:16:23 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 21:16:29 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 21:16:34 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 21:16:39 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 21:16:45 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 21:16:50 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 21:16:56 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 21:17:02 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 21:17:09 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 21:17:15 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 21:17:21 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190227-user.parquet (5117790 rows) in 20.37 minutes


21/10/27 21:37:03 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 21:37:09 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 21:37:15 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 21:37:20 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 21:37:26 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 21:37:31 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 21:37:37 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 21:37:42 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 21:37:47 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 21:37:53 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 21:37:59 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 21:38:04 WAR

processed ../nvme/pageview_complete_processed/2019/2019-02/pageviews-20190228-user.parquet (5118515 rows) in 20.87 minutes


21/10/27 21:58:13 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 21:58:19 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 21:58:24 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 21:58:30 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 21:58:35 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 21:58:41 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 21:58:46 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 21:58:52 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 21:58:58 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 21:59:05 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/27 21:59:11 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 21:59:17 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190301-user.parquet (5119357 rows) in 21.32 minutes


21/10/27 22:19:19 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/27 22:19:25 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 22:19:32 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/27 22:19:38 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/27 22:19:44 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/27 22:19:51 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/27 22:19:57 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/27 22:20:04 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/27 22:20:12 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/27 22:20:17 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/27 22:20:23 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/27 22:20:29 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190302-user.parquet (5120193 rows) in 21.41 minutes


21/10/27 22:41:37 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 22:41:42 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 22:41:47 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/27 22:41:53 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 22:41:59 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 22:42:04 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 22:42:10 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 22:42:15 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 22:42:21 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 22:42:27 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 22:42:32 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 22:42:38 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190303-user.parquet (5121725 rows) in 22.46 minutes


21/10/27 23:05:05 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 23:05:10 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 23:05:15 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/27 23:05:20 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 23:05:25 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 23:05:30 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 23:05:35 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 23:05:41 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 23:05:47 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 23:05:52 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 23:05:57 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 23:06:02 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190304-user.parquet (5122401 rows) in 23.28 minutes


21/10/27 23:28:13 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 23:28:18 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/27 23:28:25 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 23:28:30 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 23:28:35 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 23:28:40 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 23:28:45 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 23:28:51 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 23:28:55 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 23:29:02 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 23:29:07 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 23:29:14 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190305-user.parquet (5123136 rows) in 22.95 minutes


21/10/27 23:51:02 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/27 23:51:09 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/27 23:51:14 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/27 23:51:20 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/27 23:51:25 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/27 23:51:31 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/27 23:51:37 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/27 23:51:43 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/27 23:51:50 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/27 23:51:55 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/27 23:52:01 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/27 23:52:07 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190306-user.parquet (5123914 rows) in 23.60 minutes


21/10/28 00:14:32 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 00:14:38 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 00:14:44 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 00:14:50 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 00:14:57 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 00:15:02 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 00:15:07 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/28 00:15:13 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 00:15:18 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 00:15:24 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 00:15:31 WARN DAGScheduler: Broadcasting large task binary with size 1555.1 KiB
21/10/28 00:15:36 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190307-user.parquet (5124488 rows) in 23.54 minutes


21/10/28 00:38:44 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 00:38:49 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/28 00:38:58 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/28 00:39:03 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 00:39:10 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 00:39:15 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 00:39:21 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 00:39:26 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 00:39:34 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 00:39:41 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 00:39:47 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 00:39:53 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190308-user.parquet (5125114 rows) in 24.55 minutes


21/10/28 01:03:28 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 01:03:33 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/28 01:03:38 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 01:03:43 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 01:03:48 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 01:03:54 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 01:03:59 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 01:04:05 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 01:04:13 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 01:04:18 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 01:04:23 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 01:04:30 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190309-user.parquet (5125756 rows) in 24.70 minutes


21/10/28 01:28:07 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 01:28:13 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 01:28:18 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 01:28:23 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 01:28:28 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 01:28:34 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 01:28:40 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/28 01:28:45 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 01:28:52 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 01:28:59 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/28 01:29:05 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 01:29:11 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190310-user.parquet (5126451 rows) in 24.78 minutes


21/10/28 01:52:36 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 01:52:41 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 01:52:46 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 01:52:51 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 01:52:57 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 01:53:03 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 01:53:09 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 01:53:15 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 01:53:22 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 01:53:27 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 01:53:34 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 01:53:40 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190311-user.parquet (5127095 rows) in 24.88 minutes


21/10/28 02:18:20 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 02:18:26 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 02:18:33 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 02:18:39 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 02:18:45 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 02:18:51 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 02:18:58 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 02:19:05 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 02:19:10 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 02:19:17 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 02:19:23 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 02:19:29 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190312-user.parquet (5127674 rows) in 26.04 minutes


21/10/28 02:45:41 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 02:45:47 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 02:45:53 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 02:46:00 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 02:46:06 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 02:46:13 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 02:46:20 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 02:46:26 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 02:46:32 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 02:46:38 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 02:46:45 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 02:46:51 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190313-user.parquet (5128190 rows) in 27.92 minutes


21/10/28 03:14:05 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 03:14:11 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 03:14:17 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 03:14:23 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 03:14:29 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 03:14:35 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 03:14:42 WARN DAGScheduler: Broadcasting large task binary with size 1335.4 KiB
21/10/28 03:14:48 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 03:14:54 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 03:15:00 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 03:15:07 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 03:15:13 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190314-user.parquet (5128793 rows) in 28.58 minutes


21/10/28 03:43:46 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 03:43:53 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 03:43:59 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 03:44:07 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 03:44:14 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 03:44:21 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 03:44:29 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 03:44:37 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 03:44:43 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 03:44:50 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/28 03:44:58 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 03:45:04 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190315-user.parquet (5129316 rows) in 30.21 minutes


21/10/28 04:14:09 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 04:14:14 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 04:14:20 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 04:14:26 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 04:14:34 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 04:14:41 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/28 04:14:48 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 04:14:55 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 04:15:01 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 04:15:08 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 04:15:14 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 04:15:21 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190316-user.parquet (5129844 rows) in 30.33 minutes


21/10/28 04:44:57 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 04:45:03 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 04:45:09 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 04:45:15 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 04:45:21 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 04:45:27 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 04:45:33 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 04:45:40 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 04:45:47 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 04:45:55 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 04:46:01 WARN DAGScheduler: Broadcasting large task binary with size 1555.1 KiB
21/10/28 04:46:08 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190317-user.parquet (5130388 rows) in 31.57 minutes


21/10/28 05:16:50 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 05:16:56 WARN DAGScheduler: Broadcasting large task binary with size 1060.7 KiB
21/10/28 05:17:03 WARN DAGScheduler: Broadcasting large task binary with size 1115.6 KiB
21/10/28 05:17:10 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 05:17:16 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 05:17:23 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 05:17:29 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 05:17:36 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 05:17:44 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 05:17:50 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 05:17:57 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 05:18:04 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190318-user.parquet (5131013 rows) in 32.02 minutes


21/10/28 05:48:58 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 05:49:04 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 05:49:12 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 05:49:19 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 05:49:25 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 05:49:33 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 05:49:40 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 05:49:47 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 05:49:54 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 05:50:00 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 05:50:08 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 05:50:14 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190319-user.parquet (5131548 rows) in 32.37 minutes


21/10/28 06:21:48 WARN DAGScheduler: Broadcasting large task binary with size 1005.8 KiB
21/10/28 06:21:55 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 06:22:01 WARN DAGScheduler: Broadcasting large task binary with size 1115.7 KiB
21/10/28 06:22:08 WARN DAGScheduler: Broadcasting large task binary with size 1170.6 KiB
21/10/28 06:22:16 WARN DAGScheduler: Broadcasting large task binary with size 1225.5 KiB
21/10/28 06:22:23 WARN DAGScheduler: Broadcasting large task binary with size 1280.4 KiB
21/10/28 06:22:31 WARN DAGScheduler: Broadcasting large task binary with size 1335.3 KiB
21/10/28 06:22:37 WARN DAGScheduler: Broadcasting large task binary with size 1390.3 KiB
21/10/28 06:22:44 WARN DAGScheduler: Broadcasting large task binary with size 1445.2 KiB
21/10/28 06:22:51 WARN DAGScheduler: Broadcasting large task binary with size 1500.1 KiB
21/10/28 06:22:59 WARN DAGScheduler: Broadcasting large task binary with size 1555.0 KiB
21/10/28 06:23:06 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190320-user.parquet (5132220 rows) in 32.85 minutes


21/10/28 06:54:58 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 06:55:05 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 06:55:11 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 06:55:19 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 06:55:26 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 06:55:34 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 06:55:40 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 06:55:49 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 06:55:56 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 06:56:04 WARN DAGScheduler: Broadcasting large task binary with size 1500.2 KiB
21/10/28 06:56:12 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 06:56:20 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190321-user.parquet (5132829 rows) in 33.62 minutes


21/10/28 07:29:22 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 07:29:29 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 07:29:36 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 07:29:44 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 07:29:53 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 07:30:01 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 07:30:08 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 07:30:16 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 07:30:24 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 07:30:31 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 07:30:39 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 07:30:46 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190322-user.parquet (5133525 rows) in 34.98 minutes


21/10/28 08:04:06 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 08:04:13 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 08:04:20 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 08:04:27 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 08:04:33 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 08:04:41 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 08:04:50 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 08:04:57 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 08:05:04 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 08:05:12 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 08:05:19 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 08:05:27 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190323-user.parquet (5134235 rows) in 35.00 minutes


21/10/28 08:39:42 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 08:39:49 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 08:39:56 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 08:40:03 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 08:40:09 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 08:40:17 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/28 08:40:25 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 08:40:31 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 08:40:39 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 08:40:46 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 08:40:54 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 08:41:03 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190324-user.parquet (5134882 rows) in 36.23 minutes


21/10/28 09:17:06 WARN DAGScheduler: Broadcasting large task binary with size 1006.0 KiB
21/10/28 09:17:14 WARN DAGScheduler: Broadcasting large task binary with size 1060.9 KiB
21/10/28 09:17:22 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 09:17:29 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 09:17:36 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 09:17:43 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/28 09:17:50 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 09:17:57 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 09:18:05 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 09:18:12 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 09:18:22 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 09:18:29 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190325-user.parquet (5135522 rows) in 38.14 minutes


21/10/28 09:55:46 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 09:55:53 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 09:55:59 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 09:56:07 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 09:56:16 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 09:56:24 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 09:56:32 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 09:56:40 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 09:56:49 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 09:56:56 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 09:57:06 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 09:57:13 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190326-user.parquet (5136045 rows) in 38.95 minutes


21/10/28 10:35:09 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 10:35:18 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 10:35:26 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 10:35:34 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 10:35:42 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 10:35:49 WARN DAGScheduler: Broadcasting large task binary with size 1280.5 KiB
21/10/28 10:35:57 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 10:36:05 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 10:36:14 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 10:36:22 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 10:36:29 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 10:36:37 WAR

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190327-user.parquet (5136699 rows) in 40.41 minutes


21/10/28 11:16:36 WARN DAGScheduler: Broadcasting large task binary with size 1005.9 KiB
21/10/28 11:16:45 WARN DAGScheduler: Broadcasting large task binary with size 1060.8 KiB
21/10/28 11:16:53 WARN DAGScheduler: Broadcasting large task binary with size 1115.8 KiB
21/10/28 11:17:02 WARN DAGScheduler: Broadcasting large task binary with size 1170.7 KiB
21/10/28 11:17:12 WARN DAGScheduler: Broadcasting large task binary with size 1225.6 KiB
21/10/28 11:17:21 WARN DAGScheduler: Broadcasting large task binary with size 1280.6 KiB
21/10/28 11:17:31 WARN DAGScheduler: Broadcasting large task binary with size 1335.5 KiB
21/10/28 11:17:39 WARN DAGScheduler: Broadcasting large task binary with size 1390.4 KiB
21/10/28 11:17:50 WARN DAGScheduler: Broadcasting large task binary with size 1445.3 KiB
21/10/28 11:17:58 WARN DAGScheduler: Broadcasting large task binary with size 1500.3 KiB
21/10/28 11:18:06 WARN DAGScheduler: Broadcasting large task binary with size 1555.2 KiB
21/10/28 11:18:14 WAR



21/10/28 11:25:27 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
21/10/28 11:25:38 WARN DAGScheduler: Broadcasting large task binary with size 3.7 MiB
21/10/28 11:25:53 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
21/10/28 11:26:06 WARN DAGScheduler: Broadcasting large task binary with size 3.8 MiB
21/10/28 11:26:18 WARN DAGScheduler: Broadcasting large task binary with size 3.9 MiB
21/10/28 11:26:31 WARN DAGScheduler: Broadcasting large task binary with size 3.9 MiB
21/10/28 11:26:51 WARN DAGScheduler: Broadcasting large task binary with size 4.0 MiB
21/10/28 11:27:04 WARN DAGScheduler: Broadcasting large task binary with size 4.0 MiB
21/10/28 11:27:19 WARN DAGScheduler: Broadcasting large task binary with size 4.1 MiB
21/10/28 11:27:32 WARN DAGScheduler: Broadcasting large task binary with size 4.1 MiB
21/10/28 11:27:53 WARN DAGScheduler: Broadcasting large task binary with size 4.2 MiB
21/10/28 11:28:08 WARN DAGScheduler: Broadcasting larg

processed ../nvme/pageview_complete_processed/2019/2019-03/pageviews-20190328-user.parquet (5137245 rows) in 42.50 minutes


KeyboardInterrupt: 

In [None]:
sc.stop()