In [1]:
from datetime import datetime, timedelta, timezone
import os
import time
import pandas as pd

from pyspark import SparkContext, StorageLevel
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    current_user,
    col, collect_list, concat_ws, greatest, lit, lower, when,
    avg as _avg,
    count as _count,
    hex as _hex,
    max as _max,
    min as _min,
    round as _round,
    sum as _sum,
)
from pyspark.sql.types import (
    StructType,
    LongType,
    StringType,
    StructField,
    DoubleType,
    IntegerType,
)

In [2]:
# Attach to the already-running Spark session when there is one, else start it.
# NOTE(review): the app name says 'tape-recall-history' although this notebook
# processes condor data -- looks copied from another notebook; confirm.
spark = (
    SparkSession.builder
    .appName('tape-recall-history')
    .getOrCreate()
)
# Last expression of the cell: lets the notebook render the session summary.
spark

24/10/01 21:54:15 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [3]:
# Job arguments, all taken from environment variables.
# Location of the OpenSearch secret file; falls back to ../workdir under the CWD.
secretpath = os.environ.get(
    'OPENSEARCH_SECRET_PATH',
    f'{os.getcwd()}/../workdir/secret_opensearch.txt',
)
# Fail fast: opening the file raises right here if it is missing or unreadable.
with open(secretpath, 'r') as r:
    pass
# PROD decides the index prefix: `crab-*` when truthy, `crab-test-*` otherwise.
PROD = os.environ.get('PROD', 'false').lower() in ('true', '1', 't')
# Optional date window, each a "%Y-%m-%d" string; None when the variable is unset.
FROM_DATE = os.environ.get('FROM_DATE')
TO_DATE = os.environ.get('TO_DATE')

In [4]:
# `osearch` is the local helper module used later to send documents to OpenSearch.
# Try a plain import first; if the module is not importable, put $PWD/../workdir
# at the front of sys.path and import it from there.
try:
    import osearch
except ModuleNotFoundError:
    import sys
    sys.path.insert(0, f'{os.getcwd()}/../workdir')
    import osearch

In [5]:
# Notebook defaults -- edit these values when running interactively.
# TODAY is the exclusive end of the RecordTime window, YESTERDAY its inclusive start.
TODAY = '2024-09-28'
# To process only the single day before TODAY, derive the start instead:
# YESTERDAY = (datetime.strptime(TODAY, "%Y-%m-%d") - timedelta(days=1)).strftime("%Y-%m-%d")
YESTERDAY = '2024-09-01'
# Target index; this is the test name, the PROD switch rewrites it later.
index_name = "crab-test-condor-taskdb"

In [6]:
# When run as a cronjob the date window comes from the environment instead of
# the notebook defaults. Both FROM_DATE and TO_DATE must be set to take effect;
# if either is missing the notebook defaults are kept.
if TO_DATE and FROM_DATE:
    TODAY = TO_DATE
    YESTERDAY = FROM_DATE
# In production write to `crab-*` instead of `crab-test-*`. Only the leading
# `crab-test-` is swapped, so re-running this cell leaves an already converted
# name untouched (re-splitting on "-" would drop a name segment on every re-run).
_TEST_INDEX_PREFIX = 'crab-test-'
if PROD and index_name.startswith(_TEST_INDEX_PREFIX):
    index_name = f'crab-{index_name[len(_TEST_INDEX_PREFIX):]}'
# Midnight-UTC datetimes of the window bounds; used for the RecordTime filters
# and passed to osearch as the index timestamp.
TODAY_datetime = datetime.strptime(TODAY, "%Y-%m-%d").replace(tzinfo=timezone.utc)
YESTERDAY_datetime = datetime.strptime(YESTERDAY, "%Y-%m-%d").replace(tzinfo=timezone.utc)

In [7]:
# Echo the effective parameters, one per line, so they show up in the run log.
print(TODAY, YESTERDAY, index_name, sep='\n')

2024-09-28
2024-09-01
crab-test-condor-taskdb


In [8]:
def get_candidate_files(start_date, end_date, spark, base, day_delta=1):
    """Return the HDFS day-folder globs under ``base`` that exist for a date window.

    The window is widened by ``day_delta`` days on each side, then one glob
    ``{base}/YYYY/MM/DD{,.tmp}`` is built per day, starting at
    ``start_date - day_delta`` and stopping before ``end_date + day_delta``
    (that last day itself is not included). Only globs that match something
    on HDFS are returned.

    Args:
        start_date: datetime, start of the requested window.
        end_date: datetime, end of the requested window.
        spark: active SparkSession; its JVM gateway is used to reach the
            Hadoop FileSystem API.
        base: HDFS folder that contains the ``YYYY/MM/DD`` sub-folders.
        day_delta: number of days of padding added before and after the window.

    Returns:
        list of str: the glob patterns for which ``globStatus`` returned a
        non-empty result, in chronological order.
    """
    st_date = start_date - timedelta(days=day_delta)
    ed_date = end_date + timedelta(days=day_delta)
    days = (ed_date - st_date).days

    sc = spark.sparkContext
    # The candidate files are the folders to the specific dates,
    # but if we are looking at recent days the compaction procedure could
    # have not run yet, so we will consider also the .tmp folders.
    candidate_files = [
        f"{base}/{(st_date + timedelta(days=i)).strftime('%Y/%m/%d')}{{,.tmp}}"
        for i in range(days)
    ]
    # Reach the Hadoop FileSystem through the py4j gateway to test each glob.
    fsystem = sc._gateway.jvm.org.apache.hadoop.fs.FileSystem
    uri = sc._gateway.jvm.java.net.URI
    path = sc._gateway.jvm.org.apache.hadoop.fs.Path
    fs = fsystem.get(uri("hdfs:///"), sc._jsc.hadoopConfiguration())
    # Keep only the patterns for which globStatus returns a non-empty result.
    candidate_files = [url for url in candidate_files if fs.globStatus(path(url))]
    return candidate_files
# NOTE: the function is deliberately not called in this cell. The call needs
# `_DEFAULT_HDFS_FOLDER`, which is only defined in the next cell (where the
# call is made); calling it here raised NameError on a fresh kernel.

In [10]:
# Root of the condor raw-metric archive on HDFS.
_DEFAULT_HDFS_FOLDER = "/project/monitoring/archive/condor/raw/metric"
# Ask Spark for the current user; the name is used to build the per-user HDFS path.
crab_username = (
    spark.sql("""SELECT current_user() AS user""")
    .toPandas()
    .to_dict('records')[0]['user']
)
# Day folders to read: the window padded by two days on each side.
candidate_files = get_candidate_files(
    YESTERDAY_datetime,
    TODAY_datetime,
    spark=spark,
    base=_DEFAULT_HDFS_FOLDER,
    day_delta=2,
)
# Fields read from the `data` struct of each document: (name, type, nullable).
# NOTE(review): CommittedCoreHr / CommittedWallClockHr are read as strings while
# the other *Hr fields are doubles -- confirm this is intended.
_condor_fields = [
    ("RecordTime", LongType(), False),
    ("CMSPrimaryDataTier", StringType(), True),
    ("Status", StringType(), True),
    ("WallClockHr", DoubleType(), True),
    ("CoreHr", DoubleType(), True),
    ("CpuTimeHr", DoubleType(), True),
    ("Type", StringType(), True),
    ("CRAB_DataBlock", StringType(), True),
    ("GlobalJobId", StringType(), False),
    ("ExitCode", LongType(), True),
    ("CRAB_Workflow", StringType(), True),
    ("CommittedCoreHr", StringType(), True),
    ("CommittedWallClockHr", StringType(), True),
]
# Spark read schema: a single top-level `data` struct holding the fields above.
schema = StructType([
    StructField(
        "data",
        StructType([
            StructField(field_name, field_type, nullable=is_nullable)
            for field_name, field_type, is_nullable in _condor_fields
        ]),
    ),
])

In [None]:
# Read the raw condor documents, keep completed analysis jobs whose RecordTime
# falls in [YESTERDAY, TODAY), keep one row per GlobalJobId, and write the
# result as zstd-compressed parquet under the user's HDFS area (overwriting).
(
    spark.read
    .option("basePath", _DEFAULT_HDFS_FOLDER)
    .json(candidate_files, schema=schema)
    .select("data.*")
    .where(
            f"""Status IN ('Completed')
            AND Type IN ('analysis')
            AND RecordTime >= {int(YESTERDAY_datetime.timestamp()) * 1000}
            AND RecordTime < {int(TODAY_datetime.timestamp()) * 1000}
            """
    )
    .dropDuplicates(["GlobalJobId"])
    .write
    .parquet(
        f"/cms/users/{crab_username}/condor_vir_data",
        mode='overwrite',
        compression='zstd',
    )
)

In [11]:
# Load the parquet snapshot back, mark it for caching, and expose it to
# Spark SQL under the view name `condor`.
condor_df = spark.read.parquet(f"/cms/users/{crab_username}/condor_vir_data").cache()
condor_df.createOrReplaceTempView("condor")

In [12]:
!hdfs dfs -du -s -h /cms/users/tseethon/condor_vir_data

24/10/01 21:55:12 WARN ipc.Client: Exception encountered while connecting to the server ithdp6013.cern.ch/10.116.10.25:9000
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.ipc.StandbyException): Operation category READ is not supported in state standby. Visit https://s.apache.org/sbnn-error
	at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:376)
	at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:561)
	at org.apache.hadoop.ipc.Client$Connection.access$2100(Client.java:347)
	at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:783)
	at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:779)
	at java.base/java.security.AccessController.doPrivileged(Native Method)
	at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
	at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:779)
	at org.apache.hadoop.ipc.Cl

In [13]:
# Avro dump of the CRAB task table for TODAY; only the task name (join key)
# and the ignore-locality flag are kept. Exposed to Spark SQL as `tasks`.
HDFS_CRAB_part = f'/project/awg/cms/crab/tasks/{TODAY}/'
crab_df = (
    spark.read.format('avro')
    .load(HDFS_CRAB_part)
    .select('TM_TASKNAME', 'TM_IGNORE_LOCALITY')
    .cache()
)
crab_df.createOrReplaceTempView("tasks")

24/10/01 21:55:13 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


In [14]:
# Log the input locations and the working directory, one item per line.
_banner_rule = "==============================================="
print(
    _banner_rule,
    "Condor Matrix and CRAB Table",
    _banner_rule,
    "File Directory:", HDFS_CRAB_part, candidate_files,
    "Work Directory:", os.getcwd(),
    _banner_rule,
    _banner_rule,
    sep='\n',
)

Condor Matrix and CRAB Table
File Directory:
/project/awg/cms/crab/tasks/2024-09-28/
['/project/monitoring/archive/condor/raw/metric/2024/08/30{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/08/31{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/01{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/02{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/03{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/04{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/05{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/06{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/07{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/08{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/09{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/10{,.tmp}', '/project/monitoring/archive/condor/raw/metric/2024/09/11{,.tmp}', '/project/monitoring/archive/condor/raw/met

In [15]:
# Build the documents to send, in three CTE steps:
#   filter_tb   - rows of the `condor` view with RecordTime in [YESTERDAY, TODAY)
#                 (bounds are epoch milliseconds).
#   join_tb     - inner join with the `tasks` view on CRAB_Workflow = TM_TASKNAME,
#                 which brings in TM_IGNORE_LOCALITY; condor rows whose workflow
#                 has no row in `tasks` are dropped.
#   finalize_tb - adds CRAB_Type ('PrivateMC' when CRAB_DataBlock is 'MCFakeBlock',
#                 else 'Analysis'), a constant `type` = 'condor', and `timestamp`
#                 as a copy of RecordTime.
query = f"""\
WITH filter_tb AS (
SELECT *
FROM condor
WHERE 1=1
AND RecordTime >= {int(YESTERDAY_datetime.timestamp()) * 1000}
AND RecordTime < {int(TODAY_datetime.timestamp()) * 1000}
),
join_tb AS (
SELECT RecordTime, CMSPrimaryDataTier, WallClockHr, CoreHr, CpuTimeHr, ExitCode, CRAB_DataBlock, TM_IGNORE_LOCALITY, GlobalJobId, CommittedCoreHr, CommittedWallClockHr
FROM filter_tb
INNER JOIN tasks 
ON filter_tb.CRAB_Workflow = tasks.TM_TASKNAME 
), 
finalize_tb AS (
SELECT RecordTime, CMSPrimaryDataTier, WallClockHr, CoreHr, CpuTimeHr, ExitCode, CRAB_DataBlock, TM_IGNORE_LOCALITY, GlobalJobId, CommittedCoreHr, CommittedWallClockHr,
       CASE 
           WHEN CRAB_DataBlock = 'MCFakeBlock' THEN 'PrivateMC'  
           ELSE 'Analysis'
       END AS CRAB_Type,        --- to differentiate between analysis and mc
       'condor' AS type,        --- use to match specific data when use wildcard index pattern on grafana side
       RecordTime AS timestamp  --- use `RecordTime` as timestamp
FROM join_tb
)
SELECT * 
FROM finalize_tb 
"""
# Run the query and preview the first 10 rows.
tmpdf = spark.sql(query)
tmpdf.show(10)



+-------------+------------------+-------------------+-------------------+--------------------+--------+--------------------+------------------+--------------------+-------------------+--------------------+---------+------+-------------+
|   RecordTime|CMSPrimaryDataTier|        WallClockHr|             CoreHr|           CpuTimeHr|ExitCode|      CRAB_DataBlock|TM_IGNORE_LOCALITY|         GlobalJobId|    CommittedCoreHr|CommittedWallClockHr|CRAB_Type|  type|    timestamp|
+-------------+------------------+-------------------+-------------------+--------------------+--------+--------------------+------------------+--------------------+-------------------+--------------------+---------+------+-------------+
|1725202677000|        MINIAODSIM|             2.4125|             2.4125|  2.2816666666666667|       0|/ST_tW_top_5f_NoF...|                 F|crab3@vocms0197.c...| 2.4122222222222223|  2.4122222222222223| Analysis|condor|1725202677000|
|1725194678000|        MINIAODSIM|  4.2419444444

In [16]:
# Number of rows produced by the query, i.e. how many documents will be sent.
tmpdf.count()

12279275

In [17]:
# OpenSearch index definition (settings + field mappings) handed to osearch.
# NOTE(review): this rebinds `schema`, which earlier held the Spark StructType
# used to read the condor JSON -- re-running the read cell after this one would
# pick up this dict instead.
# NOTE(review): the *Hr fields are mapped as `long` although the data holds
# fractional hours -- confirm this is intended.
schema = {
    "settings": {
        "index": {"number_of_shards": "1", "number_of_replicas": "1"},
    },
    "mappings": {
        "properties": {
            "RecordTime": {"format": "epoch_millis", "type": "date"},
            "CMSPrimaryDataTier": {"ignore_above": 2048, "type": "keyword"},
            "GlobalJobId": {"ignore_above": 2048, "type": "keyword"},
            "WallClockHr": {"type": "long"},
            "CoreHr": {"type": "long"},
            "CpuTimeHr": {"type": "long"},
            "ExitCode": {"ignore_above": 2048, "type": "keyword"},
            "TM_IGNORE_LOCALITY": {"ignore_above": 2048, "type": "keyword"},
            "CRAB_Type": {"ignore_above": 2048, "type": "keyword"},
            "CRAB_DataBlock": {"ignore_above": 2048, "type": "keyword"},
            "CommittedCoreHr": {"type": "long"},
            "CommittedWallClockHr": {"type": "long"},
            "type": {"ignore_above": 2048, "type": "keyword"},
            "timestamp": {"format": "epoch_millis", "type": "date"},
        },
    },
}

In [18]:
# Development aid: reload `osearch` so edits made to osearch.py are picked up
# without restarting the kernel.
import importlib
import osearch
importlib.reload(osearch)

<module 'osearch' from '/eos/home-i00/t/tseethon/SWAN_projects/CRABServer/src/script/Monitor/crab-spark/notebooks/../workdir/osearch.py'>

In [22]:
# Partition sizing. For ~12M rows (27 days of data) the starting layout was 51
# partitions, around 250k rows each. Reducing to 20 partitions was tried once
# and made the Python kernel run out of memory, so aim for about 200k rows per
# partition instead.
ROWS_PER_PARTITION = 200000
# repartition() requires a positive partition count: clamp to 1 so that a
# window with fewer than ROWS_PER_PARTITION rows does not fail.
partition_num = max(1, tmpdf.count() // ROWS_PER_PARTITION)
tmpdf = tmpdf.repartition(partition_num, 'RecordTime')
total_part = tmpdf.rdd.getNumPartitions()

print(f"Number of partition: {total_part}")

Number of partition: 61


In [24]:
# Send to OpenSearch one Spark partition at a time: each partition is turned
# into a single list of row dicts, pulled to the driver, and handed to osearch.
def _partition_as_dicts(rows):
    """Collapse one partition into a single-element list holding all its rows as dicts."""
    return [[row.asDict() for row in rows]]


for part, docs in enumerate(tmpdf.rdd.mapPartitions(_partition_as_dicts).toLocalIterator(), start=1):
    print(f"Partition: {part}/{total_part}, Length of partition: {len(docs)}")
    # The trailing 20000 is the batch size; it is just an arbitrary number.
    osearch.send_os_parallel(docs, index_name, schema, secretpath, TODAY_datetime.timestamp(), 20000)

Partition: 1/61, Length of partition: 204764




FINISHED : 
204764
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 2/61, Length of partition: 199365




FINISHED : 
199365
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 3/61, Length of partition: 201530




FINISHED : 
201530
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 4/61, Length of partition: 202556




FINISHED : 
202556
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 5/61, Length of partition: 202104




FINISHED : 
202104
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 6/61, Length of partition: 199419




FINISHED : 
199419
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 7/61, Length of partition: 201184




FINISHED : 
201184
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 8/61, Length of partition: 199767




FINISHED : 
199767
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 9/61, Length of partition: 201153




FINISHED : 
201153
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 10/61, Length of partition: 202183




FINISHED : 
202183
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 11/61, Length of partition: 201815




FINISHED : 
201815
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 12/61, Length of partition: 201467




FINISHED : 
201467
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 13/61, Length of partition: 199033




FINISHED : 
199033
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 14/61, Length of partition: 199696




FINISHED : 
199696
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 15/61, Length of partition: 203013




FINISHED : 
203013
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 16/61, Length of partition: 202106




FINISHED : 
202106
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 17/61, Length of partition: 202705




FINISHED : 
202705
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 18/61, Length of partition: 199202




FINISHED : 
199202
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 19/61, Length of partition: 202319




FINISHED : 
202319
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 20/61, Length of partition: 202814




FINISHED : 
202814
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 21/61, Length of partition: 199502




FINISHED : 
199502
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 22/61, Length of partition: 201234




FINISHED : 
201234
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 23/61, Length of partition: 200980




FINISHED : 
200980
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 24/61, Length of partition: 199865




FINISHED : 
199865
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 25/61, Length of partition: 198497




FINISHED : 
198497
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 26/61, Length of partition: 201931




FINISHED : 
201931
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 27/61, Length of partition: 201691




FINISHED : 
201691
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 28/61, Length of partition: 202256




FINISHED : 
202256
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 29/61, Length of partition: 199494




FINISHED : 
199494
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 30/61, Length of partition: 200530




FINISHED : 
200530
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 31/61, Length of partition: 201356




FINISHED : 
201356
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 32/61, Length of partition: 202043




FINISHED : 
202043
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 33/61, Length of partition: 203705




FINISHED : 
203705
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 34/61, Length of partition: 200753




FINISHED : 
200753
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 35/61, Length of partition: 202939




FINISHED : 
202939
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 36/61, Length of partition: 202580




FINISHED : 
202580
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 37/61, Length of partition: 199643




FINISHED : 
199643
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 38/61, Length of partition: 200953




FINISHED : 
200953
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 39/61, Length of partition: 202577




FINISHED : 
202577
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 40/61, Length of partition: 203933




FINISHED : 
203933
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 41/61, Length of partition: 202203




FINISHED : 
202203
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 42/61, Length of partition: 202319




FINISHED : 
202319
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 43/61, Length of partition: 199503




FINISHED : 
199503
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 44/61, Length of partition: 201095




FINISHED : 
201095
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 45/61, Length of partition: 199263




FINISHED : 
199263
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 46/61, Length of partition: 203179




FINISHED : 
203179
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 47/61, Length of partition: 200323




FINISHED : 
200323
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 48/61, Length of partition: 201415




FINISHED : 
201415
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 49/61, Length of partition: 198931




FINISHED : 
198931
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 50/61, Length of partition: 201919




FINISHED : 
201919
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 51/61, Length of partition: 199097




FINISHED : 
199097
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 52/61, Length of partition: 203128




FINISHED : 
203128
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 53/61, Length of partition: 200300




FINISHED : 
200300
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 54/61, Length of partition: 204396




FINISHED : 
204396
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 55/61, Length of partition: 201850




FINISHED : 
201850
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 56/61, Length of partition: 201062




FINISHED : 
201062
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 57/61, Length of partition: 200363




FINISHED : 
200363
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 58/61, Length of partition: 202871




FINISHED : 
202871
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 59/61, Length of partition: 201124




FINISHED : 
201124
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 60/61, Length of partition: 200064




FINISHED : 
200064
ROWS ARE SENT
0
ROWS ARE FAILED
Partition: 61/61, Length of partition: 200213




FINISHED : 
200213
ROWS ARE SENT
0
ROWS ARE FAILED


In [26]:
# Final marker: printed only when every cell above has run to completion.
print("Done!")

Done!
