# Clusterlogs Notebook

In [1]:
import pandas as pd
from clusterlogs import pipeline

## 1. Download data from file and create pandas DataFrame with index 

In [2]:
df = pd.read_csv('samples/harvester_errors24.csv', sep=';')

In [3]:
df.dropna(inplace=True)

In [4]:
df.shape

(75058, 2)

In [5]:
df.head()

Unnamed: 0,id,message
0,CERN_central_B|111388583,Payload execution error: returned non-zero
1,CERN_central_B|111388549,Payload execution error: returned non-zero
2,CERN_central_B|111388581,Payload execution error: returned non-zero
3,CERN_central_B|111388573,Payload execution error: returned non-zero
4,CERN_central_B|111389035,Payload execution error: returned non-zero


In [6]:
target = 'message'

## 2. Execute clusterization pipeline

In [7]:
cluster = pipeline.Chain(df, target, mode='create', model_name='harvester_test.model', matching_accuracy=0.7)

In [8]:
cluster.process()

Tokenization finished
Found 1343 equal groups
Postprocessed with 88 clusters


## 3. Result: all clusters (big clusters and outliers) - sorted by cluster size 

In [18]:
cluster.result

Unnamed: 0,pattern,indices,cluster_size
79,Payload execution error: returned non-zero,"[0, 1, 2, 3, 4, 5, 8, 9, 10, 12, 13, 14, 15, 1...",21436
67,JOB id=｟*｠ not found,"[44, 45, 46, 47, 56, 57, 58, 83, 84, 94, 95, 9...",10838
3,Condor HoldReason: CREAM error: BLAH error: su...,"[31, 33, 59, 60, 61, 88, 90, 198, 209, 232, 23...",9202
81,submission failed: Exception OSError: [Errno 2...,"[55, 62, 63, 64, 65, 66, 67, 108, 109, 116, 11...",7771
69,LRMS error: ()｟*｠,"[133, 381, 1030, 1065, 1100, 1101, 1105, 1110,...",4289
...,...,...,...
62,Failed in data staging: Transfer｟*｠,[37992],1
78,LRMS error: (｟*｠) Job was cancelled,[6219],1
55,Failed in data staging: Failed reading from so...,[53099],1
56,Failed in data staging: Failed to prepare dest...,[41731],1


### Print only patterns

In [19]:
cluster.result['pattern'].values

array(['Payload execution error: returned non-zero',
       'JOB id=｟*｠ not found',
       'Condor HoldReason: CREAM error: BLAH error: submission command failedexit code = 1)stdout:)stderr:qsub: number of jobs already in｟*｠ MSG=total number of jobs in｟*｠ exceeds the｟*｠ limit: user@｟*｠',
       'submission failed: Exception OSError: [Errno 28] No space left on device',
       'LRMS error: ()｟*｠',
       'Condor HoldReason:｟*｠ ; Worker canceled by harvester due to held too long or not',
       'LRMS error: ()｟*｠',
       "Condor HoldReason: HTCondor-CE｟*｠ due to no matching routes,｟*｠ limit,｟*｠ failure threshold; see 'HTCondor-CE Troubleshooting Guide' ; Worker canceled by harvester due to｟*｠ too long｟*｠ not found",
       'Condor HoldReason: CREAM error: Transfer failed: globus_ftp_client: the server responded with an error 500 500-Command failed : failed 500-globus_xio: Unable to｟*｠',
       '｟*｠ not submitted due to incomplete data of the worker',
       'Failed in data staging: Fail

### Split clusters to big (cluster_size >= 1000) and small (cluster_size < 1000)

In [21]:
big, small = cluster.split_clusters(cluster.result, 'cluster_size', 1000)

In [22]:
big

Unnamed: 0,pattern,indices,cluster_size
79,Payload execution error: returned non-zero,"[0, 1, 2, 3, 4, 5, 8, 9, 10, 12, 13, 14, 15, 1...",21436
67,JOB id=｟*｠ not found,"[44, 45, 46, 47, 56, 57, 58, 83, 84, 94, 95, 9...",10838
3,Condor HoldReason: CREAM error: BLAH error: su...,"[31, 33, 59, 60, 61, 88, 90, 198, 209, 232, 23...",9202
81,submission failed: Exception OSError: [Errno 2...,"[55, 62, 63, 64, 65, 66, 67, 108, 109, 116, 11...",7771
69,LRMS error: ()｟*｠,"[133, 381, 1030, 1065, 1100, 1101, 1105, 1110,...",4289
17,Condor HoldReason:｟*｠ ; Worker canceled by har...,"[966, 2367, 2899, 3339, 3615, 3663, 3667, 3669...",3245
74,LRMS error: ()｟*｠,"[241, 841, 870, 871, 872, 873, 1016, 1047, 104...",2985
36,Condor HoldReason: HTCondor-CE｟*｠ due to no ma...,"[35, 36, 37, 38, 39, 40, 41, 52, 53, 54, 110, ...",2964
19,Condor HoldReason: CREAM error: Transfer faile...,"[70663, 65803, 67254, 71232, 71293, 71911, 723...",2192
84,｟*｠ not submitted due to incomplete data of th...,"[6, 7, 120, 121, 122, 143, 329, 330, 382, 383,...",1981


In [23]:
small

Unnamed: 0,pattern,indices,cluster_size
14,Condor HoldReason: CREAM error:｟*｠ ; Worker ca...,"[32923, 1844, 2223, 2363, 2365, 2368, 2369, 23...",599
15,Condor HoldReason: CREAM error: Cannot move IS...,"[466, 716, 1277, 2286, 2302, 2322, 2545, 2612,...",476
39,Condor HoldReason: The system macro SYSTEM_PER...,"[51, 82, 206, 560, 561, 1331, 1508, 1715, 1755...",447
12,Condor HoldReason: Error: Received NULL fault;...,"[1743, 2013, 2014, 2015, 5277, 5278, 9316, 121...",346
9,Condor HoldReason: CREAM error: CREAM_Job_Regi...,"[6612, 10117, 27209, 30719, 5393, 8640, 27189,...",336
...,...,...,...
62,Failed in data staging: Transfer｟*｠,[37992],1
78,LRMS error: (｟*｠) Job was cancelled,[6219],1
55,Failed in data staging: Failed reading from so...,[53099],1
56,Failed in data staging: Failed to prepare dest...,[41731],1


### Print all messages from cluster #40

In [16]:
cluster.in_cluster(clusters, 40)

array(["Condor HoldReason: The system macro SYSTEM_PERIODIC_HOLD expression '(x509userproxysubject =?= undefined) || (x509UserProxyExpiration =?= undefined) || ((time() > x509UserProxyExpiration) && JobStatus =!= 3 && JobStatus =!= 4) || (RoutedBy =?= null && JobU",
       "Condor HoldReason: The system macro SYSTEM_PERIODIC_HOLD expression '(x509userproxysubject =?= undefined) || (x509UserProxyExpiration =?= undefined) || ((time() > x509UserProxyExpiration) && JobStatus =!= 3 && JobStatus =!= 4) || (RoutedBy =?= null && JobU",
       "Condor HoldReason: The system macro SYSTEM_PERIODIC_HOLD expression '(x509userproxysubject =?= undefined) || (x509UserProxyExpiration =?= undefined) || ((time() > x509UserProxyExpiration) && JobStatus =!= 3 && JobStatus =!= 4) || (RoutedBy =?= null && JobU",
       ...,
       "Condor HoldReason: The system macro SYSTEM_PERIODIC_HOLD expression '(x509userproxysubject =?= undefined) || (x509UserProxyExpiration =?= undefined) || ((time() > x509UserProxyExp

### Display the performance of all stages

In [17]:
cluster.timings

{'tokenization': 11.5561, 'group_equals': 1.1382, 'process': 13.6269}