# Clusterlogs Notebook

In [1]:
import os

import numpy as np
import pandas as pd

from clusterlogs import pipeline

## 1. Load data from a file and create a pandas DataFrame with an index

In [2]:
# Load the semicolon-separated sample file (path is relative to the working directory).
# NOTE(review): the next cell rebinds `df` to a different file, so this frame
# is discarded when both cells are run in order.
df = pd.read_csv('samples/harvester_errors24.csv', sep=';')

In [14]:
# Path to the tab-separated Harvester log sample. The default location is
# machine-specific, so it can be overridden through the CLUSTERLOGS_DATA
# environment variable; when the variable is unset the original path is used.
# NOTE: this rebinds `df`, replacing the frame loaded in the previous cell.
data_path = os.environ.get(
    'CLUSTERLOGS_DATA',
    '/Users/maria/cernbox/LogsClusterization/Harvester/data_sample.csv',
)
df = pd.read_csv(data_path, sep='\t')

In [15]:
# Drop every row that contains a missing value. The name is rebound instead of
# mutating the frame with `inplace=True`, which keeps the step chainable and
# avoids in-place modification of a frame other cells may still reference.
df = df.dropna()

In [16]:
# (rows, columns) of the frame after rows with missing values were dropped.
df.shape

(5726, 3)

In [17]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,id,message
0,0,CERN_central_A|118722113,Condor HoldReason: None ; Condor RemoveReason:...
1,1,CERN_central_B|133202736,Payload execution error: returned non-zero
2,2,CERN_central_B|133202747,Payload execution error: returned non-zero
3,3,CERN_central_B|133201846,Payload execution error: returned non-zero
4,4,CERN_central_B|133201823,Payload execution error: returned non-zero


In [18]:
# Name of the DataFrame column that holds the log messages; it is passed to
# `pipeline.Chain` when the clusterization pipeline is created.
target = 'message'

In [19]:
# Number of distinct log messages in the column selected by `target`.
# `target` is reused instead of repeating the column name, so this count always
# refers to the same column that is clustered. `nunique()` ignores missing
# values; none remain here because rows with NaN were dropped earlier.
df[target].nunique()

369

## 2. Execute clusterization pipeline

In [24]:
# Create the clusterization chain from the DataFrame and the name of the
# message column.
# NOTE(review): the exact meaning of `mode`, `model_name` and
# `matching_accuracy` is defined by clusterlogs.pipeline.Chain — confirm
# against its documentation before tuning them.
cluster = pipeline.Chain(
    df,
    target,
    mode='process',
    model_name='harvester_test.model',
    matching_accuracy=0.8,
)

In [25]:
# Run the pipeline. Progress is printed while it runs: vocabulary sizes,
# the number of equal groups found and the final number of clusters.
cluster.process()

Initial size of vocabulary: 610
Size of vocabulary after removing tokens that appears only once: 223
Size of vocabulary after removing rare tokens: 174
Tokenization finished
Found 51 equal groups
Matching Clusterization!
Postprocessed with 39 clusters


In [26]:
# Patterns stored in `cluster.groups`, returned as an array of strings.
cluster.groups['pattern'].values

array(['Condor HoldReason: CREAM_Delegate Error: Received NULL fault; the error is due to another cause: FaultString=[connection error] - FaultCode=[SOAP-ENV:Client] - FaultSubCode=[SOAP-ENV:Client] - FaultDetail=[Connection timed out] ; Worker canceled by harves',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user atlasp@lapp-ce01.in2p3.fr, queue atlasMC8-',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user atlasp@lapp-ce03.in2p3.fr, queue atlasMC8-',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: submit error (Maximum number of jobs already in queue MSG=total number of jobs in 

## 3. Result: all clusters (big clusters and outliers) - sorted by cluster size 

In [27]:
# Result table: one row per cluster with its pattern, the indices of its
# member rows, the cluster size and the token sequence.
cluster.result

Unnamed: 0,pattern,indices,cluster_size,sequence
11,Condor HoldReason: HTCondor-CE held job due to...,"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1...",951,"[Condor, ▁, HoldReason, :, ▁, HTCondor-CE, ▁, ..."
28,Condor HoldReason: CREAM error:｟*｠ ; Worker ca...,"[1163, 1277, 1494, 1661, 1948, 1950, 1951, 195...",873,"[Condor, ▁, HoldReason, :, ▁, CREAM, ▁, error,..."
21,Payload execution error: returned non-zero,"[1, 2, 3, 4, 92, 93, 94, 95, 96, 97, 98, 99, 1...",741,"[Payload, ▁, execution, ▁, error, :, ▁, return..."
35,Condor HoldReason: CREAM error: BLAH error: su...,"[71, 72, 73, 76, 184, 185, 187, 189, 190, 192,...",673,"[Condor, ▁, HoldReason, :, ▁, CREAM, ▁, error,..."
2,Condor HoldReason: None ; Condor RemoveReason:...,"[0, 22, 114, 115, 116, 117, 118, 126, 127, 128...",667,"[Condor, ▁, HoldReason, :, ▁, None, ▁, ;, ▁, C..."
13,Condor HoldReason: The system macro SYSTEM_PER...,"[217, 218, 248, 249, 250, 251, 252, 253, 254, ...",532,"[Condor, ▁, HoldReason, :, ▁, The, ▁, system, ..."
38,｟*｠ not submitted due to incomplete data of th...,"[64, 65, 66, 123, 124, 133, 134, 135, 136, 137...",308,"[｟*｠, ▁, not, ▁, submitted, ▁, due, ▁, to, ▁, ..."
4,Condor HoldReason:｟*｠ ; Worker canceled by har...,"[58, 59, 60, 61, 62, 63, 122, 230, 368, 369, 3...",183,"[Condor, ▁, HoldReason, :, ▁, ｟*｠, ▁, ｟*｠, ▁, ..."
20,LRMS error: (271) job killed:｟*｠,"[446, 1098, 1135, 1493, 1705, 1729, 2146, 2486...",159,"[LRMS, ▁, error, :, ▁, (, 271, ), ▁, job, ▁, k..."
8,Condor HoldReason: Unspecified gridmanager err...,"[5, 119, 131, 132, 222, 223, 226, 227, 279, 28...",132,"[Condor, ▁, HoldReason, :, ▁, Unspecified, ▁, ..."


In [11]:
# Show the messages that belong to cluster 5 of the result table.
cluster.in_cluster(cluster.result, 5)

array(['Failed in data staging: Failed to prepare destination srm://srm.ndgf.org:8443/srm/managerv2?SFN=/atlas/disk/atlasdatadisk/rucio/mc16_13TeV/e8/e9/AOD.19967297._000199.pool.root.1:checksumtype=adler32:checksumvalue=2b028a5e: Failed to prepare destination: Fi'],
      dtype=object)

### Print only patterns

In [13]:
# Only the `pattern` column of the result table, as an array of strings.
cluster.result['pattern'].values

array(['Payload execution error: returned non-zero',
       'JOB id=｟*｠ not found',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user｟*｠@｟*｠ queue｟*｠',
       'submission failed: Exception OSError: [Errno 28] No space left on device',
       "Condor HoldReason: HTCondor-CE held job due to no matching routes, route job limit, or route failure threshold; see 'HTCondor-CE Troubleshooting Guide'",
       'LRMS error: (271) job killed:｟*｠',
       'LRMS error: (-1) Job finished with unknown exit code',
       'Condor HoldReason: CREAM error: Transfer failed: globus_ftp_client: the server responded with an error 500 500-Command failed. : globus_l_gfs_file_open failed. 500-globus_xio: Unable to open file /｟*｠/cream_sandbox/｟*｠/｟*｠',
       '｟*｠ not submitted due to incomplete data of the worker',
       'Condor HoldReason:｟

In [27]:
# NOTE(review): the recorded output of this cell is `KeyError: 43`, so 43 is
# apparently not a valid cluster label in `cluster.result` — use one of the
# labels shown in the first column of the result table instead.
cluster.in_cluster(cluster.result, 43)

KeyError: 43

### Split clusters into big (cluster_size >= 1000) and small (cluster_size < 1000)

In [None]:
# Split the result table on the `cluster_size` column using a threshold of 1000.
# NOTE(review): the largest cluster_size in the result table recorded above is
# 951, so with this threshold `big` is presumably empty — confirm that 1000 is
# the intended cut-off for this dataset.
big, small = cluster.split_clusters(cluster.result, 'cluster_size', 1000)

In [None]:
# Clusters returned as the "big" part of the split.
big

In [None]:
# Clusters returned as the "small" part of the split.
small

### Print all messages from cluster #40

In [None]:
# Show the messages that belong to cluster 40. The clusters table is
# `cluster.result`, the same frame every other `in_cluster` call in this
# notebook passes; no variable named `clusters` is defined in the visible cells.
# NOTE(review): 40 must be an existing cluster label in `cluster.result` —
# an earlier call with 43 recorded `KeyError: 43`, so verify the label first.
cluster.in_cluster(cluster.result, 40)

### Display the performance of all stages

In [None]:
# Timing information collected for the pipeline stages.
cluster.timings