# Clusterlogs Notebook

In [1]:
import pandas as pd
from clusterlogs import pipeline
import numpy as np

## 1. Load data from a file and create an indexed pandas DataFrame

In [None]:
# Alternative input: a semicolon-separated sample file, given by a relative path.
# NOTE(review): the next cell rebinds `df` from a different file, so when both cells
# are run in order this load is discarded — keep only the one you intend to use.
df = pd.read_csv('samples/harvester_errors24.csv', sep=';')

In [2]:
# Load the tab-separated data sample that the recorded outputs below were produced from.
# NOTE(review): this is a hard-coded absolute path on one user's machine; the notebook
# will not run elsewhere until it is replaced with a configurable/relative location.
df = pd.read_csv('/Users/maria/cernbox/LogsClusterization/Harvester/data_sample.csv', sep='\t')

In [3]:
# Drop every row that contains a missing value. Rebinding `df` (rather than using
# inplace=True) is the recommended pandas idiom; the original index labels are kept
# either way.
df = df.dropna()

In [4]:
# (rows, columns) of the DataFrame after rows with missing values were dropped.
df.shape

(5726, 3)

In [5]:
# Preview the first five rows to check the columns that were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,id,message
0,0,CERN_central_A|118722113,Condor HoldReason: None ; Condor RemoveReason:...
1,1,CERN_central_B|133202736,Payload execution error: returned non-zero
2,2,CERN_central_B|133202747,Payload execution error: returned non-zero
3,3,CERN_central_B|133201846,Payload execution error: returned non-zero
4,4,CERN_central_B|133201823,Payload execution error: returned non-zero


In [6]:
# Name of the DataFrame column passed to the pipeline below as the text to cluster.
target = 'message'

In [7]:
# Number of distinct values in the target column. Uses `target` instead of repeating
# the column name so this stays correct if the target column is changed.
df[target].nunique()

369

## 2. Execute clusterization pipeline

In [8]:
# Build the pipeline object from the DataFrame and the name of the message column.
# NOTE(review): the meaning of mode / model_name / matching_accuracy is defined by
# clusterlogs.pipeline.Chain — confirm against that class before tuning them.
cluster = pipeline.Chain(
    df,
    target,
    mode='process',
    model_name='harvester_test.model',
    matching_accuracy=0.7,
)

In [9]:
# Run the pipeline. Progress messages (tokenization, equal groups found, matching,
# number of clusters after post-processing) are printed as it goes.
cluster.process()

Tokenization finished
Found 55 equal groups
Matching Clusterization!
Postprocessed with 31 clusters


In [10]:
# Show the 'pattern' column of cluster.groups as an array of strings.
# NOTE(review): presumably these are the groups before post-processing — confirm.
cluster.groups['pattern'].values

array(['Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user atlasp@lapp-ce01.in2p3.fr, queue atlasMC8-',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user atlasp@lapp-ce03.in2p3.fr, queue atlasMC8-',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: submit error (Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user atlasprodusr01@localhost, qu',
       'Condor HoldReason: CREAM error: CREAM_Job_Register Error: MethodName=[jobRegister] ErrorCode=[0] Description=[The CREAM service cannot accept jobs at the moment] FaultCause=[Submissions are d

## 3. Result: all clusters (big clusters and outliers) - sorted by cluster size 

In [11]:
# Final clusters table: one row per cluster with its pattern, member indices and cluster_size.
cluster.result

Unnamed: 0,pattern,indices,cluster_size
4,Condor HoldReason:｟*｠:｟*｠ ; Worker canceled by...,"[1163, 1661, 1948, 1950, 1951, 1952, 1953, 272...",1199
10,Condor HoldReason: HTCondor-CE held job due to...,"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1...",951
30,Payload execution error: returned non-zero,"[1, 2, 3, 4, 92, 93, 94, 95, 96, 97, 98, 99, 1...",741
0,Condor HoldReason: CREAM error: BLAH error: su...,"[71, 72, 73, 76, 184, 185, 187, 189, 190, 192,...",673
12,Condor HoldReason: None ; Condor RemoveReason:...,"[0, 22, 114, 115, 116, 117, 118, 126, 127, 128...",667
15,Condor HoldReason: The system macro SYSTEM_PER...,"[217, 218, 248, 249, 250, 251, 252, 253, 254, ...",532
29,｟*｠ not submitted due to incomplete data of th...,"[64, 65, 66, 123, 124, 133, 134, 135, 136, 137...",308
27,LRMS error: (271)｟*｠,"[158, 160, 183, 282, 435, 577, 658, 666, 667, ...",237
6,Condor HoldReason: CREAM error: Transfer faile...,"[741, 283, 284, 292, 685, 1123, 1345, 1648, 20...",112
8,Condor HoldReason: CREAM error: reason=｟*｠,"[2371, 5218, 648, 649, 650, 657, 927, 1115, 11...",41


In [12]:
# Show the messages belonging to cluster 5 of the result table.
# NOTE(review): the second argument appears to be a label from cluster.result's index — confirm.
cluster.in_cluster(cluster.result, 5)

array(['Condor HoldReason: CREAM error: Job has been terminated (got SIGTERM); Cannot upload file:///home/gridpools/atlaspilot0071/home_cream_668136194/CREAM668136194/grid.17552583.8.out into gsiftp://calc2.t1.grid.kiae.ru/var/cream_sandbox/atlaspilot/CN_Robot__AT'],
      dtype=object)

### Print only patterns

In [13]:
# Only the 'pattern' column of the result table, as an array of strings.
cluster.result['pattern'].values

array(['Condor HoldReason:｟*｠:｟*｠ ; Worker canceled by harvester due to held too long or not found',
       "Condor HoldReason: HTCondor-CE held job due to no matching routes, route job limit, or route failure threshold; see 'HTCondor-CE Troubleshooting Guide' ; Worker canceled by harvester due to held too long or not found",
       'Payload execution error: returned non-zero',
       'Condor HoldReason: CREAM error: BLAH error: submission command failed (exit code = 1) (stdout:) (stderr:qsub: Maximum number of jobs already in queue MSG=total number of jobs in queue exceeds the queue limit: user｟*｠@｟*｠, queue｟*｠',
       'Condor HoldReason: None ; Condor RemoveReason: removed by SYSTEM_PERIODIC_REMOVE due to job restarted undesirably.',
       "Condor HoldReason: The system macro SYSTEM_PERIODIC_HOLD expression '(x509userproxysubject =?= undefined) | (x509UserProxyExpiration =?= undefined) | (time() > x509UserProxyExpiration) & JobStatus =!= 3 & JobStatus =!= 4) | (RoutedBy =?= null & 

In [27]:
# NOTE(review): this call raised KeyError: 43 when it was executed — presumably no
# cluster is labelled 43 (post-processing reported 31 clusters). Use a label that is
# present in cluster.result's index, or remove this cell so Run All completes.
cluster.in_cluster(cluster.result, 43)

KeyError: 43

### Split clusters into big (cluster_size >= 1000) and small (cluster_size < 1000)

In [None]:
# Partition the result table on the 'cluster_size' column with a threshold of 1000;
# the two returned tables are bound to `big` and `small`.
big, small = cluster.split_clusters(
    cluster.result,
    'cluster_size',
    1000,
)

In [None]:
# First table returned by split_clusters (the "big" clusters).
big

In [None]:
# Second table returned by split_clusters (the "small" clusters).
small

### Print all messages from cluster #40

In [None]:
# Look the cluster up in cluster.result, the same table the other in_cluster calls in
# this notebook use (no variable named `clusters` is defined in this notebook).
# NOTE(review): label 40 must exist in cluster.result's index — confirm for your data.
cluster.in_cluster(cluster.result, 40)

### Display the performance of all stages

In [None]:
# Timing information recorded by the pipeline for its stages.
cluster.timings