In [1]:
import pandas as pd
import multiprocessing
import pickle
import numpy as np
import difflib
import regex as re

from math import sqrt
from time import time
from sklearn.cluster import DBSCAN, AgglomerativeClustering, KMeans
from sklearn.neighbors import NearestNeighbors
from gensim.models import Word2Vec
from kneed import KneeLocator

In [2]:
pd.set_option('max_colwidth', 800)

In [3]:
cpu_number = multiprocessing.cpu_count()

w2v_window= 7

In [4]:
solver_total_errors_df= pd.read_csv('solver-error-clean-data.csv')

with open("solver-errors-clean-clustering-data.txt", "rb") as fp:
    clean_clustering_data = pickle.load(fp)

In [5]:
def detect_embedding_size(tokens):
    flat_list = [item for row in tokens for item in row]
    vocab = set(flat_list)
    embedding_size = round(len(vocab) ** (2/3))
    if embedding_size >= 400:
        embedding_size = 400
    return embedding_size

w2v_size = detect_embedding_size(clean_clustering_data)
w2v_size

26

In [6]:
def tokens_vectorization(clustering_data, w2v_size, w2v_window, cpu_number, model_name):
    iterations = 100
    word2vec = Word2Vec(clustering_data,
                           size = w2v_size, 
                           window = w2v_window, 
                           min_count=1, 
                           workers = cpu_number,
                           iter=iterations)
    word2vec.save(model_name)
    return word2vec

In [7]:
word2vec = tokens_vectorization(clean_clustering_data, 
                                 w2v_size = w2v_size, 
                                 w2v_window= w2v_window, 
                                 cpu_number = cpu_number, 
                                 model_name='word2vec.model')

In [8]:
def sentence_vectorization(clustering_data, word2vec):
    sent2vec = []
    for sent in clustering_data:
        sent_vec = []
        numw = 0
        for w in sent:
            try:
                sent_vec = word2vec[w] if numw == 0 else np.add(sent_vec, word2vec[w])
                numw += 1
            except Exception:
                pass
        sent2vec.append(np.asarray(sent_vec) / numw)   
    return np.vstack(sent2vec)

In [9]:
sent2vec = sentence_vectorization(clean_clustering_data, word2vec)

  


In [10]:
sent2vec.shape

(1624, 26)

In [11]:
def kneighbors(sent2vec):
    k = round(sqrt(len(sent2vec)))
    neigh = NearestNeighbors(n_neighbors=k)
    nbrs = neigh.fit(sent2vec)
    distances, indices = nbrs.kneighbors(sent2vec)
    distances = [np.mean(d) for d in np.sort(distances, axis=0)]
    return distances

avg_distances = kneighbors(sent2vec)

In [12]:
def epsilon_search(distances):
    kneedle = KneeLocator(distances, list(range(len(distances))))
    epsilon = max(kneedle.all_elbows) if (len(kneedle.all_elbows) > 0) else 1
    return epsilon


def hierarchical(epsilon, sent2vec):
        cluster_labels = AgglomerativeClustering(n_clusters=None,
                                                 distance_threshold=epsilon).fit_predict(sent2vec)
        return cluster_labels

In [13]:
epsilon = epsilon_search(avg_distances)
cluster_labels = hierarchical(epsilon, sent2vec)

In [14]:
cluster_labels

array([13, 28, 37, ..., 15,  7, 13])

In [15]:
solver_total_errors_df['cluster'] = cluster_labels

In [16]:
solver_total_errors_df.head(10)

Unnamed: 0,index,package_name,package_version,index_url,type,command,message,return_code,stderr,stdout,timeout,Error_info,command_info,cwd,Complete_output,ERROR,specific_error,clustering_data,clean_clustering_data,cluster
0,0,tryton,3.2.20,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps tryton==3.2.20 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-_gczw75i/tryton/setup.py'""'""'; __file__='""'""'/tmp/pip-install-_gczw75i/tryton/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-_gczw75i/tryton/pip-egg-info\n cwd: /tmp/pip-install-_gczw75i/tryton/\n Complete output (6 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-_gczw75i/tryton/setup.py"", line 289\n print ""makensis.exe n...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-_gczw75i/tryton/setup.py'""'""'; __file__='""'""'/tmp/pip-install-_gczw75i/tryton/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-_gczw75i/tryton/pip-egg-info\n cwd: /tmp/pip-install-_gczw75i/tryton/\n Complete output (6 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-_gczw75i/tryton/setup.py"", line 289\n print ""makensis.exe not found: installers can not be created, ""\\n ...",Collecting tryton==3.2.20\n Downloading https://files.pythonhosted.org/packages/8d/b6/a88e6e4df1557cf3f265b12fd585f36e212fd8669aa394c9fbb5a86e56d3/tryton-3.2.20.tar.gz (811kB)\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install gczw75i tryton setup py file tmp pip install gczw75i tryton setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install gczw75i tryton pip egg info'],['cwd tmp pip install gczw75i tryton'],"['Complete output 6 lines', 'Traceback most recent call last', 'File string line 1 in module', 'File tmp pip install gczw75i tryton setup py line 289', 'print makensis exe not found installers can not be created', '', 'SyntaxError Missing parentheses in call to print Did you mean print makensis exe not found installers can not be created']",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],['SyntaxError Missing parentheses in call to print Did you mean print makensis exe not found installers can not be created'],SyntaxError,['SyntaxError'],13
1,1,pyobjc-framework-contacts,4.0b1,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps pyobjc-framework-contacts==4.0b1 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/setup.py'""'""'; __file__='""'""'/tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/pip-egg-info\n cwd: /tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/\n Complete output (15 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/p...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/setup.py'""'""'; __file__='""'""'/tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/pip-egg-info\n cwd: /tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/\n Complete output (15 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-t57p8sf3/pyobjc-framework-contacts/...",Collecting pyobjc-framework-contacts==4.0b1\n Downloading https://files.pythonhosted.org/packages/ae/af/03f444de5ef55bb037412d25f696e037fe8c132192a96c0e8e0a167157d5/pyobjc-framework-Contacts-4.0b1.tar.gz\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install t57p8sf3 pyobjc framework contacts setup py file tmp pip install t57p8sf3 pyobjc framework contacts setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install t57p8sf3 pyobjc framework contacts pip egg info'],['cwd tmp pip install t57p8sf3 pyobjc framework contacts'],"['Complete output 15 lines', 'Traceback most recent call last', 'File string line 1 in module', 'File tmp pip install t57p8sf3 pyobjc framework contacts setup py line 26 in module', 'for fn in os listdir Modules', 'File tmp pip install t57p8sf3 pyobjc framework contacts pyobjc setup py line 375 in Extension', 'os level get os level', 'File tmp pip install t57p8sf3 pyobjc framework contacts pyobjc setup py line 189 in get os level', 'pl plistlib readPlist System Library CoreServices SystemVersion plist', 'File usr lib64 python3 7 plistlib py line 99 in readPlist', 'with maybe open pathOrFile rb as fp', 'File usr lib64 python3 7 contextlib py line 112 in enter', 'return next self gen', 'File usr lib64 python3 7 plistlib py line 82 in ...",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],['FileNotFoundError Errno 2 No such file or directory System Library CoreServices SystemVersion plist'],FileNotFoundError,['FileNotFoundError'],28
2,2,numpy,1.14.0,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps numpy==1.14.0 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-vul_9fm4/numpy/setup.py'""'""'; __file__='""'""'/tmp/pip-install-vul_9fm4/numpy/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-spg1ccfy --python-tag cp37\n cwd: /tmp/pip-install-vul_9fm4/numpy/\n Complete output (306 lines):\n Running from numpy source directory.\n blas_opt_info:\n blas_mkl_info:\n customize UnixCCompiler\n libraries mkl_rt not found in ['/home/solver/venv/lib', '/usr/local/lib64', '/usr/local...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-vul_9fm4/numpy/setup.py'""'""'; __file__='""'""'/tmp/pip-install-vul_9fm4/numpy/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-spg1ccfy --python-tag cp37\n cwd: /tmp/pip-install-vul_9fm4/numpy/\n Complete output (306 lines):\n Running from numpy source directory.\n blas_opt_info:\n blas_mkl_info:\n customize UnixCCompiler\n libraries mkl_rt not found in ['/home/solver/venv/lib', '/usr/local/lib64', '/usr/local/lib', '/usr/lib64', '/usr/lib']\n NOT AVAI...",Collecting numpy==1.14.0\n Downloading https://files.pythonhosted.org/packages/ee/66/7c2690141c520db08b6a6f852fa768f421b0b50683b7bbcd88ef51f33170/numpy-1.14.0.zip (4.9MB)\nBuilding wheels for collected packages: numpy\n Building wheel for numpy (setup.py): started\n Building wheel for numpy (setup.py): finished with status 'error'\n Running setup.py clean for numpy\nFailed to build numpy\nInstalling collected packages: numpy\n Running setup.py install for numpy: started\n Running setup.py install for numpy: finished with status 'error'\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,"['command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install vul 9fm4 numpy setup py file tmp pip install vul 9fm4 numpy setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec bdist wheel d tmp pip wheel spg1ccfy python tag cp37', 'command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install vul 9fm4 numpy setup py file tmp pip install vul 9fm4 numpy setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec ...",['cwd tmp pip install vul 9fm4 numpy'],"['Complete output 306 lines', 'Running from numpy source directory', 'blas opt info', 'blas mkl info', 'customize UnixCCompiler', 'libraries mkl rt not found in home solver venv lib usr local lib64 usr local lib usr lib64 usr lib', 'NOT AVAILABLE', '', 'blis info', 'customize UnixCCompiler', 'libraries blis not found in home solver venv lib usr local lib64 usr local lib usr lib64 usr lib', 'NOT AVAILABLE', '', 'openblas info', 'customize UnixCCompiler', 'customize UnixCCompiler', 'libraries openblas not found in home solver venv lib usr local lib64 usr local lib usr lib64 usr lib', 'NOT AVAILABLE', '', 'atlas 3 10 blas threads info', 'Setting PTATLAS ATLAS', 'customize UnixCCompiler', 'libraries tatlas not found in home solve...","['ERROR Command errored out with exit status 1', 'ERROR Command errored out with exit status 1 home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install vul 9fm4 numpy setup py file tmp pip install vul 9fm4 numpy setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec install record tmp pip record u0wqiumz install record txt single version externally managed compile install headers home solver venv include site python3 7 numpy Check the logs for full command output']","['raise RuntimeError Broken toolchain cannot link a simple C program', 'RuntimeError Broken toolchain cannot link a simple C program', 'raise RuntimeError Broken toolchain cannot link a simple C program', 'RuntimeError Broken toolchain cannot link a simple C program']",RuntimeError,['RuntimeError'],37
3,3,hachoir-core,1.3,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps hachoir-core==1.3 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py'""'""'; __file__='""'""'/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-b_cqxm9t/hachoir-core/pip-egg-info\n cwd: /tmp/pip-install-b_cqxm9t/hachoir-core/\n Complete output (6 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py"", line 36\n...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py'""'""'; __file__='""'""'/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-b_cqxm9t/hachoir-core/pip-egg-info\n cwd: /tmp/pip-install-b_cqxm9t/hachoir-core/\n Complete output (6 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-b_cqxm9t/hachoir-core/setup.py"", line 36\n print ""Sorry, you need Python 2.4 or g...",Collecting hachoir-core==1.3\n Downloading https://files.pythonhosted.org/packages/c1/cf/8a4dd95559ffedc1b455f5759f80150e182b02537784ca69f0fa50eb9125/hachoir-core-1.3.tar.gz (57kB)\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install b cqxm9t hachoir core setup py file tmp pip install b cqxm9t hachoir core setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install b cqxm9t hachoir core pip egg info'],['cwd tmp pip install b cqxm9t hachoir core'],"['Complete output 6 lines', 'Traceback most recent call last', 'File string line 1 in module', 'File tmp pip install b cqxm9t hachoir core setup py line 36', 'print Sorry you need Python 2 4 or greater to run install Hachoir', '', 'SyntaxError invalid syntax']",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],['SyntaxError invalid syntax'],SyntaxError,['SyntaxError'],13
4,4,xattr,0.7.7,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps xattr==0.7.7 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-x7gd4535/xattr/setup.py'""'""'; __file__='""'""'/tmp/pip-install-x7gd4535/xattr/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-x7gd4535/xattr/pip-egg-info\n cwd: /tmp/pip-install-x7gd4535/xattr/\n Complete output (102 lines):\n WARNING: The directory '/.cache/pip/http' or its parent directory is not owned by the current user and the cache has been disabled. Please check the permissions and owner of tha...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-x7gd4535/xattr/setup.py'""'""'; __file__='""'""'/tmp/pip-install-x7gd4535/xattr/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-x7gd4535/xattr/pip-egg-info\n cwd: /tmp/pip-install-x7gd4535/xattr/\n Complete output (102 lines):\n WARNING: The directory '/.cache/pip/http' or its parent directory is not owned by the current user and the cache has been disabled. Please check the permissions and owner of that directory. If executing pip with sudo, you m...",Collecting xattr==0.7.7\n Downloading https://files.pythonhosted.org/packages/15/90/6f2fe2acf288fdf961d0b416c4e7885d1d9843dc6d6e1baabf77ee5966c8/xattr-0.7.7.tar.gz\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install x7gd4535 xattr setup py file tmp pip install x7gd4535 xattr setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install x7gd4535 xattr pip egg info'],['cwd tmp pip install x7gd4535 xattr'],"['Complete output 102 lines', 'WARNING The directory cache pip http or its parent directory is not owned by the current user and the cache has been disabled Please check the permissions and owner of that directory If executing pip with sudo you may want sudo s H flag', 'WARNING The directory cache pip http or its parent directory is not owned by the current user and the cache has been disabled Please check the permissions and owner of that directory If executing pip with sudo you may want sudo s H flag', 'Warning classifiers should be a list got type filter', 'running egg info', 'creating tmp pip install x7gd4535 xattr pip egg info xattr egg info', 'writing tmp pip install x7gd4535 xattr pip egg info xattr egg info PKG INFO', 'writing dependency links to tmp ...",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],"['distutils errors DistutilsExecError command gcc failed with exit status 1', 'raise CompileError msg', 'distutils errors CompileError command gcc failed with exit status 1', 'raise VerificationError s s e class name e', 'cffi VerificationError CompileError command gcc failed with exit status 1']",cffi VerificationError CompileError,"['cffi', 'VerificationError', 'CompileError']",15
5,5,spacy,2.1.1.dev0,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps spacy==2.1.1.dev0 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org",Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 /home/solver/venv/lib/python3.7/site-packages/pip/_vendor/pep517/_in_process.py build_wheel /tmp/tmprf6lv1dx\n cwd: /tmp/pip-install-43ioejjp/spacy\n Complete output (727 lines):\n running bdist_wheel\n running build\n running build_py\n creating build\n creating build/lib.linux-x86_64-3.7\n creating build/lib.linux-x86_64-3.7/spacy\n copying spacy/about.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/util.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/_ml.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/glossary.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/errors.py -> build/lib.linux-x86_64-3.7/spacy...,1.0,ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 /home/solver/venv/lib/python3.7/site-packages/pip/_vendor/pep517/_in_process.py build_wheel /tmp/tmprf6lv1dx\n cwd: /tmp/pip-install-43ioejjp/spacy\n Complete output (727 lines):\n running bdist_wheel\n running build\n running build_py\n creating build\n creating build/lib.linux-x86_64-3.7\n creating build/lib.linux-x86_64-3.7/spacy\n copying spacy/about.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/util.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/_ml.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/glossary.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/errors.py -> build/lib.linux-x86_64-3.7/spacy\n copying spacy/lemmatizer.py -> build/lib.l...,Collecting spacy==2.1.1.dev0\n Downloading https://files.pythonhosted.org/packages/e8/43/0d3de8b12a220ff28e034e121369b20bc96a463620cfe279c11113d038a1/spacy-2.1.1.dev0.tar.gz (27.7MB)\n Installing build dependencies: started\n Installing build dependencies: finished with status 'done'\n Getting requirements to build wheel: started\n Getting requirements to build wheel: finished with status 'done'\n Preparing wheel metadata: started\n Preparing wheel metadata: finished with status 'done'\nBuilding wheels for collected packages: spacy\n Building wheel for spacy (PEP 517): started\n Building wheel for spacy (PEP 517): finished with status 'error'\n Running setup.py clean for spacy\nFailed to build spacy\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 home solver venv lib python3 7 site packages pip vendor pep517 in process py build wheel tmp tmprf6lv1dx'],['cwd tmp pip install 43ioejjp spacy'],"['Complete output 727 lines', 'running bdist wheel', 'running build', 'running build py', 'creating build', 'creating build lib linux x86 64 3 7', 'creating build lib linux x86 64 3 7 spacy', 'copying spacy about py build lib linux x86 64 3 7 spacy', 'copying spacy util py build lib linux x86 64 3 7 spacy', 'copying spacy ml py build lib linux x86 64 3 7 spacy', 'copying spacy glossary py build lib linux x86 64 3 7 spacy', 'copying spacy errors py build lib linux x86 64 3 7 spacy', 'copying spacy lemmatizer py build lib linux x86 64 3 7 spacy', 'copying spacy language py build lib linux x86 64 3 7 spacy', 'copying spacy init py build lib linux x86 64 3 7 spacy', 'copying spacy main py build lib linux x86 64 3 7 spacy', 'copying spacy scorer py bu...","['ERROR Failed building wheel for spacy', 'ERROR Could not build wheels for spacy which use PEP 517 and cannot be installed directly']",,ERROR Failed building wheel for spacy,"['ERROR', 'Failed', 'building', 'wheel']",7
6,6,fastavro,0.16.5,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps fastavro==0.16.5 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-k1e0u355/fastavro/setup.py'""'""'; __file__='""'""'/tmp/pip-install-k1e0u355/fastavro/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-lfjfh55c --python-tag cp37\n cwd: /tmp/pip-install-k1e0u355/fastavro/\n Complete output (26 lines):\n running bdist_wheel\n running build\n running build_py\n creating build\n creating build/lib.linux-x86_64-3.7\n creating build/lib.linux-x86_64-3.7/fastavro\n copying fastavro/__...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-k1e0u355/fastavro/setup.py'""'""'; __file__='""'""'/tmp/pip-install-k1e0u355/fastavro/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-lfjfh55c --python-tag cp37\n cwd: /tmp/pip-install-k1e0u355/fastavro/\n Complete output (26 lines):\n running bdist_wheel\n running build\n running build_py\n creating build\n creating build/lib.linux-x86_64-3.7\n creating build/lib.linux-x86_64-3.7/fastavro\n copying fastavro/__init__.py -> build/lib.linux-x86_64-3.7/fastav...",Collecting fastavro==0.16.5\n Downloading https://files.pythonhosted.org/packages/1b/e1/b961eeb318beea7fa8a44e6a0546569cf53bfbc3c934146e80458503255d/fastavro-0.16.5.tar.gz (408kB)\nBuilding wheels for collected packages: fastavro\n Building wheel for fastavro (setup.py): started\n Building wheel for fastavro (setup.py): finished with status 'error'\n Running setup.py clean for fastavro\nFailed to build fastavro\nInstalling collected packages: fastavro\n Running setup.py install for fastavro: started\n Running setup.py install for fastavro: finished with status 'error'\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,"['command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install k1e0u355 fastavro setup py file tmp pip install k1e0u355 fastavro setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec bdist wheel d tmp pip wheel lfjfh55c python tag cp37', 'command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install k1e0u355 fastavro setup py file tmp pip install k1e0u355 fastavro setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file e...",['cwd tmp pip install k1e0u355 fastavro'],"['Complete output 26 lines', 'running install', 'running build', 'running build py', 'creating build', 'creating build lib linux x86 64 3 7', 'creating build lib linux x86 64 3 7 fastavro', 'copying fastavro init py build lib linux x86 64 3 7 fastavro', 'copying fastavro main py build lib linux x86 64 3 7 fastavro', 'copying fastavro reader common py build lib linux x86 64 3 7 fastavro', 'copying fastavro reader py py build lib linux x86 64 3 7 fastavro', 'copying fastavro schema common py build lib linux x86 64 3 7 fastavro', 'copying fastavro schema py py build lib linux x86 64 3 7 fastavro', 'copying fastavro writer common py build lib linux x86 64 3 7 fastavro', 'copying fastavro writer py py build lib linux x86 64 3 7 fastavro', 'copying fast...","['ERROR Failed building wheel for fastavro', 'ERROR Command errored out with exit status 1', 'ERROR Command errored out with exit status 1 home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install k1e0u355 fastavro setup py file tmp pip install k1e0u355 fastavro setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec install record tmp pip record is9ivuct install record txt single version externally managed compile install headers home solver venv include site python3 7 fastavro Check the logs for full command output']",,ERROR Failed building wheel for fastavro,"['ERROR', 'Failed', 'building', 'wheel']",7
7,7,pyeclib,1.6.0,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps pyeclib==1.6.0 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 255:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-t1z6_8hy/pyeclib/setup.py'""'""'; __file__='""'""'/tmp/pip-install-t1z6_8hy/pyeclib/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-2r985f1t --python-tag cp37\n cwd: /tmp/pip-install-t1z6_8hy/pyeclib/\n Complete output (14 lines):\n /usr/lib64/python3.7/distutils/dist.py:274: UserWarning: Unknown distribution option: 'bugtrack_url'\n warnings.warn(msg)\n running bdist_wheel\n running build\n *****************...",1.0,"ERROR: Command errored out with exit status 255:\n command: /home/solver/venv/bin/python3 -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-t1z6_8hy/pyeclib/setup.py'""'""'; __file__='""'""'/tmp/pip-install-t1z6_8hy/pyeclib/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' bdist_wheel -d /tmp/pip-wheel-2r985f1t --python-tag cp37\n cwd: /tmp/pip-install-t1z6_8hy/pyeclib/\n Complete output (14 lines):\n /usr/lib64/python3.7/distutils/dist.py:274: UserWarning: Unknown distribution option: 'bugtrack_url'\n warnings.warn(msg)\n running bdist_wheel\n running build\n **************************************************************\...",Collecting pyeclib==1.6.0\n Downloading https://files.pythonhosted.org/packages/aa/d6/ca6bba5e66fc7a9810a995b17a3675492da2bec405806d8ac3db18cfd93b/pyeclib-1.6.0.tar.gz (6.7MB)\nBuilding wheels for collected packages: pyeclib\n Building wheel for pyeclib (setup.py): started\n Building wheel for pyeclib (setup.py): finished with status 'error'\n Running setup.py clean for pyeclib\nFailed to build pyeclib\nInstalling collected packages: pyeclib\n Running setup.py install for pyeclib: started\n Running setup.py install for pyeclib: finished with status 'error'\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 255,"['command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install t1z6 8hy pyeclib setup py file tmp pip install t1z6 8hy pyeclib setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec bdist wheel d tmp pip wheel 2r985f1t python tag cp37', 'command home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install t1z6 8hy pyeclib setup py file tmp pip install t1z6 8hy pyeclib setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec ...",['cwd tmp pip install t1z6 8hy pyeclib'],"['Complete output 14 lines', 'usr lib64 python3 7 distutils dist py 274 UserWarning Unknown distribution option bugtrack url', 'warnings warn msg', 'running install', 'running build', '', '', 'Can not locate liberasurecode so 1', '', 'Install', 'Manual https github com openstack liberasurecode', 'Fedora Red Hat variants liberasurecode devel', 'Debian Ubuntu variants liberasurecode dev', '', '']","['ERROR Failed building wheel for pyeclib', 'ERROR Command errored out with exit status 255', 'ERROR Command errored out with exit status 255 home solver venv bin python3 u c import sys setuptools tokenize sys argv 0 tmp pip install t1z6 8hy pyeclib setup py file tmp pip install t1z6 8hy pyeclib setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec install record tmp pip record mxrv56m1 install record txt single version externally managed compile install headers home solver venv include site python3 7 pyeclib Check the logs for full command output']",,ERROR Failed building wheel for pyeclib,"['ERROR', 'Failed', 'building', 'wheel']",7
8,8,happybase,0.8,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps happybase==0.8 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-xr2ydtx4/happybase/setup.py'""'""'; __file__='""'""'/tmp/pip-install-xr2ydtx4/happybase/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-xr2ydtx4/happybase/pip-egg-info\n cwd: /tmp/pip-install-xr2ydtx4/happybase/\n Complete output (5 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-xr2ydtx4/happybase/setup.py"", line 5, in <module>\n ...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-xr2ydtx4/happybase/setup.py'""'""'; __file__='""'""'/tmp/pip-install-xr2ydtx4/happybase/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-xr2ydtx4/happybase/pip-egg-info\n cwd: /tmp/pip-install-xr2ydtx4/happybase/\n Complete output (5 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-xr2ydtx4/happybase/setup.py"", line 5, in <module>\n execfile('happybase/_version.py')\n Na...",Collecting happybase==0.8\n Downloading https://files.pythonhosted.org/packages/50/53/a2bdeb32c9ecb300263a58da346945437559d183bf9b5e00948e8d0df965/happybase-0.8.tar.gz (60kB)\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install xr2ydtx4 happybase setup py file tmp pip install xr2ydtx4 happybase setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install xr2ydtx4 happybase pip egg info'],['cwd tmp pip install xr2ydtx4 happybase'],"['Complete output 5 lines', 'Traceback most recent call last', 'File string line 1 in module', 'File tmp pip install xr2ydtx4 happybase setup py line 5 in module', 'execfile happybase version py', 'NameError name execfile is not defined']",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],['NameError name execfile is not defined'],NameError,['NameError'],34
9,9,mahotas,0.6.1,https://pypi.org/simple,command_error,"/home/solver/venv/bin/python3 -m pip install --force-reinstall --no-cache-dir --no-deps mahotas==0.6.1 --index-url ""https://pypi.org/simple"" --trusted-host pypi.org","Command exited with non-zero status code (1): ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-biqp6olf/mahotas/setup.py'""'""'; __file__='""'""'/tmp/pip-install-biqp6olf/mahotas/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-biqp6olf/mahotas/pip-egg-info\n cwd: /tmp/pip-install-biqp6olf/mahotas/\n Complete output (9 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-biqp6olf/mahotas/setup.py"", line 30\n On linux, the pac...",1.0,"ERROR: Command errored out with exit status 1:\n command: /home/solver/venv/bin/python3 -c 'import sys, setuptools, tokenize; sys.argv[0] = '""'""'/tmp/pip-install-biqp6olf/mahotas/setup.py'""'""'; __file__='""'""'/tmp/pip-install-biqp6olf/mahotas/setup.py'""'""';f=getattr(tokenize, '""'""'open'""'""', open)(__file__);code=f.read().replace('""'""'\r\n'""'""', '""'""'\n'""'""');f.close();exec(compile(code, __file__, '""'""'exec'""'""'))' egg_info --egg-base /tmp/pip-install-biqp6olf/mahotas/pip-egg-info\n cwd: /tmp/pip-install-biqp6olf/mahotas/\n Complete output (9 lines):\n Traceback (most recent call last):\n File ""<string>"", line 1, in <module>\n File ""/tmp/pip-install-biqp6olf/mahotas/setup.py"", line 30\n On linux, the package is often called python-setuptools'''\n ...",Collecting mahotas==0.6.1\n Downloading https://files.pythonhosted.org/packages/e5/e6/0a514f6385938bfaa980fa01f20f25d36482cf815020ce89dadafdac1221/mahotas-0.6.1.tar.gz (55kB)\n,60.0,Command exited with non zero status code 1 ERROR Command errored out with exit status 1,['command home solver venv bin python3 c import sys setuptools tokenize sys argv 0 tmp pip install biqp6olf mahotas setup py file tmp pip install biqp6olf mahotas setup py f getattr tokenize open open file code f read replace r n n f close exec compile code file exec egg info egg base tmp pip install biqp6olf mahotas pip egg info'],['cwd tmp pip install biqp6olf mahotas'],"['Complete output 9 lines', 'Traceback most recent call last', 'File string line 1 in module', 'File tmp pip install biqp6olf mahotas setup py line 30', 'On linux the package is often called python setuptools', '', 'SyntaxError Missing parentheses in call to print Did you mean print', 'setuptools not found', '', 'On linux the package is often called python setuptools']",['ERROR Command errored out with exit status 1 python setup py egg info Check the logs for full command output'],['SyntaxError Missing parentheses in call to print Did you mean print'],SyntaxError,['SyntaxError'],13


In [17]:
def clustered_output(solver_total_errors_df, mode='INDEX'):
    groups = {}
    for key, value in solver_total_errors_df.groupby(['cluster']):
        if mode == 'ALL':
            groups[str(key)] = value.to_dict(orient='records')
        elif mode == 'INDEX':
            groups[str(key)] = value.index.values.tolist()
        elif mode == 'TARGET':
            groups[str(key)] = value[self.target].values.tolist()
        elif mode == 'CLEANED':
            groups[str(key)] = value['clean_clustering_data'].values.tolist()
    return groups

In [18]:
def find_matching_blocks(strings):
    curr = strings[0]
    if len(strings) == 1:
        return curr
    else:
        cnt = 1
        for i in range(cnt, len(strings)):
            matches = difflib.SequenceMatcher(None, curr, strings[i])
            common = []
            for match in matches.get_matching_blocks():
                common.append(curr[match.a:match.a + match.size])
            #curr = ''.join(common)
            curr = ','.join(str(v) for v in common)
            cnt = cnt + 1
            if cnt == len(strings):
                break
        if curr == '':
            'NO COMMON PATTERNS HAVE BEEN FOUND'
        return curr

def get_similarity(rows):
    s = []
    for i in range(0, len(rows)):
        s.append(difflib.SequenceMatcher(None, rows[0], rows[i]).ratio() * 100)
    return s

In [19]:
STATISTICS = ["cluster_name", "cluster_size", "pattern",
              "mean_length", "mean_similarity", "std_length", "std_similarity"]

def statistics(solver_total_errors_df, output_mode='frame'):
    """
    Returns dictionary with statistic for all clusters
    "cluster_name" - name of a cluster
    "cluster_size" = number of log messages in cluster
    "pattern" - all common substrings in messages in the cluster
    "vocab" - vocabulary of all messages within the cluster (without punctuation and stop words)
    "vocab_length" - the length of vocabulary
    "mean_length" - average length of log messages in cluster
    "std_length" - standard deviation of length of log messages in cluster
    "mean_similarity" - average similarity of log messages in cluster
    (calculated as the levenshtein distances between the 1st and all other log messages)
    "std_similarity" - standard deviation of similarity of log messages in cluster
    :param clustered_df:
    :param output_mode: frame | dict
    :return:
    """
    clusters = []
    clustered_df = clustered_output(solver_total_errors_df, mode='CLEANED')
    for item in clustered_df:
        row = clustered_df[item]
        matcher = find_matching_blocks(row)
        lengths = [len(s) for s in row]
        similarity = get_similarity(row)
        #tokens = Tokens(row, self.tokenizer)
        #tokens.process()
        # vocab = tokens.get_vocabulary()
        # vocab_length = len(vocab)
        clusters.append([item,
                         len(row),
                         matcher,
                         # vocab,
                         # vocab_length,
                         np.mean(lengths),
                         np.mean(similarity),
                         np.std(lengths) if len(row) > 1 else 0,
                         np.std(similarity)])
    df = pd.DataFrame(clusters, columns=STATISTICS).round(2).sort_values(by='cluster_size', ascending=False)
    if output_mode == 'frame':
        return df
    else:
        return df.to_dict(orient='records')

In [20]:
stat = statistics(solver_total_errors_df, output_mode='frame')

In [21]:
stat_df = pd.DataFrame.from_dict(stat)
len(stat_df)

49

In [22]:
stat_df.sort_values(by='cluster_size', ascending=False)

Unnamed: 0,cluster_name,cluster_size,pattern,mean_length,mean_similarity,std_length,std_similarity
1,1,365,"['ERROR', 'No', 'matching', 'distribution', 'found'],",52.0,100.0,0.0,0.0
13,13,317,"['SyntaxError'],",15.0,100.0,0.0,0.0
7,7,283,"['ERROR', 'Failed', 'building', 'wheel'],",40.0,100.0,0.0,0.0
27,27,178,"['ModuleNotFoundError'],",23.0,100.0,0.0,0.0
28,28,143,"['FileNotFoundError'],",21.0,100.0,0.0,0.0
34,34,69,"['NameError'],",13.0,100.0,0.0,0.0
36,36,40,"['ERROR', 'Command', 'errored', 'out', 'with', 'exit', 'status', 'python', 'setup', 'py', 'egg', 'info', 'Check', 'the', 'logs'],",128.0,100.0,0.0,0.0
9,9,33,"['AttributeError'],",18.0,100.0,0.0,0.0
23,23,30,"['TypeError'],",13.0,100.0,0.0,0.0
3,3,23,"['urllib', 'error', 'HTTPError', 'HTTP', 'Error'],",49.0,100.0,0.0,0.0


In [23]:
def in_cluster(all_cluster_labels, cluster_label):
    results = []
    for idx, l in enumerate(all_cluster_labels):
        if l == cluster_label:
            results.append(solver_total_errors_df['clean_clustering_data'].values[idx])
    return results

In [24]:
in_cluster(solver_total_errors_df['cluster'], 0)

["['Command', 'exited', 'with', 'non', 'zero', 'status', 'code', 'ERROR', 'Files', 'directories', 'not', 'found', 'in', 'tmp', 'pip', 'install', 'ro', 'td', 'pip', 'egg', 'info']",
 "['Command', 'exited', 'with', 'non', 'zero', 'status', 'code', 'ERROR', 'Files', 'directories', 'not', 'found', 'in', 'tmp', 'pip', 'install', 'mi', 'bv', 'pip', 'egg', 'info']"]

In [25]:
in_cluster(solver_total_errors_df['cluster'], 1)

["['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', 'distribution', 'found']",
 "['ERROR', 'No', 'matching', '

In [26]:
in_cluster(solver_total_errors_df['cluster'], 2)

["['error', 'HTTP', 'Error']",
 "['error', 'HTTP', 'Error']",
 "['error', 'HTTP', 'Error']"]

In [27]:
in_cluster(solver_total_errors_df['cluster'], 3)

["['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPError', 'HTTP', 'Error']",
 "['urllib', 'error', 'HTTPE

In [28]:
in_cluster(solver_total_errors_df['cluster'], 4)

["['Error', 'config', 'executable', 'not', 'found']",
 "['Error', 'config', 'executable', 'not', 'found']",
 "['Error', 'config', 'executable', 'not', 'found']"]

In [31]:
in_cluster(solver_total_errors_df['cluster'], 5)

["['ERROR', 'CUDA', 'could', 'not', 'be', 'found', 'on', 'your', 'system']",
 "['ERROR', 'CUDA', 'could', 'not', 'be', 'found', 'on', 'your', 'system']",
 "['ERROR', 'CUDA', 'could', 'not', 'be', 'found', 'on', 'your', 'system']"]