In [0]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Sample corpus: short issue / pull-request titles. The trailing comment on
# each entry is the URL the title was taken from.
documents = [
    "Visual Studio 2019 crashing when click RMB on rule in Analyzers' dependencies",  # https://github.com/dotnet/roslyn/issues/40720
    "Avoid crash on concat on structs with ToString member",  # https://github.com/dotnet/roslyn/pull/38860/commits
    "Enum implicit cast to string fails when element is named ToString",  # https://github.com/dotnet/roslyn/issues/40256
    "Enum with ToString member crashes in string concatenation",  # https://github.com/dotnet/roslyn/issues/38858
    "Crash on right click a Analyze rule in Solution-Explorer",  # https://github.com/dotnet/roslyn/issues/36304
    "Handle lazy loading of analyzer command handlers",  # https://github.com/dotnet/roslyn/pull/36740
]

# Turn every title into a TF-IDF weighted term vector, dropping English
# stop words.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(documents)

# Partition the titles into `true_k` clusters.
# NOTE: no random_state is passed and only one initialisation is run
# (n_init=1), so cluster numbering -- and possibly membership -- can differ
# from one run to the next.
true_k = 2
model = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1)
model.fit(X)

print("Top terms per cluster:")
# For each centroid: feature indices ordered from highest to lowest weight.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# use its replacement when available and fall back on older installations.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out()
else:
    terms = vectorizer.get_feature_names()
for i in range(true_k):
    # One line per cluster header and one line per term. The former Python 2
    # trailing-comma / bare `print` idioms had no effect under Python 3 and
    # have been dropped; the printed output is unchanged.
    print("Cluster %d:" % i)
    for ind in order_centroids[i, :10]:
        print(' %s' % terms[ind])

print("\n")
print("Prediction")

# Assign two of the titles to a cluster, reusing the vocabulary and IDF
# weights learned above (transform, not fit_transform).
Y = vectorizer.transform(["Avoid crash on concat on structs with ToString member"])
prediction = model.predict(Y)
print(prediction)

Y = vectorizer.transform(["Visual Studio 2019 crashing when click RMB on rule in Analyzers' dependencies"])
prediction = model.predict(Y)
print(prediction)
 

Top terms per cluster:
Cluster 0:
 rule
 click
 handle
 analyzer
 command
 loading
 lazy
 handlers
 explorer
 analyze
Cluster 1:
 tostring
 member
 string
 enum
 crashes
 concatenation
 concat
 structs
 avoid
 element


Prediction
[1]
[0]
