Use this notebook to train the model if you're using the clustering function in a different context.

### 0. Installing the requirements

In [None]:
import pandas as pd
import os
import numpy as np
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
from umap import UMAP
from sklearn.cluster import KMeans
#from sklearn import svc
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer

### 1. Data Preparation
Go to ServiceNow and export your training data in CSV format.

In [None]:
# Path to the incident export used for training; replace the file path with
# the file that you will be using.
file_path = 'incident_230630_240111.csv'

# Export column headers mapped to the shorter names used in this notebook.
column_aliases = {
    'problem_id.u_component': 'problem_component',
    'inc_short_description': 'short_description',
}

# Read the export, cast every column to str, then apply the aliases.
raw_incidents = pd.read_csv(file_path, encoding='latin1')
data = raw_incidents.astype(str).rename(columns=column_aliases)

# Documents handed to the topic model: one short description per row.
description = data['short_description'].tolist()

In [None]:
# Fixed seed so that retraining on the same data yields the same topics.
# Without it both UMAP and KMeans start from a random state on every run.
RANDOM_STATE = 42

# Term extraction for the topic representations: English stop words are
# dropped and only 2- and 3-word phrases are counted.
vectorizer_model = CountVectorizer(stop_words="english", ngram_range=(2, 3))

# Dimensionality reduction applied before clustering.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
    random_state=RANDOM_STATE,
)

# KMeans partitions the documents into exactly 20 clusters.
cluster_model = KMeans(n_clusters=20, random_state=RANDOM_STATE)

In [None]:
from sentence_transformers import SentenceTransformer

# Identifier of the sentence-embedding model to load.
embedding_model_name = "BAAI/bge-small-en-v1.5"

# Instantiate the embedding model from its identifier.
model = SentenceTransformer(embedding_model_name)

In [None]:
# DEFINING THE MODEL
#
# Assemble the BERTopic pipeline from the embedding, UMAP, clustering and
# vectorizer components configured in the earlier cells.

topic_model = BERTopic(
    language="english",
    # Reuse the SentenceTransformer instance loaded in the earlier cell.
    # Passing the model name string instead would make BERTopic load a
    # second copy of the same weights.
    embedding_model=model,
    umap_model=umap_model,
    # The clustering slot is called `hdbscan_model`, but it receives the
    # KMeans instance configured earlier.
    hdbscan_model=cluster_model,
    vectorizer_model=vectorizer_model,
    top_n_words=5,
    # NOTE(review): min_topic_size and calculate_probabilities are
    # HDBSCAN-oriented settings; confirm whether they have any effect when
    # a KMeans clusterer is supplied.
    min_topic_size=2,
    calculate_probabilities=True,
    verbose=True,
)

In [None]:
import requests

# Connectivity check against Hugging Face (presumably to confirm the embedding
# model can be downloaded - verify intent). The timeout stops the cell from
# hanging indefinitely when the host is unreachable; the response is left as
# the cell's last expression so the notebook displays its status.
requests.get('https://www.huggingface.co', timeout=10)

In [None]:
# Train model
# fit_transform returns a (topics, probabilities) pair. Unpack it so that
# `topics` holds the per-document topic assignments, not the whole tuple.
topics, probs = topic_model.fit_transform(description)

# Summary table of the topics found during training.
topics_info = topic_model.get_topic_info()

### 2. Saving the Model as a Pickle File

In [None]:
import pickle

# Save model in pickle file

# Rename the output file based on the training context.
# The context manager closes the file even if pickling raises.
with open("trained_model.pkl", 'wb') as model_out:
    pickle.dump(topic_model, model_out)

### 3. Testing the Saved Model Pickle File

In [None]:
import pickle  # imported here too so this cell runs on its own after a kernel restart

input_file_path = 'incident_sla-051524-1850.csv' # replace with file for testing

# Load the trained model back from disk. The context manager closes the
# file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load model files that
# you created yourself or otherwise trust.
with open('trained_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Read the test export the same way as the training data: every column is
# cast to str and the export headers are renamed to the notebook's names.
input_data = pd.read_csv(input_file_path, encoding='latin1').apply(lambda x: x.astype(str)).rename(columns={'problem_id.u_component': 'problem_component', 'inc_short_description': 'short_description'})

# Pass a plain list of strings, matching what the model was trained on.
docs_processing = input_data['short_description'].tolist()

# Use transform, not fit_transform: the goal is to check the saved model's
# predictions on new documents. fit_transform would retrain the model on the
# test file and discard everything that was loaded from the pickle.
topics, probs = loaded_model.transform(docs_processing)

# Topic overview of the loaded model; per-document assignments for the test
# file are in `topics`.
result = loaded_model.get_topic_info()

result