# Modify database.db with new clusters
Meant to be used on a separate machine

In [None]:
from server import models
from server.controllers.utils import ai, database, vstore

from flask import session
import faiss

from os import environ
from dotenv import load_dotenv

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably supplies credentials/config used by the server
# modules (e.g. the embedder in `ai`) — confirm which variables are required.
load_dotenv()

In [None]:
# Initialize the Flask app via the factory exposed by server.models.
# NOTE(review): the queries in cluster_all_chats go through models.db, which
# presumably requires an active application context
# (e.g. `with app.app_context():`) — confirm before calling it.
app = models.create_app()

In [None]:
def cluster_all_chats(k=5):
    """
    Clusters all unflagged chats into experiences using k-means clustering.

    Will rewrite the entire experience table as well as reassign the
    experience field of every clustered chat. For each centroid, the chat
    nearest to it becomes an experience (reusing that chat's id, with the
    chat's summary as the experience name); every chat is then assigned to
    its nearest experience.

    @param k: The number of clusters to create. Capped at the number of
              unflagged chats, because k-means cannot be trained with more
              centroids than points.
    @raise ValueError: If k is smaller than 1. Raised before anything is
                       deleted so an invalid call does not wipe the table.
    """
    # Local import: this file does not import numpy at the top level.
    import numpy as np

    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    models.Experiences.query.delete()  # clear the table
    models.db.session.commit()  # experience field in chat table will be set to null

    # Loads full rows for all unflagged chats; potentially heavy, consider
    # fetching only ids/summaries.
    chats = models.Chats.query.filter_by(flag=False).all()
    if not chats:
        return  # no chats to cluster
    chats_vstore = vstore.create_chats_vectorstore(chats, ai.llm_embedder)

    # Embed every summary exactly once (RAM intensive). The same vectors are
    # reused below for the chat -> experience assignment, so no embedding
    # calls are spent on re-embedding the same text.
    embeddings = [ai.llm_embedder.embed_query(chat.summary) for chat in chats]

    # faiss expects a contiguous float32 matrix of shape (n_chats, dim).
    training_matrix = np.ascontiguousarray(embeddings, dtype="float32")
    n_clusters = min(k, len(chats))

    # cluster using faiss.Kmeans
    kmeans = faiss.Kmeans(training_matrix.shape[1], n_clusters, niter=20, verbose=True)
    kmeans.train(training_matrix)

    # Promote the chat closest to each centroid to an experience.
    # Two centroids can resolve to the same nearest chat; the experience
    # reuses the chat id, so a repeated chat is skipped rather than inserted
    # twice (presumably `id` is the primary key — duplicates would fail).
    promoted_chat_ids = set()
    for centroid in kmeans.centroids:
        closest_chat_doc, _ = vstore.get_k_nearest_by_vector(centroid, chats_vstore, 1)[0]
        closest_chat = models.Chats.query.get(closest_chat_doc.page_content)
        if closest_chat.id in promoted_chat_ids:
            continue
        promoted_chat_ids.add(closest_chat.id)
        models.db.session.add(models.Experiences(name=closest_chat.summary, id=closest_chat.id))
    models.db.session.commit()  # one commit for the whole batch of experiences

    # assign each chat to the closest experience
    experiences = models.Experiences.query.all()
    exp_vstore = vstore.create_exp_vectorstore(experiences, ai.llm_embedder)
    for chat, embedding in zip(chats, embeddings):
        closest_exp_doc, _ = vstore.get_k_nearest_by_vector(embedding, exp_vstore, 1)[0]
        closest_exp = models.Experiences.query.get(closest_exp_doc.page_content)
        chat.experience = closest_exp.id
    models.db.session.commit()  # one commit for all chat reassignments