In [1]:
import os
import random

import cv2
import numpy as np

from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, HDBSCAN
from sklearn.mixture import GaussianMixture

In [2]:
def generate_test_dataset(path, count, size=256, x=40, y=220, rand_RGB_value=0, rand_xy_value=5, font_scale=9, font_thickness=45):
    """Generate a folder of synthetic single-digit images.

    Every image is a ``size`` x ``size`` light-coloured square with one random
    digit (0-9) drawn on it in black. Files are written as
    ``<index><digit>.png``, so the last character before the extension is the
    digit shown in the image.

    Args:
        path (str): folder path to generate images in (created if missing)
        count (int): number of images
        size (int, optional): side length of the square image. Defaults to 256.
        x (int, optional): x coordinate of character. Defaults to 40.
        y (int, optional): y coordinate of character. Defaults to 220.
        rand_RGB_value (int, optional): maximum random shift applied to each
            background colour channel; shifted values are clamped to 0..255.
            Defaults to 0.
        rand_xy_value (int, optional): maximum random coordinate shift.
            Defaults to 5.
        font_scale (int, optional): font scale. Defaults to 9.
        font_thickness (int, optional): font thickness. Defaults to 45.

    Raises:
        ValueError: if ``rand_RGB_value`` or ``rand_xy_value`` is negative.
        OSError: if OpenCV reports that an image could not be written.
    """
    if rand_RGB_value < 0 or rand_xy_value < 0:
        raise ValueError("rand_RGB_value and rand_xy_value must be non-negative")

    def jitter_channel(base):
        """Shift one colour channel randomly and keep it a valid uint8 value."""
        shifted = base + random.randint(-rand_RGB_value, rand_RGB_value)
        # Without clamping, values above 255 (or below 0) wrap around when
        # stored as uint8 and a near-white background turns dark.
        return min(255, max(0, shifted))

    def generate_image(character_to_put_on):
        """Render a single character on a freshly created background.

        Args:
            character_to_put_on (str): character to write on image

        Returns:
            numpy.ndarray: prepared image of shape (size, size, 3), dtype uint8
        """
        # Channel order is the one OpenCV uses when writing the file (B, G, R).
        bg = tuple(jitter_channel(base) for base in (220, 245, 245))
        background = np.full((size, size, 3), bg, dtype=np.uint8)

        # Text origin, jittered independently along each axis.
        origin = (x + random.randint(-rand_xy_value, rand_xy_value),
                  y + random.randint(-rand_xy_value, rand_xy_value))

        # put given character text over background
        return cv2.putText(background, character_to_put_on, origin,
                           cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0),
                           font_thickness, cv2.LINE_AA)

    os.makedirs(path, exist_ok=True)
    for i in range(count):
        character = random.choice("0123456789")
        image = generate_image(character)
        target = os.path.join(path, f"{i}{character}.png")
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(target, image):
            raise OSError(f"could not write image to {target!r}")

In [3]:
# One-off step: uncomment to (re)generate the synthetic digit dataset used below.
# generate_test_dataset("heredenene", 1000)

In [5]:
from DL_Datasets import *
from DL_Models import *
from DL_ModelTrainer import ModelTrainer

# Dataset over the image folder and the network to run on it.
# NOTE(review): ImageDataset and PowerOf2s256andAbove presumably come from the
# star imports above - confirm which module provides each.
dataset = ImageDataset("heredenene")
model = PowerOf2s256andAbove()

# ckpt_path names a previously saved checkpoint file.
# NOTE(review): presumably the trainer restores weights from it - confirm.
mt = ModelTrainer(
    num_of_epochs=100,
    lr=0.001,
    batch_size=16,
    loss_type="mse",
    dataset=dataset,
    model=model,
    ckpt_path="heredenene_PowerOf2s256andAbove_mse_05:16:19:01:53/min_loss:0.06544473022222519_epoch:19.pth",
)

# Calling the trainer returns a mapping; its keys are kept as `paths` and its
# values are stacked into the `reps` array, in matching order.
features = mt()
paths = [*features.keys()]
reps = np.array([*features.values()])

In [7]:
# TODO: determine the number of clusters automatically with the elbow method.
def get_labels(method="", number_of_clusters=None, max_iter=200, DBSCAN_eps=0.5, DBSCAN_min_samples=5, HDBSCAN_min_cluster_size=5, HDBSCAN_max_cluster_size=None, verbose=0, data=None):
    """Cluster feature vectors with the chosen algorithm and return the labels.

    Args:
        method (str, optional): clustering algorithm, case-insensitive. One of
            "kmeans", "agglomerative", "dbscan", "gmm" or "hdbscan". The empty
            string (default) selects "kmeans".
        number_of_clusters (int, optional): number of clusters/components.
            Required for "kmeans", "agglomerative" and "gmm"; ignored by the
            density-based methods. Defaults to None.
        max_iter (int, optional): iteration limit for "kmeans" and "gmm".
            Defaults to 200.
        DBSCAN_eps (float, optional): ``eps`` for DBSCAN. Defaults to 0.5.
        DBSCAN_min_samples (int, optional): ``min_samples`` for DBSCAN.
            Defaults to 5.
        HDBSCAN_min_cluster_size (int, optional): ``min_cluster_size`` for
            HDBSCAN. Defaults to 5.
        HDBSCAN_max_cluster_size (int, optional): ``max_cluster_size`` for
            HDBSCAN. Defaults to None.
        verbose (int, optional): verbosity for "kmeans" and "gmm".
            Defaults to 0.
        data (array-like, optional): samples to cluster, one row per sample.
            Defaults to the notebook-level ``reps`` array.

    Returns:
        The result of the estimator's ``fit_predict``: one label per sample
        (DBSCAN and HDBSCAN use -1 for noise points).

    Raises:
        ValueError: if ``method`` is not a known algorithm, or if
            ``number_of_clusters`` is missing for a method that needs it.
    """
    known_methods = ("kmeans", "agglomerative", "dbscan", "gmm", "hdbscan")
    needs_cluster_count = ("kmeans", "agglomerative", "gmm")

    # "" keeps its historical meaning: plain KMeans.
    key = (method or "").strip().lower() or "kmeans"
    if key not in known_methods:
        raise ValueError(
            f"unknown clustering method {method!r}; expected one of {known_methods}"
        )
    if number_of_clusters is None and key in needs_cluster_count:
        raise ValueError(f"number_of_clusters is required for method {key!r}")

    # Fall back to the notebook-level feature matrix when no data is passed.
    samples = reps if data is None else data

    if key == "kmeans":
        estimator = KMeans(n_clusters=number_of_clusters, max_iter=max_iter, verbose=verbose)
    elif key == "agglomerative":
        estimator = AgglomerativeClustering(n_clusters=number_of_clusters)
    elif key == "dbscan":
        estimator = DBSCAN(eps=DBSCAN_eps, min_samples=DBSCAN_min_samples)
    elif key == "gmm":
        estimator = GaussianMixture(n_components=number_of_clusters, max_iter=max_iter, verbose=verbose)
    else:  # "hdbscan"
        estimator = HDBSCAN(min_cluster_size=HDBSCAN_min_cluster_size, max_cluster_size=HDBSCAN_max_cluster_size)

    return estimator.fit_predict(samples)