In [None]:
%matplotlib inline

from pathlib import Path
import shutil

from matplotlib import pyplot as plt

import numpy as np
import pandas as pd

from sklearn import cluster

from tqdm.auto import tqdm

from PIL import Image, ImageOps, ImageEnhance

In [None]:
# Move the kernel's working directory up one level.
# NOTE(review): this is not idempotent - re-running the cell climbs one more
# level each time, so it should be executed once per kernel session.
%cd ..

In [None]:
# Project-local import. It lives after the `%cd ..` cell rather than in the top
# import cell, presumably because the package only resolves from that
# directory - TODO confirm.
from myanmar_digits import load_data

# The return value is unpacked into two objects, X and y.
X, y = load_data(return_X_y=True)

In [None]:
# Display the shapes of both objects as a quick sanity check on the load.
X.shape, y.shape

In [None]:
def load_gray_image(path:str, return_type:str="numpy"):
    """Read an image file and convert it to grayscale.

    Parameters
    ----------
    path : str
        Location of the image file; it is opened in binary mode.
    return_type : str
        One of "numpy" (default), "pillow" or "opencv".

    Returns
    -------
    The grayscale image as a NumPy array ("numpy") or as a Pillow image
    ("pillow").

    Raises
    ------
    AssertionError
        If ``return_type`` is not one of the three accepted names.
    NotImplementedError
        If ``return_type`` is "opencv"; the file is still opened and converted
        before this is raised.
    """
    assert return_type in ["numpy", "pillow", "opencv"]
    with open(path, "rb") as fh:
        gray = ImageOps.grayscale(Image.open(fh))
        # Guard clauses: handle the two special modes first, the remaining
        # accepted value ("numpy") falls through to the array conversion.
        if return_type == "pillow":
            return gray
        if return_type == "opencv":
            raise NotImplementedError("Not supported yet.")
        return np.array(gray)

# Load the sample grid with the default return_type ("numpy") and preview it
# with a gray colormap.
img = load_gray_image("../raw_data/knn_sample_grid.1.jpeg")
plt.imshow(img, cmap="gray")

In [None]:
def get_temp_image(img:np.array, copy:bool):
    """Pick the array a transform should work on.

    Returns ``img.copy()`` when ``copy`` is truthy, otherwise ``img`` itself,
    so that callers can choose between working on a copy and in-place edits.
    """
    return img.copy() if copy else img

def normalize(img:np.array, copy:bool=True):
    """Invert an image and stretch it to the full 0-255 range.

    The smallest input value maps to 255 and the largest to 0; values in
    between are scaled linearly and truncated to ``np.uint8``.

    Parameters
    ----------
    img : array-like
        Input image. It is never modified.
    copy : bool
        Accepted for backward compatibility. The result is always a newly
        allocated array, so the flag has no observable effect.

    Returns
    -------
    np.ndarray
        ``uint8`` array with the same shape as ``img``.

    Raises
    ------
    ValueError
        If ``img`` is empty (NumPy cannot reduce a zero-size array).
    """
    _img = np.asarray(img)
    lo = _img.min()
    hi = _img.max()
    if hi == lo:
        # A constant image has zero range; dividing by it would emit a
        # RuntimeWarning and cast NaN to uint8. Return a flat black image.
        return np.zeros(_img.shape, dtype=np.uint8)
    _img = 1.0 - (_img - lo) / (hi - lo)
    return (255 * _img).astype(np.uint8)

# Value that enhance() writes into bright pixels, and the base from which its
# upper threshold (MAX_COLOR - thr) is derived. Presumably the maximum 8-bit
# intensity - TODO confirm images are always uint8.
MAX_COLOR = 255

def enhance(img:np.array, thr:int=64, copy:bool=True):
    """Saturate the dark and bright ends of an image.

    Pixels below ``thr`` become 0, then pixels at or above
    ``MAX_COLOR - thr`` become ``MAX_COLOR``. Everything in between is kept.

    Parameters
    ----------
    img : np.ndarray
        Input image.
    thr : int
        Width of the band clipped at each end of the intensity range.
    copy : bool
        When true the edits are made on a copy; otherwise ``img`` itself is
        modified.

    Returns
    -------
    The array that was edited (the copy, or ``img`` when ``copy`` is false).
    """
    work = get_temp_image(img, copy)
    # Dark end first; the bright mask is computed afterwards, on the already
    # darkened array, exactly in this order.
    dark = work < thr
    work[dark] = 0
    bright = work >= MAX_COLOR - thr
    work[bright] = MAX_COLOR
    return work

# Invert and stretch the sample to 0-255, then saturate both ends with
# thr=100 (values < 100 become 0, values >= 155 become 255) and preview.
# enhance() runs with its default copy=True, so img_norm is left unchanged.
img_norm = normalize(img)
img_enh = enhance(img_norm, thr=100)
plt.imshow(img_enh, cmap="gray")

In [None]:
# Wrap the enhanced array in a Pillow image and apply a contrast factor of
# 1.5; the result of enhancer.enhance() is passed straight to imshow.
enhancer = ImageEnhance.Contrast(Image.fromarray(img_enh))
img_enh_ = enhancer.enhance(1.50)
plt.imshow(img_enh_, cmap="gray")

In [None]:
# Load the pseudo-label table, using the first CSV column as the index.
df = pd.read_csv("../pseudo_label.csv", index_col=0)
# head must be *called*: a bare `df.head` only shows the bound-method repr
# instead of the first five rows.
df.head()

In [None]:
# Cluster the single `cx` feature into 10 groups, seeding the centroids at 10
# evenly spaced positions between 0.0 and 1.0.
# NOTE(review): this assumes `cx` is scaled to roughly [0, 1] - TODO confirm.
kmean = cluster.KMeans(
    n_clusters=10,
    init=np.linspace(0.0, 1.0, 10).reshape((10, 1)),
    # With explicit initial centers only a single initialisation is ever run;
    # stating n_init=1 avoids scikit-learn's "performing only one init" warning.
    n_init=1,
    max_iter=500,
    random_state=42,
)
# Store the cluster index of every row as its pseudo-label.
df["p_label"] = kmean.fit_predict(df[["cx"]].values)

In [None]:
# Copy every file into a per-cluster folder: ../data_labelled/c<label>/.
for l in range(10):
    print ("working on {}".format(l))
    # File ids of the rows assigned to the current cluster.
    fnames = df.loc[df.p_label == l, "fid"]
    dp = Path("..") / "data_labelled" / "c{}".format(l)
    print ("dp is {}".format(dp))
    # Create the target folder (and parents) if it is not there yet.
    dp.mkdir(parents=True, exist_ok=True)
    print ("num files : {}".format(len(fnames)))
    for f in fnames:
        sp = Path("..") / "data" / f
        shutil.copy(str(sp), str(dp))

In [None]:
# Show the file ids whose pseudo-label is 0.
df.fid[df.p_label == 0]