In [None]:
#Data management
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
import cv2
import re
import string
import os
from tqdm import tqdm

#Model management
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,models
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
import torch
import joblib

#data visualize
import matplotlib.pyplot as plt
import seaborn as sns

#image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

# Preprocessing Test Data

In [None]:
def line_break(image , show_graph = False) :
    """Split a binarised 2-D image into horizontal strips, one per run of inked rows.

    A row counts as "inked" when at least one of its pixels equals 0. Every
    maximal run of consecutive inked rows becomes one strip; the strip also
    keeps the first row below the run when such a row exists.

    Parameters
    ----------
    image : 2-D numpy array
        Pixels equal to 0 are treated as ink.
    show_graph : bool, optional
        When true, plot the per-row ink projection with matplotlib.

    Returns
    -------
    list of 2-D numpy arrays
        ``[image]`` (the very same object) when no row contains ink.
        Otherwise the strips from top to bottom, where strips shorter than
        8 rows are dropped, strips taller than 50 rows keep only their first
        25 rows, and strips wider than 200 columns keep only their first 200
        columns. The list can be empty when every strip is dropped.

    Raises
    ------
    ValueError
        If ``image`` is not 2-D (the shape cannot be unpacked into two values).
    """
    _n_rows, _n_cols = image.shape  # enforces the 2-D requirement

    # Vectorised row projection: 1 for rows containing a 0 pixel, else 0.
    # A sentinel 0 is placed on both ends so every inked run is bracketed.
    row_has_ink = (image == 0).any(axis=1)
    projection = [0, *row_has_ink.astype(int).tolist(), 0]

    if show_graph :
        plt.plot(projection)
        plt.show()

    # Projection index p maps to image row p-1, so a gap between two
    # consecutive blank indices (a, b) slices the image as rows a .. b-1.
    blank_positions = [pos for pos, flag in enumerate(projection) if flag == 0]
    final_cut = [
        (above, below)
        for above, below in zip(blank_positions, blank_positions[1:])
        if below != above + 1
    ]
    if not final_cut :
        return [image]

    payload = []
    for (start, stop) in final_cut :
        strip = image[start:stop, :]
        if strip.shape[0] < 8 :
            # Too thin to be a text line; skip it.
            continue
        strip = strip.copy()  # detach from the caller's array
        if strip.shape[0] > 50 :
            strip = strip[:25, :]
        if strip.shape[1] > 200 :
            strip = strip[:, :200]
        payload.append(strip)
    return payload

In [None]:
def padding(gray_im,h = 25,w = 200):
    """Fit an image into an ``h`` x ``w`` canvas and centre it on a 255-valued border.

    Any dimension larger than the target is shrunk to the target with
    ``cv2.resize`` (aspect ratio is not preserved); smaller dimensions are
    left alone. The result is then padded on all sides with the constant 255
    so that it is exactly ``h`` rows by ``w`` columns.

    Parameters
    ----------
    gray_im : numpy array whose first two axes are (rows, cols).
    h, w : int
        Target height and width of the returned image.

    Returns
    -------
    numpy array of ``h`` rows and ``w`` columns.
    """
    src_h = gray_im.shape[0]
    src_w = gray_im.shape[1]

    # Clamp both dimensions first and resize once. Resizing height and width
    # in two steps with the original height reused in the second step undoes
    # the height reduction when both dimensions are too large, which then
    # yields negative border sizes.
    fit_h = min(src_h, h)
    fit_w = min(src_w, w)
    if fit_h != src_h or fit_w != src_w:
        # cv2.resize expects the target size as (width, height).
        gray_im = cv2.resize(gray_im,(fit_w,fit_h))

    # Split the remaining space so the image sits in the centre.
    top = h//2 - fit_h//2
    bottom = h - (fit_h+top)
    left = w//2 - fit_w//2
    right = w - (fit_w+left)
    im_resize = cv2.copyMakeBorder(gray_im, top, bottom, left, right, cv2.BORDER_CONSTANT, value = 255)
    return im_resize

In [None]:
# Output folder for the cropped line images. An already-existing folder is
# accepted; any other failure (e.g. missing permissions) surfaces as an OSError
# instead of being silently ignored.
os.makedirs("crop_images", exist_ok=True)

In [None]:
def get_img(idim,floderpath):
    """Crop every annotated box of one sample and save each as a padded line image.

    Reads ``image.png`` and ``test.json`` from the folder ``floderpath/idim``.
    The JSON must load into a table with the columns ``id``, ``x``, ``y``,
    ``width`` and ``height``. For every row the box (grown by one pixel on
    each side) is cut out of the thresholded image, reduced to its first text
    strip with ``line_break``, padded to 25 x 200 with ``padding`` and written
    to ``./crop_images/<idim>_<id>.png``.

    Parameters
    ----------
    idim : str
        Name of the sample folder; also used as file-name prefix.
    floderpath : str
        Directory that contains the sample folder.

    Returns
    -------
    pandas.DataFrame
        One row per box with the columns ``id`` (the box id from the JSON)
        and ``file`` (the written file name).

    Raises
    ------
    FileNotFoundError
        If ``image.png`` cannot be read.
    """
    imtarget = os.path.join(floderpath, idim)
    impath = os.path.join(imtarget, "image.png")
    dfpath = os.path.join(imtarget, "test.json")

    df = pd.read_json(dfpath)

    im = cv2.imread(impath)
    if im is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f"could not read image: {impath}")
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY )
    gray = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,11,2)

    file_list = []
    id_list = []

    boxes = zip(df["id"], df["x"], df["y"], df["width"], df["height"])
    for box_id, x, y, width, height in boxes:
        # Grow the box by one pixel, but never let the start index go
        # negative: a negative slice start would count from the far edge and
        # produce an empty or wrong crop for boxes touching the top/left edge.
        row_start = max(int(y)-1, 0)
        col_start = max(int(x)-1, 0)
        gray_crop = np.array(gray[row_start:int(y+height)+1,col_start:int(x+width)+1])

        list_break = line_break(gray_crop)
        # NOTE(review): earlier code also assigned list_break[1] when two or
        # more strips were found, but that value was always overwritten by the
        # if/else that followed, so the first strip has always been the one
        # used. That effective behaviour is kept here; confirm whether the
        # second strip was ever intended.
        if len(list_break) == 0:
            break_img = gray_crop
        else:
            break_img = list_break[0]
        break_img = padding(break_img,h = 25,w = 200)

        imagename = idim +"_" + box_id
        cv2.imwrite(f"./crop_images/{imagename}.png",break_img)

        file_list.append(f"{imagename}.png")
        id_list.append(box_id)

    dfn = pd.DataFrame({"id": id_list, "file": file_list})

    return dfn

In [None]:
# Crop every sample folder of the "test" split.
floderpath = "../input/super-ai-engineer-2021-font-recognition/test/"
filelist = os.listdir(floderpath)
# Collect the per-sample frames and concatenate once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
test_frames = [get_img(idim, floderpath) for idim in tqdm(filelist)]
df1 = pd.concat(test_frames, ignore_index=True)

In [None]:
# Crop every sample folder of the "final_test" split and add the rows to df1.
floderpath = "../input/super-ai-engineer-2021-font-recognition/final_test/"
filelist = os.listdir(floderpath)
final_frames = [get_img(idim, floderpath) for idim in tqdm(filelist)]
# Single concat instead of DataFrame.append (removed in pandas 2.0).
# Re-running this cell on its own adds the final_test rows to df1 again.
df1 = pd.concat([df1, *final_frames], ignore_index=True)

In [None]:
# Persist the crop index (one row per crop: id and file name) without the row index.
df1.to_csv(path_or_buf="df.csv", index=False)

In [None]:
floderpath = "./crop_images"
filelist = os.listdir(floderpath)
# Display both numbers together (rows in the index, files on disk); a bare
# expression that is not the last line of a cell is evaluated but never shown.
df1.shape[0], len(filelist)

# Inference on the test crops

In [None]:
# Work from the crop index that is already in memory. To resume from a saved
# run instead, point `dataset` at that dataset (e.g. "../input/w1n1p3-test1")
# and load the index with pd.read_csv(dataset+"/df.csv").
dataset = "."
df = df1

img_folder = f"{dataset}/crop_images"
target_size = (25, 200)  # (height, width) of the padded crops
print(len(df))
df.head()

In [None]:
# The generator only multiplies pixel values by 1/255; nothing else is applied.
datagen = ImageDataGenerator(rescale=1./255.)

# Unlabelled, unshuffled stream over the crop files listed in df["file"].
# Predictions are matched back to df rows by position further down, so the
# order must not be shuffled.
test_generator = datagen.flow_from_dataframe(
    dataframe=df,
    directory=img_folder,
    x_col="file",
    y_col=None,
    class_mode=None,
    target_size=target_size,
    batch_size=32,
    shuffle=False,
)

In [None]:
 def get_pred(path_y_col,test_generator):
    """Load the saved Keras model at ``path_y_col`` and predict class indices.

    Returns the argmax over axis 1 of ``model.predict(test_generator)``,
    i.e. one class index per predicted row.
    """
    # NOTE(review): the `def` line carries one stray leading space; this
    # presumably only runs because IPython strips a cell's leading
    # indentation - consider removing the space.
    classifier = keras.models.load_model(path_y_col)
    scores = classifier.predict(test_generator)
    return scores.argmax(axis=1)

def decode_csv(path_y_col,pred):
    """Translate predicted class indices into labels via ``decode.csv``.

    ``path_y_col + "decode.csv"`` is read with pandas; for every value ``v``
    in ``pred`` the result holds the first-row entry of the column named
    ``str(v)``.

    Returns an array shaped like ``pred`` containing the looked-up labels.
    """
    table = pd.read_csv(path_y_col+"decode.csv")

    def lookup(class_index):
        # Column header is the class index as text; the label sits in row 0.
        return table[str(class_index)][0]

    translate = np.vectorize(lookup)
    return translate(pred)

def decode_dict(label_index,label_true,pred):
    """Translate predictions into labels with an explicit key -> label table.

    ``label_index`` and ``label_true`` are paired element by element into a
    lookup table; every value of ``pred`` is replaced by its paired label.
    A value of ``pred`` without an entry raises ``KeyError``.

    Returns an array shaped like ``pred`` containing the looked-up labels.
    """
    table = {key: label for key, label in zip(label_index, label_true)}

    def lookup(key):
        return table[key]

    translate = np.vectorize(lookup)
    return translate(pred)

In [None]:
# Locations of the saved model for each predicted attribute; each one is
# handed to get_pred, which loads it with keras.models.load_model.
path_fontFamily = "../input/w1n2p4-oneline-fontfamily/model/"
path_fontSize = "../input/w1n2p7-oneline-size/model_weight/"
path_fontStyle = "../input/w1n2p5-oneline-style/model/"
path_fontWeight = "../input/w1n2p6-oneline-weight/model_weight/"

In [None]:
# One prediction pass per attribute, in the order family, size, style, weight.
# Each pass loads its own model and reads the same unshuffled generator.
pred_fontFamily, pred_fontSize, pred_fontStyle, pred_fontWeight = (
    get_pred(model_path, test_generator)
    for model_path in (path_fontFamily, path_fontSize, path_fontStyle, path_fontWeight)
)

# Decode predictions into labels

In [None]:
# Folders that hold each model's decode.csv. They get their own names so the
# model paths (path_fontFamily, ...) stay intact: rebinding those names here
# would make a re-run of the prediction cell load from the wrong directory.
decode_dir_fontFamily = "../input/w1n2p4-oneline-fontfamily/"
decode_dir_fontSize = "../input/w1n2p7-oneline-size/"  # not read here; font size is decoded with decode_dict
decode_dir_fontStyle = "../input/w1n2p5-oneline-style/"
decode_dir_fontWeight = "../input/w1n2p6-oneline-weight/"

# Attributes that ship a decode.csv are decoded through decode_csv.
de_fontFamily = decode_csv(decode_dir_fontFamily,pred_fontFamily)
de_fontStyle = decode_csv(decode_dir_fontStyle,pred_fontStyle)
de_fontWeight = decode_csv(decode_dir_fontWeight,pred_fontWeight)

In [None]:
#Pred FontSize
# Font size is decoded from an inline table: class i maps to size_true[i].
size_true = ['12px', '14px', '16px', '18px', '20px', '22px', '24px']
size_index = list(range(len(size_true)))
de_fontSize = decode_dict(size_index, size_true, pred_fontSize)

In [None]:
#Pred filename
# Font file for each decoded family label, kept as (label, file) pairs so the
# two lists cannot drift apart. The labels must match what decode_csv returns
# for the font family; an unknown label raises KeyError in decode_dict.
_FONT_FILES = [
    ('Angsana_New', 'ANGSA.ttf'),
    ('Cordia_New', 'CORDIA.ttf'),
    ('DM_Shining_Star_Regular', 'DM Shining Star Regular.ttf'),
    ('FC_Knomphing_Regular', 'FC Knomphing Regular.ttf'),
    ('fonttintin', 'fonttintin.ttf'),
    ('Kunlasatri', 'Kunlasatri.ttf'),
    ('TH_Chakra_Petch', 'TH Chakra Petch.ttf'),
    ('TH_Charm_of_AU', 'TH Charm of AU.ttf'),
    ('TH_Mali_Grade6', 'TH Mali Grade6.ttf'),
    ('TH_Sarabun', 'THSarabun.ttf'),
]
FontFamily = [label for label, _ in _FONT_FILES]
filename = [font_file for _, font_file in _FONT_FILES]
de_filename = decode_dict(FontFamily, filename, de_fontFamily)

# Submission

In [None]:
submission = pd.read_csv("../input/super-ai-engineer-2021-font-recognition/sample_submission.csv",index_col ="Id")
# Make the column able to hold text no matter how the placeholder values
# were parsed (missing values stay missing, so dropna() still works below).
submission["Predicted"] = submission["Predicted"].astype(object)

# Gather every "<id>_<field>" -> value pair first. A later duplicate id
# overrides an earlier one, as repeated assignment would.
predicted = {}
for ind, id_df in enumerate(tqdm(df["id"])):
    predicted[id_df+"_name"] = de_fontFamily[ind]
    predicted[id_df+"_file"] = de_filename[ind]
    predicted[id_df+"_size"] = de_fontSize[ind]
    predicted[id_df+"_style-weight"] = str(de_fontStyle[ind])+"-"+str(de_fontWeight[ind])

# Write everything in one .loc assignment. Chained indexing such as
# submission["Predicted"][key] = value may write to a temporary copy and is
# ineffective under pandas Copy-on-Write. Keys that are absent from the
# sample file are skipped so the submission keeps exactly its original rows.
updates = pd.Series(predicted, dtype=object)
has_prediction = submission.index.isin(updates.index)
submission.loc[has_prediction, "Predicted"] = updates.reindex(submission.index[has_prediction]).to_numpy()
submission.dropna().tail(20)

In [None]:
# Number of submission rows that have no missing value.
len(submission.dropna())

In [None]:
# Write the submission; the "Id" index is kept as the first column.
submission.to_csv(path_or_buf="submission.csv")