# Note:
This notebook is from a collaboration between 
### **Sirawich Smistomboon (SUPERAI2-4849 Kiddee)** and
### **Nisit Smitsomboon (SUPERAI2-2134 The Scamper)**.

Shared with Super AI Engineer members for the font recognition hackathon.

You only need to modify the `PredictImage(id,image)` function so that it returns your model's prediction, and then submit the result to Kaggle.

In [None]:
!pip install typing-extensions

In [None]:
import os
import numpy as np
from typing_extensions import TypedDict
from typing import Generator, List, NamedTuple, Iterable
import json
import cv2
import pandas as pd

In [None]:
class DataAndLabel(NamedTuple):
    """One labelled training sample: an image crop plus its text and font attributes."""

    Mat: np.ndarray  # image crop for a single text box
    Text: str  # text content of the box
    FontFamily: str  # font family label
    FontSize: int  # font size as an integer
    IsBold: bool
    IsItalic: bool
def readSetWithLabel(setpath = "./train/set1") -> Generator[DataAndLabel, None, None]:
    """Yield one labelled crop for every annotated text box under *setpath*.

    Every sub-directory of *setpath* must contain ``image.png`` and
    ``gt_text.json``. The JSON file is a list of entries, each with a
    ``text`` string, a ``rect`` (``x``, ``y``, ``width``, ``height``) and a
    ``style`` mapping.

    Sub-directories are visited in sorted order so that the sequence of
    samples is reproducible; entries that are not directories are skipped.

    Args:
        setpath: Directory holding one sub-directory per page image.

    Yields:
        DataAndLabel: the cropped image region together with its text,
        font family, font size, bold flag and italic flag.

    Raises:
        FileNotFoundError: if ``image.png`` is missing or cannot be decoded.
    """
    class RectFormat(TypedDict):
        x: float
        y: float
        width: float
        height: float
    class StyleFormat(TypedDict):
        fontFamily: str  # read below, so it belongs in the declared schema
        fontSize: str
        fontStyle: str
        fontWeight: str
        fontVarient: str  # NOTE(review): spelling kept as found; this key is never read here
    class LoadFormat(TypedDict):
        text : str
        rect : RectFormat
        style : StyleFormat
    for mainpath in sorted(os.listdir(setpath)):
        sample_dir = f"{setpath}/{mainpath}"
        if not os.path.isdir(sample_dir):
            # Stray files next to the sample folders are not samples.
            continue
        image_path = f"{sample_dir}/image.png"
        mat = cv2.imread(image_path)
        if mat is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        with open(f"{sample_dir}/gt_text.json", encoding="utf-8") as f:
            entries : List[LoadFormat] = json.load(f)
        for entry in entries:
            rect = entry["rect"]
            x = rect["x"]
            y = rect["y"]
            # Clamp to 0: a negative bound would be read by numpy as an
            # offset from the far edge and produce the wrong region.
            top = max(0, round(y))
            bottom = max(0, round(y + rect["height"]))
            left = max(0, round(x))
            right = max(0, round(x + rect["width"]))
            crop = mat[top:bottom, left:right, :]
            style = entry["style"]
            # Drop the two-character unit suffix (presumably "px") before parsing.
            font_size = int(style["fontSize"][:-2])
            is_italic = style["fontStyle"] == "italic"
            is_bold = style["fontWeight"] == "bold"
            yield DataAndLabel(crop, entry["text"], style["fontFamily"], font_size, is_bold, is_italic)
        # Drop this name's reference to the page image before loading the next one.
        del mat
def readSetWithoutLabel(setpath = "./train/set2") -> Generator[np.ndarray, None, None]:
    """Yield the image crop of every text box under *setpath*, without labels.

    Every sub-directory of *setpath* must contain ``image.png`` and
    ``gt_text.json``. The JSON file is a list of entries, each with a
    ``rect`` (``x``, ``y``, ``width``, ``height``).

    Sub-directories are visited in sorted order so that the sequence of
    samples is reproducible; entries that are not directories are skipped.

    Args:
        setpath: Directory holding one sub-directory per page image.

    Yields:
        np.ndarray: the cropped image region of one text box.

    Raises:
        FileNotFoundError: if ``image.png`` is missing or cannot be decoded.
    """
    class RectFormat(TypedDict):
        x: float
        y: float
        width: float
        height: float
    class LoadFormat(TypedDict):
        id : str
        rect : RectFormat
    for mainpath in sorted(os.listdir(setpath)):
        sample_dir = f"{setpath}/{mainpath}"
        if not os.path.isdir(sample_dir):
            # Stray files next to the sample folders are not samples.
            continue
        image_path = f"{sample_dir}/image.png"
        mat = cv2.imread(image_path)
        if mat is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        with open(f"{sample_dir}/gt_text.json", encoding="utf-8") as f:
            entries : List[LoadFormat] = json.load(f)
        for entry in entries:
            rect = entry["rect"]
            x = rect["x"]
            y = rect["y"]
            # Clamp to 0: a negative bound would be read by numpy as an
            # offset from the far edge and produce the wrong region.
            top = max(0, round(y))
            bottom = max(0, round(y + rect["height"]))
            left = max(0, round(x))
            right = max(0, round(x + rect["width"]))
            yield mat[top:bottom, left:right, :]
        # Drop this name's reference to the page image before loading the next one.
        del mat
class DataTest(NamedTuple):
    """One unlabelled test sample: an image crop and the id it must be reported under."""

    Mat: np.ndarray  # image crop for a single text box
    Id: str  # identifier of the box
def readTestSet(setpath = "./test") -> Generator[DataTest, None, None]:
    """Yield the image crop and id of every text box in the test set.

    Every sub-directory of *setpath* must contain ``image.png`` and
    ``test.json``. The JSON file is a list of flat entries, each with an
    ``id`` plus ``x``, ``y``, ``width`` and ``height``.

    Sub-directories are visited in sorted order so that the sequence of
    samples is reproducible; entries that are not directories are skipped.

    Args:
        setpath: Directory holding one sub-directory per page image.

    Yields:
        DataTest: the cropped image region and the id of the box.

    Raises:
        FileNotFoundError: if ``image.png`` is missing or cannot be decoded.
    """
    class LoadFormat(TypedDict):
        id : str
        x : float
        y : float
        width : float
        height : float
    for mainpath in sorted(os.listdir(setpath)):
        sample_dir = f"{setpath}/{mainpath}"
        if not os.path.isdir(sample_dir):
            # Stray files next to the sample folders are not samples.
            continue
        image_path = f"{sample_dir}/image.png"
        mat = cv2.imread(image_path)
        if mat is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        with open(f"{sample_dir}/test.json", encoding="utf-8") as f:
            entries : List[LoadFormat] = json.load(f)
        for entry in entries:
            x = entry["x"]
            y = entry["y"]
            # Clamp to 0: a negative bound would be read by numpy as an
            # offset from the far edge and produce the wrong region.
            top = max(0, round(y))
            bottom = max(0, round(y + entry["height"]))
            left = max(0, round(x))
            right = max(0, round(x + entry["width"]))
            yield DataTest(mat[top:bottom, left:right, :], entry["id"])
        # Drop this name's reference to the page image before loading the next one.
        del mat

In [None]:
# Font-family label -> font file name reported in the "<Id>_file" row.
# Built once here instead of on every call.
_FONT_FILE_NAMES = {
    "Angsana_New": "ANGSA.ttf",
    "Cordia_New": "CORDIA.ttf",
    "DM_Shining_Star_Regular": "DM Shining Star Regular.ttf",
    "FC_Knomphing_Regular": "FC Knomphing Regular.ttf",
    "fonttintin": "fonttintin.ttf",
    "Kunlasatri": "Kunlasatri.ttf",
    "TH_Chakra_Petch": "TH Chakra Petch.ttf",
    "TH_Charm_of_AU": "TH Charm of AU.ttf",
    "TH_Mali_Grade6": "TH Mali Grade6.ttf",
    "TH_Sarabun": "THSarabun.ttf",
}


def make1IdSubmission(Id : str, Font : str, FontSize : int, IsItalic : bool, IsBold : bool):
    """Build the four submission rows for a single sample.

    Args:
        Id: Sample id; used as the prefix of every row key.
        Font: Font-family label; must be a key of ``_FONT_FILE_NAMES``.
        FontSize: Font size, written out as ``"<FontSize>px"``.
        IsItalic: Selects ``"italic"`` or ``"normal"`` for the style part.
        IsBold: Selects ``"bold"`` or ``"normal"`` for the weight part.

    Returns:
        A DataFrame with one ``myPredicted`` column and an index named
        ``Id`` containing ``<Id>_name``, ``<Id>_file``, ``<Id>_size`` and
        ``<Id>_style-weight``, in that order.

    Raises:
        KeyError: if *Font* is not a known font-family label.
    """
    try:
        font_file = _FONT_FILE_NAMES[Font]
    except KeyError:
        # Same exception type as a plain lookup, but say what was expected.
        raise KeyError(
            f"Unknown font family {Font!r}; expected one of {sorted(_FONT_FILE_NAMES)}"
        ) from None
    style = "italic" if IsItalic else "normal"
    weight = "bold" if IsBold else "normal"
    rows = {
        f"{Id}_name": Font,
        f"{Id}_file": font_file,
        f"{Id}_size": f"{FontSize}px",
        f"{Id}_style-weight": f"{style}-{weight}",
    }
    return pd.DataFrame(
        list(rows.values()),
        index=pd.Index(list(rows), name="Id"),
        columns=["myPredicted"],
    )
class OneSubmission(NamedTuple):
    """The prediction for one sample."""

    Id: str  # sample id
    Font: str  # font-family label
    FontSize: int
    IsItalic: bool
    IsBold: bool
def makeSubmission(iters : Iterable[OneSubmission]):
    """Stack the submission rows of every prediction into one DataFrame.

    Args:
        iters: Predictions to convert; each one contributes the four rows
            produced by ``make1IdSubmission``.

    Returns:
        A DataFrame with a single ``myPredicted`` column and an index named
        ``Id``. It is empty, with the same column and index name, when
        *iters* yields nothing.
    """
    # DataFrame.append was removed in pandas 2.0, and appending inside a loop
    # re-copies the accumulated frame each time. Collect the pieces and
    # concatenate once instead.
    frames = [
        make1IdSubmission(x.Id, x.Font, x.FontSize, x.IsItalic, x.IsBold)
        for x in iters
    ]
    if not frames:
        return pd.DataFrame(columns=["myPredicted"], index=pd.Series([], name="Id", dtype=str))
    return pd.concat(frames)

In [None]:
# Root directory of the competition input data; the cells below append
# "./train/set2" and "./test" to it.
datapath = '../input/super-ai-engineer-2021-font-recognition/'

In [None]:
import matplotlib as mpl
mpl.__version__  
!wget -q https://github.com/Phonbopit/sarabun-webfont/raw/master/fonts/thsarabunnew-webfont.ttf
mpl.font_manager.fontManager.addfont('thsarabunnew-webfont.ttf')
mpl.rc('font', family='TH Sarabun New', size=14)
plt = mpl.pyplot

In [None]:
# _, ax = plt.subplots(8, 2, figsize=(20, 40))
# # sample
# i = 0
# for oneset in readSetWithLabel(setpath = datapath+"./train/set1"):
#     # print("Image", oneset.Mat) # np.ndarray Mat
#     label = f"FontFamily {oneset.FontFamily} FontSize {oneset.FontSize} IsBold {oneset.IsBold} IsItalic {oneset.IsItalic} \n[{oneset.Text}]"
# #    print(label)
# #     print("FontSize", oneset.FontSize)
# #     print("IsBold", oneset.IsBold)
# #     print("IsItalic", oneset.IsItalic)

#     img = oneset.Mat #(images[i] * 255).numpy().astype("uint8")

#     #label = oneset.Text # tf.strings.reduce_join(num_to_char(labels[i])).numpy().decode("utf-8")
#     ax[i // 2, i % 2].imshow(img[:, :, 0], cmap="gray")
#     ax[i // 2, i % 2].set_title(label)
#     ax[i // 2, i % 2].axis("off")
#     i += 1
#     if i >= 16:
#         break
# plt.show()

In [None]:
# sample
# Pull a single crop from the unlabelled set, then stop after the first item.
for mat in readSetWithoutLabel(setpath=datapath + "./train/set2"):
    #print("Image", mat) # np.ndarray Mat
    break

In [None]:
def PredictImage(id,image):
    """Return the prediction for one test crop.

    This is the placeholder to replace with a real model: as written it
    ignores *image* and returns the same constant answer for every sample.

    Args:
        id: Sample id, passed through to the result unchanged.
        image: Image crop of the text box (unused by the placeholder).

    Returns:
        OneSubmission carrying *id* and the predicted font attributes.
    """
    # ---- replace the constants below with your model's output ----
    predicted_family = "Angsana_New"
    predicted_size = 16
    predicted_italic = False
    predicted_bold = True
    # ---------------------------------------------------------------
    return OneSubmission(
        Id=id,
        Font=predicted_family,
        FontSize=predicted_size,
        IsItalic=predicted_italic,
        IsBold=predicted_bold,
    )

In [None]:
# sample
# Run the predictor over every crop in the test set and collect the results.
testResult = [
    PredictImage(sample.Id, sample.Mat)
    for sample in readTestSet(setpath=datapath +"./test")
]

In [None]:
# Number of predictions collected (one per test crop).
len(testResult)

In [None]:
# sample
# Demonstrates the submission layout on four hand-written predictions that
# cover every italic/bold combination.
makeSubmission([
    OneSubmission("id1","Angsana_New", 14, False, False),
    OneSubmission("id2","Angsana_New", 16, False, True),
    OneSubmission("id3","Angsana_New", 18, True, False),
    OneSubmission("id4","Angsana_New", 20, True, True)
])

In [None]:

# Convert all collected predictions into the row-per-attribute submission frame.
dfResult = makeSubmission(testResult)
dfResult

In [None]:
# Row count of the submission frame (four rows per prediction).
len(dfResult)

In [None]:
# Load the sample submission with every column read as a string. The path is
# built from `datapath` so the input location is configured in one place.
result = pd.read_csv(datapath + "sample_submission.csv", dtype=str)
result

In [None]:
# Left-join the predictions onto the sample submission by Id, so every row of
# the sample submission is kept whether or not it has a prediction.
# NOTE(review): this rebinds `result`; re-run the read_csv cell before
# re-running this one, otherwise the merge is applied to an already-merged frame.
result = result.merge(dfResult, on='Id', how='left')
result

In [None]:
# Keep the Id and prediction columns, rename the prediction column to
# "Predicted", and write the submission file without the row index.
submission = result[["Id", "myPredicted"]].rename(columns={"myPredicted": "Predicted"})
submission.to_csv("submission.csv", index=False)