# Load, Extract, Transform data

In [28]:
from dataloader import *

In [25]:
TEST_DATA_DIR = "data/test_data/"
TEST_GT_DIR = TEST_DATA_DIR + "test_gt"
TEST_IMG_DIR = TEST_DATA_DIR + "Images"
NUM_FILES = 1

In [31]:
gt_filenames = get_file_names(TEST_GT_DIR, num_files=NUM_FILES)
img_filenames = get_file_names(TEST_IMG_DIR, num_files=NUM_FILES)

In [32]:
GTs = get_GTs(TEST_GT_DIR, gt_filenames)
images = get_images(TEST_IMG_DIR, img_filenames)

extracting data/test_data/test_gt/tr_img_01001.txt

extracting data/test_data/Images/tr_img_01001.jpg


In [33]:
len(GTs), len(images)

(1, 1)

In [34]:
GTs

[[([[559, 1207], [2830, 1235], [2821, 1662], [548, 1622]], 'TICKETS')]]

In [None]:
images

In [35]:
from preprocessing import clean_GTs


GTs = clean_GTs(GTs)

# Get inference results

## Load models

### Glass

In [10]:
from glass.inference.glass_runner import GlassRunner
from glass.utils.visualizer import visualize

In [9]:
#download model
#!wget 'https://glass-text-spotting.s3.eu-west-1.amazonaws.com/models/glass_250k_full_textocr_finetune.pth' -O 'pretrained_models/glass_textocr.pth'

In [30]:
%cd Experiments/

/home/fatcat/Project/Experiments



This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.



In [13]:
model_path = './pretrained_models/glass_textocr.pth'
config_path = './configs/glass_finetune_textocr.yaml'
glass_runner = GlassRunner(model_path=model_path, config_path=config_path, post_process=True)

## Perform Inference

In [36]:
import time

glass_results = []

start = time.time()
for img in images:
    glass_preds = glass_runner(img)
    glass_results.append(glass_preds)
end = time.time()
duration = end - start

## Post processing


### Glass

In [None]:
# glass_results[0]

In [37]:
from glass.evaluation.text_evaluator import get_instances_text

glass_preds = []

for preds in glass_results:
    glass_texts, glass_text_scores, _ = get_instances_text(preds.pred_text_prob, text_encoder=glass_runner.text_encoder)
    glass_bb_texts_pairs = []
    
    for item in zip(preds.pred_polygons.tolist(),glass_texts):
        glass_bb_texts_pairs.append(item)
    
    glass_preds.append(glass_bb_texts_pairs)

In [38]:
len(glass_preds)

1

## Visulaization

In [21]:
id=0
figure = visualize(preds=glass_results[id], image=images[id], text_encoder=glass_runner.text_encoder, vis_width=720, vis_text=True)
figure.show()

# Evaluate models

In [11]:
from evaluation import avg_metrics

In [28]:
glass_preds[4][0]

([[197.3206024169922, 189.82675170898438],
  [274.50457763671875, 191.91014099121094],
  [273.57537841796875, 226.33447265625],
  [196.3914031982422, 224.25108337402344]],
 'FIRST')

In [39]:
g_cer, g_wer, g_ioup, g_iour, g_iouf1, g_tp, g_tr, g_tf1 = avg_metrics(GTs, glass_preds)
g_cer, g_wer, g_ioup, g_iour, g_iouf1, g_tp, g_tr, g_tf1

(0.1181354325750878,
 0.35,
 0.8343522271502145,
 0.8648402533392888,
 0.830042275865597,
 0.7946548927242051,
 0.810517104874928,
 0.7863726834200897)

# Tracking with MLflow

In [None]:
# !mlflow server --backend-store-uri sqlite:///mlflow.db

In [13]:

import mlflow

# mlflow.set_tracking_uri("sqlite:///mlflow.db")
# mlflow.set_experiment("Models Comparision")

In [18]:
import dagshub
dagshub.init(repo_owner='nhatminh297', repo_name='scenetext_mlops', mlflow=True)

mlflow.set_tracking_uri("https://dagshub.com/nhatminh297/scenetext_mlops.mlflow")
mlflow.set_experiment("models comparision")

<Experiment: artifact_location='mlflow-artifacts:/18714f8b53eb4f90ac206a9854b880d7', creation_time=1718760735138, experiment_id='1', last_update_time=1718760735138, lifecycle_stage='active', name='models comparision', tags={}>

### Glass

In [15]:
model_path = './pretrained_models/glass_textocr.pth'
config_path = './configs/glass_finetune_textocr.yaml'

In [16]:
from mlflow.pyfunc import PythonModel, PythonModelContext

class MyGlassModel(PythonModel):
    def load_context(self, context: PythonModelContext):
        from glass.inference.glass_runner import GlassRunner
        from glass.evaluation.text_evaluator import get_instances_text
        
        self._config_path = context.artifacts["config_path"]
        self._model_path = context.artifacts["model_path"]
        self._model = GlassRunner(model_path=self._model_path, config_path=self._config_path, post_process=True)


    def predict(self, context: PythonModelContext, image):
        self._image = image
        self.preds = self._model(image)
        self.glass_texts, glass_text_scores, _ = get_instances_text(self.preds.pred_text_prob, text_encoder=self._model.text_encoder)
        glass_bb_texts_pairs = []
        
        for item in zip(preds.pred_polygons.tolist(),glass_texts):
            glass_bb_texts_pairs.append(item)
        return [glass_bb_texts_pairs]
    
    def visualize(self, vis_width=720, vis_text=True):
        from glass.utils.visualizer import visualize
        
        figure = visualize(preds=self.preds, image=self._image, text_encoder=self._model.text_encoder, vis_width=vis_width, vis_text=vis_text)
        figure.show()
        
    

In [20]:
with mlflow.start_run() as run:
    glass_results = []
    for img in images:
        glass_preds = glass_runner(img)
        glass_results.append(glass_preds)
        
    glass_preds = []
    for preds in glass_results:
        glass_texts, glass_text_scores, _ = get_instances_text(preds.pred_text_prob, text_encoder=glass_runner.text_encoder)
        glass_bb_texts_pairs = []
        
        for item in zip(preds.pred_polygons.tolist(),glass_texts):
            glass_bb_texts_pairs.append(item)
        
        glass_preds.append(glass_bb_texts_pairs)
        
    mlflow.log_param("model_path", model_path)
    mlflow.log_param("config_path", config_path)
    mlflow.log_param("num_files", NUM_FILES)
        
    g_cer, g_wer, g_ioup, g_iour, g_iouf1, g_tp, g_tr, g_tf1 = avg_metrics(GTs, glass_preds)
    mlflow.set_tag("post_processing", "glass_post_processing")
    
    mlflow.log_metric("CER", g_cer)
    mlflow.log_metric("WER", g_wer)
    mlflow.log_metric("IOU Precision", g_ioup)
    mlflow.log_metric("IOU Recall", g_iour)
    mlflow.log_metric("IOU F1", g_iouf1)
    mlflow.log_metric("Text precision", g_tp)
    mlflow.log_metric("Text recall", g_tr)
    mlflow.log_metric("Text F1", g_tf1)
    mlflow.pyfunc.log_model("glassrunner", 
                        python_model=MyGlassModel(),
                        artifacts={ 
                            'config_path': config_path,
                            'model_path': model_path 
                        })
    
    

  image_tensor = torch.as_tensor(original_image.transpose((2, 0, 1)))
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


# test

In [17]:
model_path = './pretrained_models/glass_textocr.pth'
config_path = './configs/glass_finetune_textocr.yaml'
glass_runner = GlassRunner(model_path=model_path, config_path=config_path, post_process=True)

In [41]:
# %cd ..
from glass.evaluation.text_evaluator import get_instances_text

img = get_image(path = "data/test_data/Images/tr_img_01200.jpg")
pred = glass_runner(img)
glass_texts, glass_text_scores, _ = get_instances_text(pred.pred_text_prob, text_encoder=glass_runner.text_encoder)
glass_bb_texts_pairs = []

for item in zip(pred.pred_polygons.tolist(),glass_texts):
    glass_bb_texts_pairs.append(item)
glass_bb_texts_pairs

[([[267.4954833984375, 190.84812927246094],
   [441.0558166503906, 186.52183532714844],
   [442.54486083984375, 246.25901794433594],
   [268.9845275878906, 250.58531188964844]],
  'GROVE'),
 ([[220.21658325195312, 124.26400756835938],
   [414.301513671875, 123.93438720703125],
   [414.4021911621094, 183.22491455078125],
   [220.3172607421875, 183.55453491210938]],
  'LOCUST'),
 ([[258.9290466308594, 286.6731262207031],
   [365.6589050292969, 286.53082275390625],
   [365.6858825683594, 306.7665710449219],
   [258.9560241699219, 306.90887451171875]],
  'HISTORIC'),
 ([[177.5558319091797, 286.527587890625],
   [250.91445922851562, 287.63446044921875],
   [250.58180236816406, 309.68182373046875],
   [177.22317504882812, 308.574951171875]],
  'STATE'),
 ([[372.8724060058594, 285.7107238769531],
   [419.61944580078125, 286.49273681640625],
   [419.2844543457031, 306.5171813964844],
   [372.53741455078125, 305.73516845703125]],
  'SITE')]

In [42]:
figure = visualize(preds=pred, image=img, text_encoder=glass_runner.text_encoder, vis_width=720, vis_text=True)
figure.show()