Prerequisite: this notebook expects a folder of images whose file names encode each image's coordinates and heading angle.

The file names should follow the pattern: `<latitude>,<longitude>_<angle>.jpg`

In [15]:
from ast import literal_eval
from glob import glob
import os

import numpy as np
import pandas as pd
import cv2

In [16]:
# Root folder for every input and cached artefact used by this notebook.
# Replace the placeholder below with a real path string before running.
# Later cells build paths by plain string concatenation, both as
# `data_path + 'name.csv'` and as `data_path + '/name.csv'`, so the value
# should end with a path separator (e.g. '/path/to/data/').
data_path = ENTER PATH HERE # TODO

def img_load_func(img_path):
    """Read the image stored at ``img_path`` with OpenCV.

    Args:
        img_path: Path of the image file to read.

    Returns:
        Whatever ``cv2.imread`` returns for that path. Per the OpenCV
        documentation this is ``None`` when the file cannot be read, so
        callers that need ``.shape`` should check for that.
    """
    image = cv2.imread(img_path)
    return image

def fix_read_cols(df, cols):
    """Turn literal-valued columns read from CSV back into Python objects.

    Tuples and lists written with ``DataFrame.to_csv`` come back as their
    text representation. For every name in ``cols`` that is a column of
    ``df``, each string cell is parsed with ``ast.literal_eval``; the column
    is replaced in place.

    Only ``str`` cells are parsed. NaN markers, numbers and cells that are
    already Python objects are left untouched, which also makes it safe to
    call this twice on the same frame.

    Args:
        df: DataFrame to fix; modified in place.
        cols: Iterable of column names. Names missing from ``df`` are ignored.

    Raises:
        ValueError, SyntaxError: If a string cell is not a valid Python literal.
    """
    def _parse_cell(value):
        # literal_eval only accepts source text; anything else (NaN, numbers,
        # already-parsed tuples/lists) is passed through unchanged.
        return literal_eval(value) if isinstance(value, str) else value

    for col in cols:
        if col in df.columns:
            df[col] = df[col].map(_parse_cell)


In [17]:
# Cached image index (CSV) and the folder that holds the pictures.
imgs_df_path = data_path + 'extended_center_imgs_df.csv'
imgs_folder_path = data_path + 'images/extended_center_pics/'

if os.path.exists(imgs_df_path):
    df = pd.read_csv(imgs_df_path)
    # Tuples are stored as text in the CSV; turn them back into tuples.
    fix_read_cols(df, ['img_shape'])
    print('Loading images dataframe')
else:
    img_records = []

    for path in glob(imgs_folder_path + '*.jpg'):
        # File names follow `<latitude>,<longitude>_<angle>.jpg`.
        stem = os.path.splitext(os.path.basename(path))[0]
        coords, _, angle = stem.partition('_')
        lat, _, lon = coords.partition(',')

        # Store numbers, not text, so a freshly built frame has the same
        # dtypes as one that is later read back from the CSV cache.
        try:
            lat, lon, angle = float(lat), float(lon), float(angle)
        except ValueError:
            print('Skipping image with unexpected file name:', path)
            continue

        # cv2.imread reports an unreadable file by returning None (see the
        # OpenCV docs); skip it instead of failing on `.shape`.
        img = img_load_func(path)
        if img is None:
            print('Skipping unreadable image:', path)
            continue

        img_records.append({'img_path': os.path.normpath(path), 'img_lat': lat, 'img_lon': lon,
                            'img_angle': angle, 'img_shape': img.shape})

    # Explicit columns keep the schema stable even when no image was found.
    df = pd.DataFrame(img_records,
                      columns=['img_path', 'img_lat', 'img_lon', 'img_angle', 'img_shape'])
    df['idx'] = np.arange(df.shape[0])

    # index=False: the row id already lives in `idx`; writing the index too
    # makes it come back as an extra 'Unnamed: 0' column on the next read.
    df.to_csv(imgs_df_path, index=False)

Loading images dataframe


# Text Detection and Recognition

In [18]:
# Columns whose cells hold Python literals (tuples / lists) and therefore
# need `fix_read_cols` after a CSV round trip. Names that are absent from a
# given frame are simply ignored by that helper.
COLS2FIX = [
    'img_shape',
    'real_detected_bboxes',
    'real_img_shape',
    'det_rect',
    'detected_bbox',
]

# Dataset name -> column that stores the image path for that dataset.
dset2path_col = dict(synth='synth_img_path', real='img_path')

# Cache files for the detection and recognition stages.
det_path = f'{data_path}/extended_center_det_df.csv'
rec_path = f'{data_path}/extended_center_rec_df.csv'
tmp_rec_path = f'{data_path}/extended_center_rec_tmp_df.csv'

## Text Detection

In [19]:
from EAST.eval import east_detect

In [20]:
# Text detection: reuse the cached detections when present, otherwise run
# EAST over the image folder and attach the boxes to the image index.
if os.path.exists(det_path):
    df = pd.read_csv(det_path)
    fix_read_cols(df, COLS2FIX)
    print('Reading saved detections')
else:
    det_tmp_path = data_path + '/tmp_detections/'

    # Arguments are passed to the project's EAST wrapper unchanged.
    # NOTE(review): `detections` is used below as a mapping from image path
    # to a sequence of box arrays -- confirm against EAST.eval.east_detect.
    detections = east_detect(test_data_path=imgs_folder_path, gpu_list='0',
                             checkpoint_path='./EAST/nets/',
                             output_dir=det_tmp_path, max_im_height=620, nr_imgs=100)

    def _boxes_as_lists(path):
        """Boxes detected for `path` as nested lists, or None if there are none."""
        if path not in detections:
            return None
        boxes = detections[path]
        return None if boxes is None else [box.tolist() for box in boxes]

    # Build the column on a new frame instead of assigning into the result of
    # `dropna`, which would write to a slice (SettingWithCopyWarning).
    df = (df.assign(real_detected_bboxes=df.img_path.map(_boxes_as_lists))
            .dropna(subset=['real_detected_bboxes']))

    # index=False: otherwise the index returns as an 'Unnamed: 0' column the
    # next time this cache is read.
    df.to_csv(det_path, index=False)

Reading saved detections


In [21]:
from citywords.text_det import is_valid_rec, box2rec

# One row per detected box. `explode` unpacks only the outer list, so each
# element is a single box; images whose box list is empty produce NaN and are
# dropped here. (The previous `apply(pd.Series).stack()` depended on `stack`
# silently discarding the NaN padding, which newer pandas no longer does.)
unrolled = df['real_detected_bboxes'].explode().dropna()
unrolled.name = 'real_detected_bbox'

# Inner join: an image without any box must not reappear as a NaN-box row.
unrolled_df = df.join(unrolled, how='inner')

# NOTE(review): box2rec / is_valid_rec come from citywords.text_det; from the
# calls below, box2rec maps (box, image shape) to a rectangle whose items are
# unpacked as the arguments of is_valid_rec -- confirm the exact contract.
rect_lambda = lambda row: box2rec(row['real_detected_bbox'], row['img_shape'])

unrolled_df["det_rect"] = unrolled_df.apply(rect_lambda, axis=1)
is_valid_rect = unrolled_df["det_rect"].map(lambda rect: is_valid_rec(*rect))

# `.copy()` so the column added next is written to an independent frame
# rather than to a filtered view of the previous one.
unrolled_df = unrolled_df[is_valid_rect].copy()

# Hashable id of a detection: (image idx, rectangle as nested tuples).
unrolled_df["rec_idx"] = unrolled_df.apply(lambda row: (row.idx, tuple(map(tuple, row["det_rect"]))), axis=1)

### Crop detections

In [22]:
from citywords.text_det import crop_det

# Folder where the cropped detections are written.
cropped_folder_path = data_path + "/cropped_imgs/"


def rec_idx2cropped_path(rec_idx):
    """Return the normalised path of the crop file for detection ``rec_idx``.

    The file is named after the text form of ``rec_idx`` with a ``.jpg``
    suffix and lives inside ``cropped_folder_path``.
    """
    raw_path = f"{cropped_folder_path}{os.sep}{rec_idx}.jpg"
    return os.path.normpath(raw_path)

In [23]:
# Crops are only needed while the recognition cache does not exist yet.
if not os.path.exists(rec_path):
    if not os.path.exists(cropped_folder_path):
        os.makedirs(cropped_folder_path)

    for _, det_row in unrolled_df.iterrows():
        crop_path = rec_idx2cropped_path(det_row.rec_idx)

        # A crop that is already on disk is kept as it is.
        if not os.path.exists(crop_path):
            source_img = img_load_func(det_row.img_path)
            cv2.imwrite(crop_path, crop_det(source_img, det_row.det_rect))

## Text Recognition

In [24]:
from deep_text_recognition_benchmark.demo import recognize

In [25]:
# Text recognition: reuse the cached result when present, otherwise run the
# recogniser over the cropped detections and attach text + confidence.
if os.path.exists(rec_path):
    det_rec_df = pd.read_csv(rec_path)
else:
    # NOTE(review): `recognitions` is used below as a mapping from crop path
    # to a dict with 'pred' and 'confidence_score' -- confirm against
    # deep_text_recognition_benchmark.demo.recognize.
    recognitions = recognize(image_folder=cropped_folder_path,
                             saved_model='deep_text_recognition_benchmark/TPS-ResNet-BiLSTM-Attn.pth')
    # Normalise the keys so they compare equal to rec_idx2cropped_path output.
    recognitions = {os.path.normpath(k): v for k, v in recognitions.items()}

    # Build each crop path once and reuse it for both lookups.
    crop_paths = unrolled_df.rec_idx.map(rec_idx2cropped_path)
    unrolled_df['rec_txt'] = crop_paths.map(lambda p: recognitions[p]['pred'])
    unrolled_df['rec_conf'] = pd.to_numeric(
        crop_paths.map(lambda p: recognitions[p]['confidence_score']))

    det_rec_df = unrolled_df

    # index=False: otherwise the index returns as an 'Unnamed: 0' column the
    # next time this cache is read.
    det_rec_df.to_csv(rec_path, index=False)

# Text Embedding

In [26]:
import pickle
import time

from citywords.gpt import gpt_get_descriptions_and_topics

## Description querying

In [27]:
# Query descriptions for every recognised word, `chunk_size` words at a time.
# Progress is pickled after each request so an interrupted run can resume.
result_path = data_path + "gpt_result.pickle"

# NOTE(review): unpickling can execute arbitrary code; only point this at a
# cache file that this notebook wrote itself.
if os.path.exists(result_path):
    with open(result_path, 'rb') as f:
        descriptions = pickle.load(f)
else:
    descriptions = dict()

chunk_size = 30
# A word is given up after this many requests that did not return it as a
# key; without a limit one such word would keep the loop running forever.
max_attempts = 3

request_counts = {}


def _words_to_query():
    """Recognised words that lack a description and may still be requested."""
    # dropna: a missing rec_txt can never show up as a key of `descriptions`.
    candidates = det_rec_df.rec_txt.dropna().unique()
    return [word for word in candidates
            if word not in descriptions and request_counts.get(word, 0) < max_attempts]


words = _words_to_query()
words_part = []

while words:
    # Sample positions rather than values so the words keep their original
    # Python type (np.random.choice on the list itself yields numpy strings).
    picked = np.random.choice(len(words), min(chunk_size, len(words)), replace=False)
    words_part = [words[i] for i in picked]
    for word in words_part:
        request_counts[word] = request_counts.get(word, 0) + 1

    result = gpt_get_descriptions_and_topics(words_part, custom_prompt=None)

    # The answer is expected to be the text of a Python dict literal. A
    # malformed answer only costs this attempt instead of aborting the run.
    try:
        result = literal_eval(result)
    except (ValueError, SyntaxError) as err:
        print('could not parse the answer, retrying later:', err)
        result = None

    if isinstance(result, dict):
        descriptions.update(result)

        with open(result_path, 'wb') as f:
            pickle.dump(descriptions, f)
    elif result is not None:
        print('unexpected answer type, retrying later:', type(result).__name__)

    # Short pause between consecutive requests.
    time.sleep(1)

    words = _words_to_query()

    print('words left:', len(words))

words_without_description = [word for word in det_rec_df.rec_txt.dropna().unique()
                             if word not in descriptions]
if words_without_description:
    print('words given up after', max_attempts, 'attempts:', len(words_without_description))

## Description embedding

In [28]:
from citywords.w2v import TransformerEncoder

In [29]:
# Embed the description of every word; embeddings are cached in a pickle.
embd_save_path = data_path + 'embds_extended_center.pickle'

encoder = TransformerEncoder(model_name='xlm-r-100langs-bert-base-nli-stsb-mean-tokens')

# NOTE(review): unpickling can execute arbitrary code; only point this at a
# cache file that this notebook wrote itself.
if os.path.exists(embd_save_path):
    print('loading embeds')
    with open(embd_save_path, 'rb') as f:
        embd_dict = pickle.load(f)
else:
    embd_dict = {}

# `descriptions` can grow between runs (the querying step is resumable), so
# an existing cache may lack some words. Encode only what is missing rather
# than treating the cache as all-or-nothing.
missing_words = [word for word in descriptions if word not in embd_dict]

if missing_words:
    texts = [descriptions[word] for word in missing_words]

    # NOTE(review): encoder.encode is assumed to return one row per input
    # text, in input order, as a torch tensor -- confirm in citywords.w2v.
    embds = encoder.encode(texts).cpu().numpy()
    embd_dict.update(zip(missing_words, embds))

    with open(embd_save_path, 'wb') as f:
        pickle.dump(embd_dict, f)

max sentence lenght: 128
loading embeds


# Semantic Matching

In [30]:
from citywords.analysis import classify_topics

In [31]:
# Topic label -> Portuguese description of what belongs to that topic.
# The labels are the values that end up in the `topic_cls_label` column.
topic_descriptions = {
    'alimentacao':
        'restaurantes, bares, mercados, cafes, e outros estabelecimentos relacionados a compra de comida e bebidas.',
    'imoveis':
        'placas de aluga-se, vende-se, corretores de imóveis ou nomes de imobiliarias, placas que indicam construção.',
    'saude e bem estar':
        'hospitais, farmacias, clínicas médicas, clínicas estéticas, qualquer estabelecimento relacionado a tratamentos para a saúde, estética ou bem estar.',
    'lazer e entretenimento':
        'cinemas, teatros, hoteis, casas de show, centros esportivos, quadras, parques e qualquer tipo de estabelecimento destinado a entretenimento ou lazer das pessoas.',
    'escolar':
        'escolas, transporte escolar, universidades, faculdades, escolas de idiomas, qualquer estabelecimento ligado a educação e treinamento em geral.',
    'transporte':
        'estacionamentos, vagas para veículos, lojas de auto peças, aluguel de veículos, garagens, concessionarias de venda de veículos novos, semi novos ou usados.',
    'religiao':
        'estabelecimentos religiosos, igrejas, centros religiosos.',
    'comercio':
        'lojas em geral, roupas, calçados, jóias, presentes, brinquedos, eletrônicos, móveis, lojas de departamento.',
    'financeiro':
        'bancos, caixas eletrônicos, agencias financeiras em geral.',
    'servicos':
        'cartório, serviços de advocacia, escritórios, serviços de reparos.',
    'sinalizacao e locais':
        'placas de trânsito, placas com nomes de ruas, nomes de país, cidade, bairro ou região, sinalização de aviso, atenção e proibição.',
}

# NOTE(review): classify_topics comes from citywords.analysis; presumably it
# matches each word's embedding against the topic descriptions -- confirm.
det_rec_df = classify_topics(
    det_rec_df,
    embd_dict,
    encoder,
    topic_descriptions,
)

In [32]:
# Preview the first rows of the frame returned by classify_topics.
det_rec_df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,img_path,img_lat,img_lon,img_angle,img_shape,idx,real_detected_bboxes,real_detected_bbox,det_rect,rec_idx,rec_txt,rec_conf,topic_cls_label
0,6,6,G:\My Drive\Master\data\images\extended_center...,-23.574498,-46.667948,338.96,"(640, 640, 3)",6,"[[[412, 232], [443, 233], [443, 246], [412, 24...","[[412, 232], [443, 233], [443, 246], [412, 245]]","([412, 232], [443, 246])","(6, ((412, 232), (443, 246)))",shissant,0.001355,sinalizacao e locais
0,6,6,G:\My Drive\Master\data\images\extended_center...,-23.574498,-46.667948,338.96,"(640, 640, 3)",6,"[[[412, 232], [443, 233], [443, 246], [412, 24...","[[441, 234], [470, 235], [470, 248], [441, 247]]","([441, 234], [470, 248])","(6, ((441, 234), (470, 248)))",tishop,0.062474,sinalizacao e locais
1,9,9,G:\My Drive\Master\data\images\extended_center...,-23.574442,-46.632113,203.625,"(640, 640, 3)",9,"[[[-1, 331], [52, 330], [52, 344], [-1, 345]],...","[[-1, 331], [52, 330], [52, 344], [-1, 345]]","([0, 331], [52, 344])","(9, ((0, 331), (52, 344)))",rearial,0.169805,sinalizacao e locais
1,9,9,G:\My Drive\Master\data\images\extended_center...,-23.574442,-46.632113,203.625,"(640, 640, 3)",9,"[[[-1, 331], [52, 330], [52, 344], [-1, 345]],...","[[49, 330], [105, 331], [105, 343], [49, 343]]","([49, 330], [105, 343])","(9, ((49, 330), (105, 343)))",camarimi,0.844555,sinalizacao e locais
2,16,16,G:\My Drive\Master\data\images\extended_center...,-23.572943,-46.632123,313.703,"(640, 640, 3)",16,"[[[418, 173], [564, 149], [568, 178], [423, 20...","[[418, 173], [564, 149], [568, 178], [423, 202]]","([418, 173], [568, 178])","(16, ((418, 173), (568, 178)))",some,0.02,escolar


In [33]:
# Save the final table. index=False keeps the frame's index out of the file;
# written by default, it comes back as an extra 'Unnamed: 0' column whenever
# the CSV is read again (the preview above already shows three of them).
det_rec_df.to_csv(data_path + 'extended_center_final_df.csv', index=False)