# Help vision impaired people to see the world

Combining leading-edge technologies to help people – connecting the dots:


1. **Classifying** different **pictures** (tickets, floorplans and documents)
2. **Recognizing text** in the images
3. Reading out loud through **text-to-speech** (English)



In [None]:
import os
import telepot
import time
import torch
from PIL import Image
from torchvision import transforms
import pandas as pd
import numpy as np

from gtts import gTTS
import json

from dateutil import parser
import re

import pytesseract
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import cv2


######################
# proxy connection with pythonAnywhere
# Replaces telepot's urllib3 connection pools with ProxyManager instances so
# that the bot's HTTP traffic is sent through `proxy_url`.
# NOTE(review): `_pools` and `_onetime_pool_spec` are private telepot
# attributes — confirm they still exist before upgrading telepot.
import urllib3
import telepot.api
proxy_url = 'http://proxy.server:3128'

# Shared pool registered under the 'default' key.
telepot.api._pools = {'default': urllib3.ProxyManager(proxy_url=proxy_url, num_pools=3, maxsize=10, retries=False, timeout=30)}
# (pool class, constructor kwargs) pair; presumably used by telepot to build
# single-use connections — TODO confirm against the telepot source.
telepot.api._onetime_pool_spec = (urllib3.ProxyManager, dict(proxy_url=proxy_url, num_pools=1, maxsize=1, retries=False, timeout=30))
######################


# --- Bot configuration -------------------------------------------------------
# Telegram bot token; intentionally left empty here and must be filled in.
API_KEY = ''

# --- Model artefacts ---------------------------------------------------------
MODEL_PATH = 'classification_model_HP.pt'   # image classifier (torch.load)
TAG_PATH = 'tag_model.sav'                  # pickled line-tagging model
VOCABULARY_PATH = 'tfidf_vector.pkl'        # pickled vocabulary for CountVectorizer

# --- OCR ---------------------------------------------------------------------
# Command-line flags handed to Tesseract through pytesseract's `config`.
options = "--psm 4 --oem 3"

# --- Image classification ----------------------------------------------------
# Index -> class name as predicted by the classifier, and the word used for
# each class in the messages sent to the user.
class_names = ['Documents', 'Plans', 'Tickets']
doc_type = {
    'Documents': 'document',
    'Plans': 'plan',
    'Tickets': 'ticket',
}

# Pre-processing applied to every picture before it is fed to the classifier.
data_transforms = {
    'test': transforms.Compose([
        # Resize the short side of the image to 256 keeping the aspect ratio
        transforms.Resize(256),
        # Crop a 224x224 square in the center of the image
        transforms.CenterCrop(224),
        # Convert the image to a tensor with pixels in the range [0, 1]
        transforms.ToTensor(),
        # Per-channel normalisation (mean, std)
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
}

# Use the first GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# --- Storage layout ----------------------------------------------------------
ROOT = '/home/patriciaandolz/'
folder = 'photosRecibed/'          # pictures received from users
folder_summary = 'summarySent/'    # JSON summaries
folder_audio = 'audioSent/'        # generated speech
folder_images = 'photosSent/'      # pictures with highlighted boxes


# --- Date vocabulary ---------------------------------------------------------
MONTHS = [
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
]
DAY_EXTENTIONS = ["st", "nd", "rd", "th"]



################################
##                            ##
##   1. CLASSIFY AN IMAGE     ##
##                            ##
################################

# load the model
# `map_location` lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only host; the model is then moved to `device`, the same device that
# get_prediction() sends its input tensor to, so both always agree.
model = torch.load(ROOT+MODEL_PATH, map_location=device)
model = model.to(device)
# Inference mode: disables dropout and freezes batch-norm statistics.
model.eval()

# function to load an image and transform it
def image_loader(imagen_name):
  """Open an image file and turn it into a classifier-ready batch.

  Args:
    imagen_name: path (or file object) accepted by ``Image.open``.

  Returns:
    The result of ``data_transforms['test']`` with a leading batch
    dimension of size 1 added by ``unsqueeze(0)``.
  """
  # The context manager closes the underlying file once the pixels are read.
  with Image.open(imagen_name) as img_file:
    # The normalisation step is configured with three channel statistics, so
    # every non-RGB mode (grayscale, palette, RGBA, CMYK, ...) is converted,
    # not only 'L'.
    rgb_image = img_file if img_file.mode == 'RGB' else img_file.convert('RGB')
    return data_transforms['test'](rgb_image).unsqueeze(0)

# function to predict the class of the picture
def get_prediction(image):
  """Classify one picture.

  Args:
    image: path of the picture, forwarded to ``image_loader``.

  Returns:
    ``(confidence, label)`` where ``confidence`` is the highest softmax
    probability as a Python float and ``label`` is the matching entry of
    ``doc_type``.
  """
  tensor = image_loader(image).to(device)

  # Pure inference: no gradients are needed, so skip autograd bookkeeping.
  with torch.no_grad():
    # Call the module itself rather than .forward() so registered hooks run.
    output = model(tensor)
    probs = torch.nn.functional.softmax(output, dim=1)
    conf, classes_output = torch.max(probs, 1)

  return conf.item(), doc_type[class_names[classes_output.item()]]

# main function to classify a picture
def classify(line, bot, chat_id, msg):
    """Download the photo in ``msg``, classify it and report the result.

    Args:
        line: log prefix built by the caller. It is not used here; the
            parameter is kept so existing callers keep working.
        bot: object exposing ``sendMessage`` and ``download_file``.
        chat_id: chat the replies are sent to.
        msg: Telegram message dict containing a ``photo`` list.

    Returns:
        ``(image_path, label)`` on success, or ``(None, None)`` when the
        picture is rejected for being smaller than 224x224. Always returning
        a pair keeps ``image, label = classify(...)`` in the caller from
        failing on the rejection path.
    """
    bot.sendMessage(chat_id, "Give me a second please, I'm checking it...")

    # The last entry of the photo list is the one this bot works with.
    photo = msg['photo'][-1]
    photo_id = photo['file_id']
    width = photo['width']
    height = photo['height']
    if width < 224 or height < 224:
        bot.sendMessage(chat_id, "Sorry... I need images bigger than 224x224. That one only has " + str(width) + "x" + str(height))
        return None, None

    sender = msg['from']
    # `username` is optional in Telegram, so read it with .get() instead of
    # crashing with KeyError for users that never set one.
    image_path = ROOT+folder + str(sender.get('username')) + '_' + str(sender['first_name']) + '_' + str(msg['date']) + ".jpg"
    bot.download_file(photo_id, image_path)

    conf, y_pre = get_prediction(image_path)
    print(y_pre, 'at confidence score:{0:.2f}'.format(conf))

    # Append the outcome to the running log file.
    with open('outputClassifier.txt', 'a') as f:
        content = 'Image {}, is a {} at confidence score: {:.2f}'.format(image_path, y_pre, conf) + os.linesep
        f.write(content)

    messgToSend = "I'm *{:.2f}%* sure it's a *{}*".format(conf*100, y_pre)
    bot.sendMessage(chat_id, messgToSend, parse_mode= 'Markdown')
    return image_path, y_pre



################################
##                            ##
##   2. RECOGNIZE THE TEXT    ##
##                            ##
################################

# load the tag model
# NOTE: unpickling runs arbitrary code from the file; only load trusted files.
# NOTE(review): unlike MODEL_PATH these two paths are not prefixed with ROOT,
# so they are resolved against the current working directory — confirm.
with open(TAG_PATH, 'rb') as tag_file:
    tag_model = pickle.load(tag_file)

# load the vocabulary
transformer = TfidfTransformer()
with open(VOCABULARY_PATH, "rb") as vocabulary_file:
    vocabulary = CountVectorizer(decode_error="replace",vocabulary=pickle.load(vocabulary_file))

# function to find the total amount in a receipt
def find_total(text):
  """Locate the total amount in the text of a receipt.

  A row is a candidate when it contains the word TOTAL or TTL
  (case-sensitive). Among the candidates that also contain a price such as
  ``12.50``, ``12,50`` or ``12, 50``, the last one wins.

  Args:
    text: receipt text with rows separated by ``"\\n"``.

  Returns:
    ``(amount, row_index)`` where ``amount`` uses a dot as decimal separator
    and has no spaces, or ``('', -1)`` when no total is found.
  """
  keyword_re = re.compile(r'\b(?:TOTAL|TTL)\b')
  amount_re = re.compile(r'\d+\.\d{2}|\d+\,\s?\d{2}')

  found = ('', -1)
  for row_index, row in enumerate(text.split("\n")):
    if keyword_re.search(row) is None:
      continue
    hit = amount_re.search(row)
    if hit is None:
      continue
    # Later rows overwrite earlier ones, so the last priced TOTAL row is kept.
    amount = hit.group().replace(",", ".").replace(" ", "")
    found = (amount, row_index)

  return found


# function to compute a mean value of a list
def Average(lst):
    """Return the arithmetic mean of ``lst``, or -1 when it is empty."""
    count = len(lst)
    return sum(lst) / count if count > 0 else -1


# function to find the address and company name in a receipt
def find_addr_comp(text):
  """Pick the address and the company name out of the OCR lines.

  Every line is tagged with ``tag_model``; consecutive lines sharing a tag
  form a run, and for each wanted tag the longest run (the first one on
  ties) is taken.

  Args:
    text: DataFrame with ``text``, ``conf`` and ``bbox`` columns. A
      ``Y_tag`` column holding the predicted tags is added to it in place.

  Returns:
    A two-item list ``[address, company]``. Each item is
    ``(joined_text, mean_conf, bboxes)``, or ``('', -1, [])`` when no line
    got that tag.
  """
  print('ADDRESS and COMPANY search')
  text['Y_tag'] = tag_model.predict(vocabulary.transform(text['text']))

  # A new run id starts every time the tag differs from the previous line.
  run_ids = (text['Y_tag'].shift() != text['Y_tag']).cumsum()
  runs = text.groupby(run_ids)

  results = []
  for wanted in ('address', 'company'):
    best = pd.DataFrame()
    for _, run in runs:
      tags_in_run = set(run['Y_tag'].tolist())
      # Strictly greater keeps the first run when several share the length.
      if tags_in_run == {wanted} and len(run) > len(best):
        best = run

    print('max_group', best)
    if len(best) > 0:
      results.append((' '.join(best['text'].tolist()),
                      Average(best['conf'].tolist()),
                      best['bbox'].tolist()))
    else:
      results.append(('', -1, []))

  return results


# function to find the date in a receipt
def find_date(text):
  """Find the first date in the text of a receipt.

  Three formats are tried in order of preference, each over all rows before
  moving on to the next:

  1. numeric day/month/year, e.g. ``12/03/2020`` or ``12-03-20``
  2. month written as a word, e.g. ``15 MARCH 2019`` or ``3 Jan 2021``
  3. two numeric parts, e.g. ``05/21``

  Month names are matched case-insensitively both when selecting a row and
  when extracting the date from it, so a mixed-case row that is selected is
  never dropped afterwards.

  Args:
    text: receipt text with rows separated by ``"\\n"``.

  Returns:
    ``(date_text, row_index)`` for the first row matching the most preferred
    format, or ``('', -1)`` when no date is found.
  """
  print('DATE search')
  date_patterns = (
      r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}',
      r'(\d{1,2})?[\s-]?(?:JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)[a-zA-Z.,-]*[\s]? \d{4}',
      r'\d{1,2}[/-]\d{2,4}',
  )
  rows = text.split("\n")

  for pattern in date_patterns:
    # The flag only matters for the month-name pattern; the other two are
    # purely numeric.
    date_re = re.compile(pattern, re.I)
    for row_index, row in enumerate(rows):
      match = date_re.search(row)
      if match:
        return (match.group(), row_index)

  return ('', -1)


# function to extract relevant information of a receipt
def summarize_ticket(text):
  """Collect company, date, address and total of a receipt.

  Args:
    text: DataFrame of OCR lines with ``text``, ``conf`` and ``bbox``
      columns, one row per line.

  Returns:
    Dict with the keys ``company``, ``date``, ``address`` and ``total`` (in
    that order). Date and total are ``(value, conf, [bbox])`` of the line
    they were found on, or ``('', -1, '')`` when missing; address and
    company are whatever ``find_addr_comp`` returns for them.
  """
  # The regex-based finders work on the upper-cased text, one OCR line per row.
  upper_text = '\n'.join(text['text'].tolist()).upper()

  def located(hit):
    # Attach confidence and bounding box of the row a value was found on.
    value, row_index = hit
    if row_index == -1:
      return ('', -1, '')
    record = text.iloc[row_index]
    return (value, record['conf'], [record['bbox']])

  total_field = located(find_total(upper_text))
  date_field = located(find_date(upper_text))
  address_field, company_field = find_addr_comp(text)

  return {'company': company_field,
          'date': date_field,
          'address': address_field,
          'total': total_field}


def join_bbox_result(df):
  """Merge the four box columns of ``df`` into a single ``bbox`` column.

  Args:
    df: DataFrame with ``top``, ``left``, ``height`` and ``width`` columns.
      It is not modified.

  Returns:
    A copy of ``df`` where those four columns are replaced by ``bbox``,
    holding ``[top, left, height, width]`` lists.
  """
  box_columns = ['top', 'left', 'height', 'width']
  result = df.copy(deep=True)

  # Read each row once and pull the four coordinates out of it.
  rows = (result.iloc[position] for position in range(len(result)))
  result['bbox'] = [[row[name] for name in box_columns] for row in rows]

  return result.drop(columns=box_columns)


def draw_image_highlighted(msg, image_path, n_boxes_pred=None, boxcolor_pred=(0, 0, 255), boxsize_pred=1):
    """Save a copy of the picture with the relevant boxes highlighted.

    Args:
        msg: Telegram message dict; sender and date name the output file.
        image_path: path of the picture to read.
        n_boxes_pred: sequence of box groups, one group per piece of
            information. Each group is iterated; an item that is a plain list
            is drawn as one ``[top, left, height, width]`` box, any other
            item is iterated and each of its elements drawn as such a box.
            ``None`` means no groups.
        boxcolor_pred: unused; kept for interface compatibility.
        boxsize_pred: thickness passed to ``cv2.rectangle``.

    Returns:
        Path of the written image. When nothing was drawn the picture is
        written back unchanged instead of failing on an undefined result.

    Raises:
        IOError: when the picture cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # Fail with a clear message instead of an AttributeError on .shape.
        raise IOError('Could not read image ' + str(image_path))

    colors = [(54, 105, 139), (13, 170, 0), (139, 72, 26), (0, 0, 139)]
    # Black image of the same dimensions; the rectangles are drawn on it and
    # then blended over the picture.
    blk = np.zeros(img.shape, np.uint8)
    groups = n_boxes_pred if n_boxes_pred is not None else []

    def draw_box(box, color):
        # box is [top, left, height, width]; OpenCV points are (x, y).
        top, left, height, width = box[0], box[1], box[2], box[3]
        corner_a = (int(left), int(top + height))
        corner_b = (int(left + width), int(top))
        cv2.rectangle(blk, corner_a, corner_b, color, boxsize_pred)

    drawn = 0
    # NOTE(review): a single group is skipped, exactly as before — confirm
    # whether `> 0` was intended.
    if len(groups) > 1:
        for group_index, group in enumerate(groups):
            # Wrap around so more than four groups cannot raise IndexError.
            color = colors[group_index % len(colors)]
            for boxes in group:
                if isinstance(boxes, list):
                    # A plain list is itself one box.
                    draw_box(boxes, color)
                    drawn += 1
                else:
                    for b in boxes:
                        draw_box(b, color)
                        drawn += 1

        print('drawing')

    # Blend once, after all rectangles are in place.
    out = cv2.addWeighted(img, 1.0, blk, 0.8, 1) if drawn else img

    sender = msg['from']
    # `username` is optional in Telegram; .get() avoids a KeyError.
    image_path_sent = ROOT+folder_images+ str(sender.get('username')) + '_' + str(sender['first_name']) + '_' + str(msg['date']) + ".jpg"
    cv2.imwrite(image_path_sent, out)
    return image_path_sent



# main function to extract relevant information of an image
def extract(line, bot, chat_id, msg, image_path):
    """Run OCR on a receipt picture and send the user a summary.

    The picture is read with Tesseract, words are grouped into lines, the
    relevant fields are located, and the user receives a text summary plus
    the picture with those fields highlighted. The summary is also appended
    to ``outputClassifier.txt`` and saved as JSON.

    Args:
        line: log prefix built by the caller. It is not used here; the
            parameter is kept so existing callers keep working.
        bot: object exposing ``sendMessage`` and ``sendPhoto``.
        chat_id: chat the replies are sent to.
        msg: Telegram message dict; sender and date name the output files.
        image_path: path of the picture to analyse.

    Returns:
        The dict produced by ``summarize_ticket``.
    """
    bot.sendMessage(chat_id, "I'm summarizing it for you...")

    resTesseract = pytesseract.image_to_data(image_path, lang='eng', output_type='data.frame', config=options)
    # Rows with conf == -1 are dropped before the words are grouped.
    resTesseract_text = resTesseract[resTesseract.conf != -1]
    resTesseract_text = join_bbox_result(resTesseract_text)
    # One row per OCR line. Missing cells are skipped and everything else is
    # converted with str(), so a non-string cell cannot make join() raise.
    tesseract_lines = resTesseract_text.groupby(by=['page_num', 'block_num', 'par_num', 'line_num'], as_index = False).agg({ 'text': lambda x: ' '.join(str(word) for word in x.dropna()), 'conf': 'mean', 'bbox':lambda x: x.apply(list)})
    rellevant_info = summarize_ticket(tesseract_lines)
    print('summarized')

    # Third element of every field holds its bounding boxes.
    bboxes_to_draw = [rellevant_info[key][2] for key in rellevant_info]
    image_drawn = draw_image_highlighted(msg, image_path, n_boxes_pred=bboxes_to_draw, boxcolor_pred=(0,0,255), boxsize_pred=-1)

    with open('outputClassifier.txt', 'a') as f:
        f.write('Information extracted from'+image_path+ ':'+ str(rellevant_info) + os.linesep)

    # One "label value (confidence)" line per field.
    # NOTE(review): OCR text containing Markdown control characters may make
    # Telegram reject this message — consider escaping it.
    labelled_fields = (('*Company:* ', 'company'),
                       ('*Address:* ', 'address'),
                       ('*Date*: ', 'date'),
                       ('*Total amount:* ', 'total'))
    messgToSend = 'Rellevant information:\n' + ''.join(
        label + rellevant_info[key][0] + ' (' + str(round(rellevant_info[key][1], 2)) + ')\n'
        for label, key in labelled_fields)
    bot.sendMessage(chat_id, messgToSend, parse_mode= 'Markdown')

    # Close the picture once it has been uploaded.
    with open(image_drawn, 'rb') as photo_file:
        bot.sendPhoto(chat_id, photo=photo_file)

    # Only value and confidence are persisted; bounding boxes are left out.
    infoToSave = {key: (rellevant_info[key][0], rellevant_info[key][1])
                  for key in ('company', 'date', 'address', 'total')}

    sender = msg['from']
    # `username` is optional in Telegram; .get() avoids a KeyError.
    summary_path=ROOT+folder_summary + str(sender.get('username')) + '_' + str(sender['first_name']) + '_' + str(msg['date']) + ".json"
    with open(summary_path, 'w') as f:
        json.dump(infoToSave, f)

    return rellevant_info





################################
##                            ##
##     3. TEXT TO SPEECH      ##
##                            ##
################################
# functions to express the date in a spoken format
def get_date(text):
    """Rewrite a date as "<month name> <ordinal day> <year>".

    Args:
        text: date string; it is lower-cased and parsed day-first.

    Returns:
        The spoken form, with the month name taken from ``MONTHS``.
    """
    parsed = parser.parse(text.lower(), dayfirst=True)
    return "{} {} {}".format(MONTHS[parsed.month - 1], ordinal(parsed.day), parsed.year)

def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (1st, 2nd, 11th...)."""
    if n % 100 in (11, 12, 13):
        # The teens always take 'th', whatever their last digit is.
        ending = 'th'
    else:
        ending = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return str(n) + ending

# functions to express the total amount in a spoken format
def get_money(text):
  """Rewrite an amount such as ``12.50`` as "12 euros and 50 cents".

  Args:
    text: amount with ``.`` or ``,`` as decimal separator.

  Returns:
    The spoken form; the cents part is left out when it is missing or zero.
  """
  whole, *fraction = re.split(r'\.|,', text)

  if fraction and int(fraction[0]) > 0:
    return '{} euros and {} cents'.format(whole, fraction[0])
  return '{} euros'.format(whole)

# main function to transform text to speech
def read_aloud(line, bot, chat_id, msg, data):
    """Build a spoken summary of a receipt and send it as a voice message.

    Args:
        line: log prefix built by the caller. It is not used here; the
            parameter is kept so existing callers keep working.
        bot: object exposing ``sendVoice``.
        chat_id: chat the voice message is sent to.
        msg: Telegram message dict; sender and date name the audio file.
        data: dict with ``company``, ``date``, ``total`` and ``address``
            entries whose first element is the text ('' when missing).
    """
    text = 'This is a receipt'
    if data['company'][0] != '':
      text += ' from ' + data['company'][0]

    if data['date'][0] != '':
      try:
        spoken_date = get_date(data['date'][0])
      except (ValueError, OverflowError):
        # OCR can produce something that looks like a date but cannot be
        # parsed; read the raw text instead of aborting the whole reply.
        spoken_date = data['date'][0]
      text += ', date at ' + spoken_date + '.'

    if data['total'][0] != '':
      text += ' Total amount is ' + get_money(data['total'][0]) + '.'

    if data['address'][0] != '':
      text += ' Comming from '+ data['address'][0]

    tts = gTTS(text)
    sender = msg['from']
    # `username` is optional in Telegram; .get() avoids a KeyError.
    audio_path=ROOT+folder_audio + str(sender.get('username')) + '_' + str(sender['first_name']) + '_' + str(msg['date']) + ".mp3"
    tts.save(audio_path)

    # Close the audio file once it has been uploaded.
    with open(audio_path, 'rb') as voice_file:
        bot.sendVoice(chat_id, voice=voice_file)



################################
##                            ##
##       BOT INTERACTION      ##
##                            ##
################################

# function to get the messages sent by users to the bot
def handle(msg):
  """Process one incoming Telegram message.

  Anything that is not a photo gets a short explanation of what the bot
  does. A photo is classified and, when it is a ticket, summarised and read
  aloud.

  Args:
    msg: Telegram message dict as delivered by telepot.
  """
  content_type, chat_type, chat_id = telepot.glance(msg)

  # Sender fields such as `username` and `language_code` are optional in
  # Telegram; .get() keeps a missing one from raising before any reply.
  sender = msg.get('from', {})
  line= str(sender.get('username')) + ";" + str(sender.get('first_name'))  + ";" + str(sender.get('language_code')) + ";" + str(msg['date'])
  print(line)

  if content_type != 'photo':
      bot.sendMessage(chat_id, "I help vision impaired people to see the world. Send me a picture!")
      return

  # classify() has nothing to return when it rejects the picture; only
  # unpack when there is a result.
  result = classify(line, bot, chat_id, msg)
  if not result:
      return

  image, y_pre = result
  if y_pre == 'ticket':
      summarizedText = extract(line, bot, chat_id, msg, image)
      read_aloud(line, bot, chat_id, msg, summarizedText)


# Create the bot and register handle() as the callback for incoming messages.
token = API_KEY
bot = telepot.Bot(token)
bot.message_loop(handle)
print('Listening ...')

# Sleep forever so the script stays alive; message_loop() presumably delivers
# messages in the background — TODO confirm against the telepot docs.
while True:
    time.sleep(10)

Mounted at /content/drive
