In [11]:
import pytesseract
import cv2
from textblob import TextBlob
import numpy as np
from pytesseract import Output
import re

In [12]:
import nltk
# Fetch the NLTK 'words' corpus; the logged output shows this is a no-op when
# the package is already up to date in the local nltk_data directory.
nltk.download('words')

# Imported after the download so the corpus data is available when it is read.
from nltk.corpus import words

# English vocabulary list; consulted later to keep only spell-corrected OCR
# tokens that are real words.
# NOTE(review): the name `words` is rebound to plain token lists by later
# cells, so re-run this cell before calling words.words() again.
word_list = words.words()

[nltk_data] Downloading package words to /Users/faustina/nltk_data...
[nltk_data]   Package words is already up-to-date!


In [13]:
# get grayscale image
def get_grayscale(image):
    """Convert a BGR image to grayscale.

    Args:
        image: Image array handed to ``cv2.cvtColor`` with the
            ``cv2.COLOR_BGR2GRAY`` conversion code.

    Returns:
        The converted image as returned by ``cv2.cvtColor``.
    """
    # The third positional argument of cv2.cvtColor is the optional output
    # buffer (dst), not a structuring element, so no kernel is passed here.
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image):
    """Smooth ``image`` with a median blur using an aperture size of 5.

    Returns the blurred image produced by ``cv2.medianBlur``.
    """
    aperture = 5
    blurred = cv2.medianBlur(image, aperture)
    return blurred
 
#thresholding
def thresholding(image):
    """Binarize ``image`` with binary thresholding combined with Otsu's flag.

    ``cv2.threshold`` returns a pair; only its second element (the
    thresholded image) is returned to the caller.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(image, 0, 255, mode)
    return binary

#dilation
def dilate(image):
    """Dilate ``image`` once with a 5x5 all-ones ``uint8`` structuring element."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
    
#erosion
def erode(image):
    """Erode ``image`` once with a 5x5 all-ones ``uint8`` structuring element."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

#opening - erosion followed by dilation
def opening(image):
    """Apply a morphological opening (erosion followed by dilation).

    Uses ``cv2.morphologyEx`` with ``cv2.MORPH_OPEN`` and a 5x5 all-ones
    ``uint8`` structuring element, returning the resulting image.
    """
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    operation = cv2.MORPH_OPEN
    return cv2.morphologyEx(image, operation, structuring_element)

#canny edge detection
def canny(image):
    """Run Canny edge detection on ``image`` with thresholds 100 and 200."""
    threshold1, threshold2 = 100, 200
    edges = cv2.Canny(image, threshold1, threshold2)
    return edges

#skew correction
def deskew(image):
    """Rotate ``image`` so that its foreground content is axis-aligned.

    The skew is estimated from the minimum-area rectangle around all pixels
    greater than zero, then the image is rotated about its centre by the
    opposite angle.

    Args:
        image: Image array; pixels with a value above 0 are treated as
            foreground.

    Returns:
        The rotated image (same width and height as the input). If the image
        has no foreground pixels, an unrotated copy is returned.
    """
    # np.where yields (row, col) index pairs for every foreground pixel.
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        # Nothing to fit a rectangle to; minAreaRect would fail on empty input.
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # A rectangle's orientation repeats every 90 degrees. Depending on the
    # OpenCV version the angle is reported in [-90, 0) or in (0, 90], so fold
    # either convention into [-45, 45) before negating it. For the negative
    # convention this is identical to the previous two-branch logic.
    if angle < -45:
        angle += 90
    elif angle >= 45:
        angle -= 90
    angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Replicate border pixels so the corners exposed by the rotation are filled.
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

#template matching
def match_template(image, template):
    """Match ``template`` against ``image`` with ``cv2.TM_CCOEFF_NORMED``.

    Returns whatever ``cv2.matchTemplate`` produces for that method.
    """
    method = cv2.TM_CCOEFF_NORMED
    scores = cv2.matchTemplate(image, template, method)
    return scores

In [126]:
# Load one scraped post image and OCR its grayscale version with Tesseract.
# NOTE(review): absolute, machine-specific path; adjust for the local checkout.
project_folder = '/Users/faustina/METIS/BOOTCAMPWORK/Project 4/autotherapy/data/scraped_accounts/alyssamariewellness/'
ipath = '2017-06-06_07-08-49_UTC'
image_path = project_folder + ipath + '.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside cvtColor.
if img is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# cvtColor takes (src, code); the previous third argument was an output buffer
# slot fed with `kernel`, a name that is only defined in a later cell.
morph = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Keep only the non-empty tokens reported by Tesseract.
d = pytesseract.image_to_data(morph, output_type=Output.DICT)
# NOTE(review): this rebinds `words`, which an earlier cell imported from
# nltk.corpus; the name is kept because following cells reuse it the same way.
words = [w for w in d['text'] if len(w)]
' '.join(words)

'         Tips for Dating Someone with Depression'

In [127]:
# Build several preprocessed variants of `img` to compare OCR quality.
# 5x5 all-ones structuring element shared by the morphological operations.
kernel = np.ones((5,5), np.uint8)
noise = cv2.medianBlur(img, 5)
# cvtColor takes (src, code); passing `kernel` as a third argument would use it
# as the output buffer, which could overwrite the structuring element.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
erosion = cv2.erode(gray, kernel, iterations = 1)
dilation = cv2.dilate(gray, kernel, iterations = 1)
# NOTE(review): this rebinds `opening`, hiding the opening() helper defined in
# an earlier cell; the name is kept because the next cell reads it.
opening = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
closing = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
gradient = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, kernel)
tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel)
blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)

In [128]:
# OCR every preprocessed variant, print the raw tokens, then print only the
# spell-corrected tokens that are present in the NLTK vocabulary.

# Build the lookup once: set membership is constant-time, whereas `in` on the
# word_list list scans the whole corpus for every token.
vocabulary = set(word_list)
variants = [noise, gray, erosion, dilation, opening, closing, gradient, tophat, blackhat]

for idx, morph in enumerate(variants):
    print(idx)
    d = pytesseract.image_to_data(morph, output_type=Output.DICT)
    words = list(d['text'])
    print(' '.join(words))
    kept = []
    for t in words:
        # Take the leading run of word characters; tokens that do not start
        # with one (blank strings, punctuation) are skipped explicitly instead
        # of relying on a bare except to hide the failed match.
        found = re.match(r'(\w+)', t)
        if found is None:
            continue
        correction = TextBlob(found.group(0).lower()).correct().raw
        if correction in vocabulary:
            kept.append(correction)
    # Each kept word is followed by a single space, matching the old output.
    s = ''.join(word + ' ' for word in kept)
    print(s, '\n')

0
              _/Tips for Dating  Someone with  Depression â€”
for dating someone with depression  

1
                  Tips for Dating  Someone with  Depression
for dating someone with depression  

2
                  Tips for Dating  Someone with  Depression
for dating someone with depression  

3
    Lo oWien  Could make    4, BA                epression
lo could make ba expression  

4
                  Tips for Dating  Someone with  Depression
for dating someone with depression  

5
                       L Wioh y \  Covld make ) \  Yow happy: |                 \ j= Someone with  Depression
l with y could make you happy j someone with depression  

6
                    _- Tips for Dating  _ Someone with  pce S Sion
for dating someone with pace s soon  

7
       
 

8

 



In [18]:
from google.cloud import vision
from google.oauth2 import service_account
from dotenv import load_dotenv
import os


# Create a Google Cloud Vision client from a service-account file whose
# location is read from the project's .env file.
# NOTE(review): expanduser only rewrites a leading "~", so it leaves this
# relative path as-is; the path is resolved against the kernel's working dir.
project_folder = os.path.expanduser('../../') # the folder of your project
load_dotenv(os.path.join(project_folder, '.env'))

GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
# os.getenv returns None for an unset variable; fail with a clear message
# instead of a TypeError from the string concatenation below.
if not GOOGLE_APPLICATION_CREDENTIALS:
    raise RuntimeError(
        'GOOGLE_APPLICATION_CREDENTIALS is not set; define it in '
        + os.path.join(project_folder, '.env')
    )
# The configured value is appended to project_folder, i.e. it is treated as a
# path relative to the project root.
credentials = service_account.Credentials.from_service_account_file(project_folder + GOOGLE_APPLICATION_CREDENTIALS)

# Send requests to the EU Vision endpoint.
client_options = {'api_endpoint': 'eu-vision.googleapis.com'}

client = vision.ImageAnnotatorClient(client_options=client_options, credentials=credentials)

In [4]:
import io
# Run Google Cloud Vision text detection on one image and print every
# annotation together with the corners of its bounding polygon.
# NOTE(review): absolute, machine-specific path; adjust for the local checkout.
path = '/Users/faustina/Documents/Instagram/minaa_b/2018-04-22_12-26-34_UTC.jpg'
with open(path, 'rb') as image_file:
    content = image_file.read()

# NOTE(review): `vision.types` is the pre-2.0 google-cloud-vision layout;
# confirm the installed client version before changing this call.
image = vision.types.Image(content=content)

response = client.text_detection(image=image)
# The API reports request-level failures in response.error rather than raising,
# so surface them instead of silently printing an empty result.
if response.error.message:
    raise RuntimeError(
        'Vision API text detection failed: {}'.format(response.error.message)
    )
texts = response.text_annotations
print('Texts:')

for text in texts:
    print('\n"{}"'.format(text.description))

    vertices = (['({},{})'.format(vertex.x, vertex.y)
                for vertex in text.bounding_poly.vertices])

    print('bounds: {}'.format(','.join(vertices)))

Texts:

"you ARE ALLOWED
TO LET GO. BUT
WHEN DO - LET
you
GO WITH KINDNESS.
LET GO WITH LOVE.
NEVER LET GO WITH
HATE IN youR HEART,
MB.
"
bounds: (110,44),(543,44),(543,578),(110,578)

"you"
bounds: (126,50),(211,48),(212,87),(127,89)

"ARE"
bounds: (212,51),(289,49),(290,75),(213,77)

"ALLOWED"
bounds: (310,49),(503,44),(504,77),(311,82)

"TO"
bounds: (113,116),(196,116),(196,152),(113,152)

"LET"
bounds: (197,120),(266,120),(266,147),(197,147)

"GO."
bounds: (284,116),(393,116),(393,152),(284,152)

"BUT"
bounds: (395,116),(442,116),(442,152),(395,152)

"WHEN"
bounds: (126,185),(240,183),(240,210),(126,212)

"DO"
bounds: (352,179),(438,178),(439,218),(353,219)

"-"
bounds: (441,177),(486,176),(487,216),(442,217)

"LET"
bounds: (488,176),(534,175),(535,216),(489,217)

"you"
bounds: (274,185),(329,184),(330,231),(275,232)

"GO"
bounds: (119,251),(196,251),(196,286),(119,286)

"WITH"
bounds: (194,256),(281,256),(281,280),(194,280)

"KINDNESS."
bounds: (314,252),(504,251),(504,282),(314,2

In [7]:
# Show the description of the first annotation; in the printed results it is
# the whole detected text, while the following entries are single words.
texts[0].description

'you ARE ALLOWED\nTO LET GO. BUT\nWHEN DO - LET\nyou\nGO WITH KINDNESS.\nLET GO WITH LOVE.\nNEVER LET GO WITH\nHATE IN youR HEART,\nMB.\n'