In [1]:
#For refactoring code and making adjustments

In [None]:
from PIL import Image
from pytesseract import pytesseract
import pyautogui
import os
import webbrowser
import pandas as pd
import cv2
import numpy as np
from pytesseract import Output
import imutils
import string 
from thefuzz import fuzz
from Levenshtein import distance, ratio
from numba import njit


def clean_word(word):
    for char in word:
        if char in string.punctuation:
            word = word.replace(char, "")
            
    for char in word:
        if not char.isalnum():
            word = word.replace(char, "")
            
    return word.strip().lower()


def clean_df(df):
    df['text'] = df['text'].apply(lambda x: clean_word(x))
    
    for i, row in df.iterrows():
        if row['text'] == "":
            df.at[i, 'text'] = np.nan
            
    df.dropna(inplace=True)
    df.reset_index(inplace=True)
    
    #pad dataframe for later use
    for _ in range(3):
        df = df.append(pd.Series("endofdataframe", index=df.columns), ignore_index=True)
        
    return df



@njit
def check_image_background(img):
    """Checks if a screenshots background is mainly white
        Will not invert if it is mainly white, changing a
        white photo to black will reduce performance because in
        docs it says algo performs best on white backgrounds"""
    score = 0
    PIXEL_MIN = 240
    SCORE_THRESHOLD = 0.7


    height, width, _ = img.shape
    
    for i in range(height):
        for j in range(width):
            if img[i, j][0] >= PIXEL_MIN and img[i, j][1] >=  PIXEL_MIN and img[i, j][2] >= PIXEL_MIN:
                score = score + 1
                
    if (score / (height*width)) < SCORE_THRESHOLD:
        return 1
    
    return 0



### PROCESSING OPTIONS ###
## BINARIZATION METHODS ## - Can fine tune each
#1 thresh, im_bw = cv2.threshold(gray_image, 210, 230, cv2.THRESH_BINARY)
#2 th2 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2)
#3 th3 = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

#Otsu Binarization

# Otsu's thresholding
#ret2,th2 = cv2.threshold(img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

# Otsu's thresholding after Gaussian filtering
#blur = cv.GaussianBlur(img,(5,5),0)
#ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)


#IDEA FOR BLACK -> OLD METHOD invert then gray (213 items found on spoitfy songs like clair)
#1 INVERT
#2 GRAYSCALE
#3 BINARIZATION -> Gaussian
#Lots of noise after this method but 246 items found
#After trying noise removal or thick font -> Do not use, bad


#IDEA FOR WHITE BACKground
#1 convert grayscale
#2 some for of binarization
#3 remove noise from binarization
# Best Process so far -> gray then adaptive thresh mean (gray adaptive mean thick font 474 items found on page)
# Runner Up -> gray then adaptive thresh mean then thick font (gray adaptive mean thick font 440 items found on page)
#Both are great actually must test more



def preprocesser(file_path):
    """Allows for different preprocessing techniques to be added
    onto our input image to improve tesseract"""
    
    
    base_image = cv2.imread(file_path)
    
    #Current process inverts mainly black screenshots and coverts to grayscale
    #Same process for white but no inverting (try binarization for white)
    
    #Black backgroun
    if check_image_background(base_image):
        inverted_image = cv2.bitwise_not(base_image)
        gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
        binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

        return gray_image
        
    #White background
    else:
        gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
        binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
        binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better
        
        #Both are actually good must test more and maybe adjust so the colors blue and orange get changed wht & nt blk
        
        return gray_image


def preprocesser2(file_path):
    """Exact same as preprocessor but if screen is white we do the black operations """
    
    
    base_image = cv2.imread(file_path)
    
    #Current process inverts mainly black screenshots and coverts to grayscale
    #Same process for white but no inverting (try binarization for white)
    #Black backgroun
    if check_image_background(base_image):
        gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
        binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
        binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better
        
        return gray_image
        
    #White background
    else:
        inverted_image = cv2.bitwise_not(base_image)
        gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
        binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

        return gray_image





def process_image(file_path):
    psm_version = "--psm 11"
    
    preprocessed_image = preprocesser(file_path)
    image_df = pd.DataFrame(pytesseract.image_to_data(preprocessed_image, output_type=Output.DICT, config=psm_version))

    n_boxes = len(image_df['level'])
    for i in range(n_boxes):
        (x, y, w, h) = (image_df['left'][i], image_df['top'][i], image_df['width'][i], image_df['height'][i])
        cv2.rectangle(preprocessed_image, (x, y), (x + w, y + h), (0,0,0), 2)

    cv2.imshow('img', preprocessed_image)
    cv2.waitKey(0)

    
    image_df = clean_df(image_df)

    return image_df


def process_image2(file_path):
    psm_version = "--psm 11"
    
    preprocessed_image = preprocesser2(file_path)
    image_df = pd.DataFrame(pytesseract.image_to_data(preprocessed_image, output_type=Output.DICT, config=psm_version))

    n_boxes = len(image_df['level'])
    for i in range(n_boxes):
        (x, y, w, h) = (image_df['left'][i], image_df['top'][i], image_df['width'][i], image_df['height'][i])
        cv2.rectangle(preprocessed_image, (x, y), (x + w, y + h), (0,0,0), 2)

    cv2.imshow('img', preprocessed_image)
    cv2.waitKey(0)

    
    image_df = clean_df(image_df)

    return image_df



def process_coordinates(text_row):
    x = text_row['left'] + (text_row['width'] // 1.5)
    y = text_row['top'] + (text_row['height'] // 1.5)
    
    return x, y



#Now take image data and find worddef locate_text(df, text):
def locate_text(df, text):
    results = []

    stripped_text = "".join(text.lower().split())
    split_text = text.split()
    
    for i, row in df.iterrows():
        word = row['text']

        second_word = df.iloc[i+1]['text']

        initial_fuzz = fuzz.ratio(word, stripped_text)
        second_fuzz = fuzz.ratio((word+second_word), stripped_text)
        
        if word == "endofdataframe":
            break
            
        if initial_fuzz >= 80:
            if initial_fuzz < second_fuzz:
                results.append([(word+second_word), process_coordinates(row), second_fuzz])
                
            else:
                results.append([word, process_coordinates(row), initial_fuzz])


        if second_fuzz >= 80 and initial_fuzz < 80:
            results.append([(word+second_word), process_coordinates(row), second_fuzz])
                
                
        else:
            if word in split_text[0] or split_text[0] in word: #may need to do partial_ratio(split_text[0], word)
                word = word
                second_word = word + df.iloc[i+1]['text']
                
                r1 = fuzz.ratio(word, stripped_text)
                r2 = fuzz.ratio(second_word, stripped_text)
                
                j = i + 2 #sets j to current word in df
                count = 0
                
                while r2 >= r1:
                    word = second_word
                    second_word = word + df.iloc[j]['text']
                    
                    r1 = fuzz.ratio(word, stripped_text)
                    r2 = fuzz.ratio(second_word, stripped_text)
                    
                    if r2 >= 80:
                        results.append([second_word, process_coordinates(row), r2])
                    
                    word = second_word
                    #print(word)
                
                    count += 1
                    j+=1
                
    return results


def find_best_location(r1, r2):
    results = []
    [results.append(location) for location in r1]
    [results.append(location) for location in r2]

    highest_fuzz = 0
    for location in results:
        highest_fuzz = max(highest_fuzz, location[2])



    for location in results:
        if location[2] == highest_fuzz:
            return location
            
            
            
def text_coordinates(image_path, text):
    """Given a list of text coordinates, returns the most accurate texts location"""
    current_dir = os.getcwd()
    path_to_tesseract = r"C:\Users\sbuca\Documents\pierre\autokm\Tesseract-OCR\tesseract.exe" #will fail in jupyter folder

    pytesseract.tesseract_cmd = path_to_tesseract
    
    
    df = process_image(image_path)
    df2 = process_image2(image_path)

    results = locate_text(df,text)
    results2 = locate_text(df2,text)

    if results == [] and results2 == []:
        return None
    
    target = find_best_location(results, results2)

    return target[1]
    #return (status, coors, df)


if __name__ == "__main__":
    text_coordinates(image_path, text)
    process_image(image_path)
    process_image2(image_path)
    locate_text(df,text)
    find_best_location(r1, r2)

In [1]:
from PIL import Image
from pytesseract import pytesseract
import pyautogui
import os
import webbrowser
import pandas as pd
import cv2
import numpy as np
from pytesseract import Output
import imutils
import string 
from thefuzz import fuzz
from Levenshtein import distance, ratio
from numba import njit
import mss
from multiprocessing import Process
import threading
from datetime import datetime
import time
from thefuzz import fuzz


def clean_word(word):
    for char in word:
        if char in string.punctuation:
            word = word.replace(char, "")
            
    for char in word:
        if not char.isalnum():
            word = word.replace(char, "")
            
    return word.strip().lower()


def clean_df(df):
    df['text'] = df['text'].apply(lambda x: clean_word(x))
    
    for i, row in df.iterrows():
        if row['text'] == "":
            df.at[i, 'text'] = np.nan
            
    df.dropna(inplace=True)
    df.reset_index(inplace=True)
    
    #pad dataframe for later use
    for _ in range(3):
        df = df.append(pd.Series("endofdataframe", index=df.columns), ignore_index=True)
        
    return df



@njit
def check_image_background(img):
    """Checks if a screenshots background is mainly white
        Will not invert if it is mainly white, changing a
        white photo to black will reduce performance because in
        docs it says algo performs best on white backgrounds"""
    score = 0
    PIXEL_MIN = 240
    SCORE_THRESHOLD = 0.7


    height, width, _ = img.shape
    
    for i in range(height):
        for j in range(width):
            if img[i, j][0] >= PIXEL_MIN and img[i, j][1] >=  PIXEL_MIN and img[i, j][2] >= PIXEL_MIN:
                score = score + 1
                
    if (score / (height*width)) < SCORE_THRESHOLD:
        return 1
    
    return 0


def preprocesser(file_path, opposite=False):
    """Allows for different preprocessing techniques to be added
    onto our input image to improve tesseract"""
    
    
    base_image = cv2.imread(file_path)
    
    #Current process inverts mainly black screenshots and coverts to grayscale
    #Same process for white but no inverting (try binarization for white)
    
    #Cant think of a better solution for the control flow of the opposite -> will think of one late
    if opposite is False:
        #For photos with mainly Black background
        if check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image


        #For photos with mainly White background
        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better

            #Both are actually good must test more and maybe adjust so the colors blue and orange get changed wht & nt blk

            return gray_image
        
        
    else:
        if not check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image

        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better
            
            return gray_image
        
        
#New program works up to this point

        
#Multithread the process to speed up the image extraction        
def process_image(file_path, df_list, alternative=False, show_image=False):
    psm_version = "--psm 11"
    
    preprocessed_image = preprocesser(file_path, alternative)
    
    image_df = pd.DataFrame(pytesseract.image_to_data(preprocessed_image, output_type=Output.DICT, config=psm_version))
    
    time.sleep(0.1)

    image_df = clean_df(image_df)
    
    df_list.append(image_df) #Add to a list because idk how to return when multithreading
    
    if show_image is True:
        n_boxes = len(image_df['level'])
        for i in range(n_boxes):
            (x, y, w, h) = (image_df['left'][i], image_df['top'][i], image_df['width'][i], image_df['height'][i])
            cv2.rectangle(preprocessed_image, (x, y), (x + w, y + h), (0,0,0), 2)

        cv2.imshow('img', preprocessed_image)
        cv2.waitKey(0)
        
    return

    
#Pretty sure it works up to here


def multithread_image_processing(folder_path, dfs):
    image_paths = get_images_paths(folder_path)
    
    for path in image_paths:
        monitor_path = folder_path + "\\" + path 
        
        threading.Thread(target=process_image, args=(monitor_path, dfs, False,)).start()
        threading.Thread(target=process_image, args=(monitor_path, dfs, True,)).start()
        
    return

#start refactoring here


#Now extract lines or words from the dataframes
def extract_text_lines(image_df):
    df = image_df.iloc[:-3]
    df = df[df.conf != -1]

    df["conf"] = pd.to_numeric(df["conf"], downcast="float")

    #Apply only works for single columns
    for column in ["left", "top", "height", "width"]:
        df[column] = pd.to_numeric(df[column], downcast="float")
    
    
    lines = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['text'].apply(lambda x: ' '.join(list(x))).tolist()
    #Ifuckging hate pandas so much its never wants to let you do things easily
    
    left_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['left'].mean().round().tolist()
    top_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['top'].mean().round().tolist()
    height_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['height'].mean().round().tolist()
    width_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['width'].mean().round().tolist()
    
    confs = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['conf'].mean().tolist()

    
    return list(zip(lines, left_coors, top_coors, height_coors, width_coors))


def phrase_matching(list_of_phrases, target_phrase):
    most_similar_phrase = ""
    highest_fuzz = 0
    similar_row = ()
    
    for row in list_of_phrases:
        current_ratio = fuzz.ratio(row[0].lower(),target_phrase.lower())
        
        if current_ratio > highest_fuzz:
            most_similar_phrase = row[0]
            highest_fuzz = current_ratio
            similar_row = row
            
    print(most_similar_phrase)
    return similar_row, highest_fuzz


def best_match(phrases):
    max_fuzz = 0
    coors = 0
    
    for match in phrases:
        if match[-1] > max_fuzz:
            max_fuzz = match[-1]
            coors = match[0][1:]
            
    return coors


def process_coordinates(coors):
    """Returns coors into a clickable item for pyautogui"""
    x = coors[0] + (coors[-1] // 1.5)
    y = coors[1] + (coors[-2] // 1.5)
    
    return x,y


def get_images_paths(folder_path):
    image_paths = []
    for file in os.listdir(folder_path):
        if ".png" in file:
            image_paths.append(file)
        
    return image_paths



def phrase_coordinates(target, folder_path):
    current_dir = os.getcwd()
    path_to_tesseract = r"C:\Users\sbuca\Documents\pierre\autokm\Tesseract-OCR\tesseract.exe" #will fail in jupyter folder

    pytesseract.tesseract_cmd = path_to_tesseract
    
    
    target = target.lower()
    best_phrases = []
    dfs = []
    
    x1 = datetime.now()
    multithread_image_processing(folder_path, dfs)
    
    while len(dfs) < (len(get_images_paths(folder_path))*2):
        time.sleep(0.05)
    print(datetime.now()-x1)
    
    for df in dfs:
        phrases = extract_text_lines(df)
        best_phrase = phrase_matching(phrases, target)
        best_phrases.append(best_phrase)
           
    final_match = best_match(best_phrases)
    
    return process_coordinates(final_match)
        
#Will need to refactor quitte abit -> conver the zip list into a dictionary or something 

In [None]:
if __name__ == "__main__":
    text_coordinates(image_path, text)
    process_image(image_path)
    process_image2(image_path)
    locate_text(df,text)
    find_best_location(r1, r2)

In [2]:
f_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"
x = phrase_coordinates("songs that hit like",f_path)

0:00:01.884931
spotifyi
spotifyi
songs that hit like clarity
songs that hit like clarity


In [12]:
x1 = datetime.now()
x = phrase_coordinates("songs that hit like",f_path)
print(datetime.now()-x1)

spotifyi
spotifyi
songs that hit like clarity
songs that hit like clarity
0:00:02.591290


In [14]:
get_images_paths(f_path)

['monitor1.png']

In [1]:
f_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"
# pdgkp = []
# multithread_image_processing2(f_path, pdgkp)

In [8]:
pdgkp

[              index           level        page_num       block_num  \
 0                 8               5               1               2   
 1                13               5               1               3   
 2                18               5               1               4   
 3                22               5               1               5   
 4                26               5               1               6   
 ..              ...             ...             ...             ...   
 216             725               5               1             156   
 217             729               5               1             157   
 218  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 219  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 220  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 
             par_num        line_num        word_num            left  \
 0                 1               1               1          

In [65]:
image_paths = get_images_paths(r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots")

i=0
for path in image_paths:
    monitor_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\\" + path
    print(monitor_path)
    i+=1

C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\\monitor1.png
C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\\monitor2.png


In [None]:
folder_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"
phrase_coordinates("songs that hit like", folder_path)

In [60]:
for file in os.listdir(r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"):
    print(file)

.ipynb_checkpoints
monitor1.png
monitor2.png


In [49]:
pppp = []
xsslolo=[]
xsslolo =  multithread_image_processing("", pppp)

In [8]:
x1 = datetime.now() 
phrase_coordinates("songs that hit like",0, 0)
print(datetime.now() - x1)

songs that hit like clarity
songs that hit like clarity
songs that hit like clarity
songs that hit like clarity
0:00:02.676638


In [212]:
qqq

(103.0, 627.0)

In [196]:
lines = qq[0].groupby(['page_num', 'block_num', 'par_num', 'line_num'])['text'].apply(lambda x: ' '.join(list(x))).tolist()

In [203]:
qq

[              index           level        page_num       block_num  \
 0                 8               5               1               2   
 1                13               5               1               3   
 2                18               5               1               4   
 3                22               5               1               5   
 4                26               5               1               6   
 ..              ...             ...             ...             ...   
 216             725               5               1             156   
 217             729               5               1             157   
 218  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 219  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 220  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 
             par_num        line_num        word_num            left  \
 0                 1               1               1          

In [190]:
monitor1_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor1.png"
monitor2_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor2.png"

x1 = (datetime.now())
print(x1)
xxvb = multithread_image_processing2(monitor1_path,monitor2_path)
print(datetime.now()-x1)

2023-01-07 12:46:46.509957
0:00:02.291216


In [191]:
xxvb

[              index           level        page_num       block_num  \
 0                 8               5               1               2   
 1                13               5               1               3   
 2                18               5               1               4   
 3                22               5               1               5   
 4                26               5               1               6   
 ..              ...             ...             ...             ...   
 216             725               5               1             156   
 217             729               5               1             157   
 218  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 219  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 220  endofdataframe  endofdataframe  endofdataframe  endofdataframe   
 
             par_num        line_num        word_num            left  \
 0                 1               1               1          

In [172]:
x1 = (datetime.now())
print(x1)
xxvb = multithread_image_processing("")
print(datetime.now()-x1)

2023-01-06 23:08:55.823972
0:00:02.028067


In [176]:
phrase_coordinates(xxvb, "We Up 3")

pe to
pe to
we up 3
we up 3
(47.0, 587.0, 11.0, 14.0)


(56.0, 594.0)

In [230]:
pyautogui.moveTo(103.0, 627.0)

In [139]:
(('bcvrmaaort', 590.0, 1044.0, 31.0, 364.0, 0.0), 39)[0][1:]

(590.0, 1044.0, 31.0, 364.0, 0.0)

In [3]:
import mss
import cv2
import numpy as np

with mss.mss() as sct:
    
    # Get information of monitor 2
    monitor_number = 2
    mon = sct.monitors[monitor_number]

    # The screen part to capture
    monitor = {
        "top": mon["top"],
        "left": mon["left"],
        "width": mon["width"],
        "height": mon["height"],
        "mon": monitor_number,
    }
    output = "sct-mon{mon}_{top}x{left}_{width}x{height}.png".format(**monitor)

    # Grab the data
    sct_img = sct.grab(monitor)
    
    
#     mss.tools.to_png(sct_img.rgb, sct_img.size, ou
#                      tput=r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor2.png")

In [6]:
for i in range(1, len(sct.monitors)):
    print(i)

1
2


In [6]:
monitor1_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor1.png"
monitor2_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor2.png"

In [30]:
from datetime import datetime

current_dir = os.getcwd()
path_to_tesseract = r"C:\Users\sbuca\Documents\pierre\autokm\Tesseract-OCR\tesseract.exe" #will fail in jupyter folder

pytesseract.tesseract_cmd = path_to_tesseract

process_image(file_path, alternative=False)

0:00:00.186502
0:00:01.461094


Unnamed: 0,index,level,page_num,block_num,par_num,line_num,word_num,left,top,width,height,conf,text
0,4,5,1,1,1,1,1,1728,17,41,8,73,seabass
1,9,5,1,2,1,1,1,1873,17,6,5,70,a
2,10,5,1,2,1,1,2,1892,16,10,9,70,x
3,15,5,1,3,1,1,2,1605,9,70,24,2,el
4,19,5,1,4,1,1,1,81,46,70,15,96,store
...,...,...,...,...,...,...,...,...,...,...,...,...,...
334,926,5,1,182,1,1,1,1356,1057,18,18,41,ss
335,930,5,1,183,1,1,1,1829,1060,48,10,76,162023
336,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe
337,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe,endofdataframe


In [16]:
file_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\spotify_screenshot.png"
preprocessed_image = preprocesser(file_path, opposite=True)

In [15]:
cv2.imshow('img', preprocessed_image)
cv2.waitKey(0)

-1

In [12]:
preprocessed_image

array([[209, 210, 210, ..., 230, 230, 230],
       [209, 223, 222, ..., 226, 226, 230],
       [209, 223, 223, ..., 226, 226, 230],
       ...,
       [222, 223, 223, ..., 223, 223, 221],
       [222, 223, 223, ..., 224, 223, 224],
       [222, 221, 222, ..., 222, 222, 223]], dtype=uint8)

In [25]:
from PIL import Image
from pytesseract import pytesseract
import pyautogui
import os
import webbrowser
import pandas as pd
import cv2
import numpy as np
from pytesseract import Output
import imutils
import string 
from thefuzz import fuzz
from Levenshtein import distance, ratio
from numba import njit
import mss
from multiprocessing import Process
import threading
from datetime import datetime
import time
from thefuzz import fuzz


def clean_word(word):
    for char in word:
        if char in string.punctuation:
            word = word.replace(char, "")
            
    for char in word:
        if not char.isalnum():
            word = word.replace(char, "")
            
    return word.strip().lower()


def clean_df(df):
    df['text'] = df['text'].apply(lambda x: clean_word(x))
    
    for i, row in df.iterrows():
        if row['text'] == "":
            df.at[i, 'text'] = np.nan
            
    df.dropna(inplace=True)
    df.reset_index(inplace=True)
    
    #pad dataframe for later use
    for _ in range(3):
        df = df.append(pd.Series("endofdataframe", index=df.columns), ignore_index=True)
        
    return df



@njit
def check_image_background(img):
    """Checks if a screenshots background is mainly white
        Will not invert if it is mainly white, changing a
        white photo to black will reduce performance because in
        docs it says algo performs best on white backgrounds"""
    score = 0
    PIXEL_MIN = 240
    SCORE_THRESHOLD = 0.7


    height, width, _ = img.shape
    
    for i in range(height):
        for j in range(width):
            if img[i, j][0] >= PIXEL_MIN and img[i, j][1] >=  PIXEL_MIN and img[i, j][2] >= PIXEL_MIN:
                score = score + 1
                
    if (score / (height*width)) < SCORE_THRESHOLD:
        return 1
    
    return 0


def preprocesser(file_path, opposite=False):
    """Allows for different preprocessing techniques to be added
    onto our input image to improve tesseract"""
    
    
    base_image = cv2.imread(file_path)
    
    #Current process inverts mainly black screenshots and coverts to grayscale
    #Same process for white but no inverting (try binarization for white)
    
    #Cant think of a better solution for the control flow of the opposite -> will think of one late
    if opposite is False:
        #For photos with mainly Black background
        if check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image


        #For photos with mainly White background
        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better

            #Both are actually good must test more and maybe adjust so the colors blue and orange get changed wht & nt blk

            return gray_image
        
        
    else:
        if not check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image

        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better
            
            return gray_image
        
        
#New program works up to this point

        
#Multithread the process to speed up the image extraction        
def process_image(file_path, df_list, alternative=False, show_image=False):
    psm_version = "--psm 11"
    
    preprocessed_image = preprocesser(file_path, alternative)
    
    image_df = pd.DataFrame(pytesseract.image_to_data(preprocessed_image, output_type=Output.DICT, config=psm_version))
    
    time.sleep(0.1)

    image_df = clean_df(image_df)
    
    df_list.append(image_df) #Add to a list because idk how to return when multithreading
    
    time.sleep(3)
    
    if show_image is True:
        image_df = image_df[:-3]
        
        n_boxes = len(image_df['level'])
        for i in range(n_boxes):
            (x, y, w, h) = (int(image_df['left'][i]), int(image_df['top'][i]), int(image_df['width'][i]), int(image_df['height'][i]))
            cv2.rectangle(preprocessed_image, (x, y), (x + w, y + h), (0,0,0), 2)

        cv2.imshow('img', preprocessed_image)
        cv2.waitKey(0)
        
    return

    
#Pretty sure it works up to here


def multithread_image_processing(folder_path, dfs):
    image_paths = get_images_paths(folder_path)
    
    for path in image_paths:
        monitor_path = folder_path + "\\" + path 
        
        threading.Thread(target=process_image, args=(monitor_path, dfs, False,)).start()
        threading.Thread(target=process_image, args=(monitor_path, dfs, True,)).start()
        
    return

#start refactoring here


#Now extract lines or words from the dataframes
def extract_text_lines(image_df):
    df = image_df.iloc[:-3]
    df = df[df.conf != -1]

    df["conf"] = pd.to_numeric(df["conf"], downcast="float")

    #Apply only works for single columns
    for column in ["left", "top", "height", "width"]:
        df[column] = pd.to_numeric(df[column], downcast="float")
    
    
    lines = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['text'].apply(lambda x: ' '.join(list(x))).tolist()
    #Ifuckging hate pandas so much its never wants to let you do things easily
    
    left_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['left'].mean().round().tolist()
    top_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['top'].mean().round().tolist()
    height_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['height'].mean().round().tolist()
    width_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['width'].mean().round().tolist()
    
    confs = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['conf'].mean().tolist()

    
    return list(zip(lines, left_coors, top_coors, height_coors, width_coors))







def phrase_matching(list_of_phrases, target_phrase):
    most_similar_phrase = ""
    highest_fuzz = 0
    similar_row = ()
    
    for row in list_of_phrases:
        current_ratio = fuzz.ratio(row[0].lower(),target_phrase.lower())
        
        if current_ratio > highest_fuzz:
            most_similar_phrase = row[0]
            highest_fuzz = current_ratio
            similar_row = row
            
    #print(most_similar_phrase, highest_fuzz)
    return similar_row, highest_fuzz


def best_match(phrases):
    max_fuzz = 0
    coors = 0
    
    for match in phrases:
        if match[-1] > max_fuzz:
            max_fuzz = match[-1]
            coors = match[0][1:]
            
    return coors, match[-1]


def process_phrase_coordinates(coors):
    """Returns coors into a clickable item for pyautogui"""
    x = int(coors[0]) + (int(coors[-1]) // 1.5)
    y = int(coors[1]) + (int(coors[-2]) // 1.5)
    
    return x,y


def process_text_coordinates(text_row):
    x = text_row['left'] + (text_row['width'] // 1.5)
    y = text_row['top'] + (text_row['height'] // 1.5)
    
    return x, y


def get_images_paths(folder_path):
    image_paths = []
    for file in os.listdir(folder_path):
        if ".png" in file:
            image_paths.append(file)
        
    return image_paths


#Now take image data and find worddef locate_text(df, text):
def locate_text(df, text):
    results = []

    stripped_text = "".join(text.lower().split())
    split_text = text.split()
    
    for i, row in df.iterrows():
        word = row['text'].lower()

        second_word = df.iloc[i+1]['text'].lower()

        initial_fuzz = fuzz.ratio(word, stripped_text)
        second_fuzz = fuzz.ratio((word+second_word), stripped_text)
        
        if word == "endofdataframe":
            break
            
        if initial_fuzz >= 80:
            if initial_fuzz < second_fuzz:
                results.append([(word+second_word), process_text_coordinates(row),second_fuzz])
                
            else:
                results.append([word, process_text_coordinates(row), initial_fuzz])


        if second_fuzz >= 80 and initial_fuzz < 80:
            results.append([(word+second_word), process_text_coordinates(row),second_fuzz])
                
                
        else:
            if word in split_text[0] or split_text[0] in word: #may need to do partial_ratio(split_text[0], word)
                word = word
                second_word = word + df.iloc[i+1]['text'].lower()
                
                r1 = fuzz.ratio(word, stripped_text)
                r2 = fuzz.ratio(second_word, stripped_text)
                
                j = i + 2 #sets j to current word in df
                count = 0
                
                while r2 >= r1:
                    word = second_word
                    second_word = word + df.iloc[j]['text'].lower()
                    
                    r1 = fuzz.ratio(word, stripped_text)
                    r2 = fuzz.ratio(second_word, stripped_text)
                    
                    if r2 >= 80:
                        results.append([second_word, process_text_coordinates(row), r2])
                    
                    word = second_word
                    #print(word)
                
                    count += 1
                    j+=1
                
    return results


def find_best_location(results):
    highest_fuzz = 0
    for location in results:
        highest_fuzz = max(highest_fuzz, location[2])

    for location in results:
        if location[2] == highest_fuzz:
            return location


def phrase_coordinates(target, folder_path):
    current_dir = os.getcwd()
    path_to_tesseract = r"C:\Users\sbuca\Documents\pierre\autokm\Tesseract-OCR\tesseract.exe" #will fail in jupyter folder

    pytesseract.tesseract_cmd = path_to_tesseract
    
    target = target.lower()
    best_phrases = []
    best_text = []
    dfs = []

    multithread_image_processing(folder_path, dfs)
    
    while len(dfs) < (len(get_images_paths(folder_path))*2):
        time.sleep(0.05)
    
    for df in dfs:
        phrases = extract_text_lines(df)
        best_phrase = phrase_matching(phrases, target)
        best_phrases.append(best_phrase)
        
        text_results = locate_text(df, target)
        best_text_result = find_best_location(text_results)
        
        if best_text_result:
            best_text.append(best_text_result)
           
    final_match, fuzz = best_match(best_phrases)
    text_final_match = find_best_location(best_text)
    
    print("BP: ", fuzz)
    print("\n")
    print("BT: ", text_final_match)
    
    if fuzz > text_final_match[-1]:
        return process_phrase_coordinates(final_match)
    
    else:
        return text_final_match[1]


#Maybe toss in text coordinates just as another check

        
#Will need to refactor quitte abit -> conver the zip list into a dictionary or something 
# if __name__ == "__main__":
#     phrase_coordinates(target, folder_path)
#     process_image(file_path, df_list, alternative=False, show_image=False)

In [26]:
xxppl = []

f_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"

multithread_image_processing(f_path, xxppl)

In [27]:
extract_text_lines(df)

[('e', 52.0, 97.0, 6.0, 7.0),
 ('me', 48.0, 104.0, 6.0, 13.0),
 ('s', 88.0, 137.0, 9.0, 12.0),
 ('ae', 55.0, 169.0, 3.0, 16.0),
 ('ss', 157.0, 170.0, 4.0, 12.0),
 ('a', 117.0, 183.0, 5.0, 16.0),
 ('o', 1163.0, 176.0, 10.0, 10.0),
 ('a', 87.0, 187.0, 9.0, 12.0),
 ('pierre', 316.0, 189.0, 9.0, 31.0),
 ('y', 51.0, 226.0, 7.0, 6.0),
 ('ow', 293.0, 225.0, 15.0, 32.0),
 ('iy', 636.0, 229.0, 8.0, 8.0),
 ('n', 684.0, 227.0, 12.0, 10.0),
 ('view', 794.0, 231.0, 7.0, 24.0),
 ('es', 133.0, 297.0, 3.0, 13.0),
 ('uments', 478.0, 284.0, 8.0, 37.0),
 ('iene', 550.0, 286.0, 6.0, 19.0),
 ('arch pierre', 1058.0, 284.0, 9.0, 24.0),
 ('ou', 306.0, 327.0, 9.0, 29.0),
 ('me', 348.0, 330.0, 6.0, 12.0),
 ('nan', 463.0, 324.0, 9.0, 17.0),
 ('d', 711.0, 324.0, 9.0, 6.0),
 ('ified', 756.0, 324.0, 9.0, 28.0),
 ('y onedri', 306.0, 360.0, 7.0, 21.0),
 ('p', 388.0, 359.0, 9.0, 5.0),
 ('ipynbcheckpoints', 474.0, 356.0, 12.0, 97.0),
 ('2', 745.0, 364.0, 1.0, 4.0),
 ('1032 pm', 790.0, 351.0, 18.0, 18.0),
 ('file folder

In [20]:
df = xxppl[0]

In [13]:
f_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"

phrase_coordinates("Last Checkpoint", f_path)

C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor1.png
C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots\monitor2.png
BP:  45


BT:  None


TypeError: 'NoneType' object is not subscriptable

In [15]:
"1".isdigit()

True

In [9]:
from PIL import Image
from pytesseract import pytesseract
import pyautogui
import os
import webbrowser
import pandas as pd
import cv2
import numpy as np
from pytesseract import Output
import imutils
import string 
from thefuzz import fuzz
from Levenshtein import distance, ratio
from numba import njit
import mss
from multiprocessing import Process
import threading
from datetime import datetime
import time
from thefuzz import fuzz
from screeninfo import get_monitors


def clean_word(word):
    for char in word:
        if char in string.punctuation:
            word = word.replace(char, "")
            
    for char in word:
        if not char.isalnum():
            word = word.replace(char, "")
            
    return word.strip().lower()


def clean_df(df):
    df['text'] = df['text'].apply(lambda x: clean_word(x))
    
    for i, row in df.iterrows():
        if row['text'] == "":
            df.at[i, 'text'] = np.nan
            
    df.dropna(inplace=True)
    df.reset_index(inplace=True)
    
    #pad dataframe for later use
    for _ in range(3):
        df = df.append(pd.Series("endofdataframe", index=df.columns), ignore_index=True)
        
    return df



@njit
def check_image_background(img):
    """Checks if a screenshots background is mainly white
        Will not invert if it is mainly white, changing a
        white photo to black will reduce performance because in
        docs it says algo performs best on white backgrounds"""
    score = 0
    PIXEL_MIN = 240
    SCORE_THRESHOLD = 0.7


    height, width, _ = img.shape
    
    for i in range(height):
        for j in range(width):
            if img[i, j][0] >= PIXEL_MIN and img[i, j][1] >=  PIXEL_MIN and img[i, j][2] >= PIXEL_MIN:
                score = score + 1
                
    if (score / (height*width)) < SCORE_THRESHOLD:
        return 1
    
    return 0


def preprocesser(file_path, opposite=False):
    """Allows for different preprocessing techniques to be added
    onto our input image to improve tesseract"""
    
    
    base_image = cv2.imread(file_path)
    
    #Current process inverts mainly black screenshots and coverts to grayscale
    #Same process for white but no inverting (try binarization for white)
    
    #Cant think of a better solution for the control flow of the opposite -> will think of one late
    if opposite is False:
        #For photos with mainly Black background
        if check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image


        #For photos with mainly White background
        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better

            #Both are actually good must test more and maybe adjust so the colors blue and orange get changed wht & nt blk

            return gray_image
        
        
    else:
        if not check_image_background(base_image):
            inverted_image = cv2.bitwise_not(base_image)
            gray_image = cv2.cvtColor(inverted_image, cv2.COLOR_BGR2GRAY)
            binarized_image = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)

            return gray_image

        else:
            gray_image = cv2.cvtColor(base_image, cv2.COLOR_BGR2GRAY)
            binarized_image_mean = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2) #Solid but not great
            binarized_image_gauss = cv2.adaptiveThreshold(gray_image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2) #Better
            
            return gray_image
        
        
#New program works up to this point

        
#Multithread the process to speed up the image extraction        
def process_image(file_path, df_list, monitor_number, alternative=False, show_image=False):
    psm_version = "--psm 11"
    
    preprocessed_image = preprocesser(file_path, alternative)
    
    image_df = pd.DataFrame(pytesseract.image_to_data(preprocessed_image, output_type=Output.DICT, config=psm_version))
    
    time.sleep(0.1)

    image_df = clean_df(image_df)
    
    df_list.append((image_df, monitor_number)) #Add to a list because idk how to return when multithreading
    
    time.sleep(3)
    
    if show_image is True:
        image_df = image_df[:-3]
        
        n_boxes = len(image_df['level'])
        for i in range(n_boxes):
            (x, y, w, h) = (int(image_df['left'][i]), int(image_df['top'][i]), int(image_df['width'][i]), int(image_df['height'][i]))
            cv2.rectangle(preprocessed_image, (x, y), (x + w, y + h), (0,0,0), 2)

        cv2.imshow('img', preprocessed_image)
        cv2.waitKey(0)
        
    return

    
#Pretty sure it works up to here
def get_moitor_number(montior_string):
    for char in montior_string:
        if char.isdigit():
            return char



def multithread_image_processing(folder_path, dfs):
    image_paths = get_images_paths(folder_path)
    
    for path in image_paths:
        monitor_path = folder_path + "\\" + path 
        monitor_number = get_moitor_number(path)
        
        threading.Thread(target=process_image, args=(monitor_path, dfs, monitor_number, False,)).start()
        threading.Thread(target=process_image, args=(monitor_path, dfs, monitor_number, True,)).start()
        
    return

#start refactoring here


#Now extract lines or words from the dataframes
def extract_text_lines(image_df):
    df = image_df.iloc[:-3]
    df = df[df.conf != -1]

    df["conf"] = pd.to_numeric(df["conf"], downcast="float")

    #Apply only works for single columns
    for column in ["left", "top", "height", "width"]:
        df[column] = pd.to_numeric(df[column], downcast="float")
    
    
    lines = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['text'].apply(lambda x: ' '.join(list(x))).tolist()
    #Ifuckging hate pandas so much its never wants to let you do things easily
    
    left_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['left'].mean().round().tolist()
    top_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['top'].mean().round().tolist()
    height_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['height'].mean().round().tolist()
    width_coors = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['width'].mean().round().tolist()
    
    confs = df.groupby(['page_num', 'block_num', 'par_num', 'line_num'])['conf'].mean().tolist()

    
    tuple_results = list(zip(lines, left_coors, top_coors, height_coors, width_coors))
    
    results = []
    for row in tuple_results:
        results.append([row[0], tuple(row[1:])])
        
    return results


def locate_text(df, text):
    results = []

    stripped_text = "".join(text.lower().split())
    split_text = text.split()
    
    for i, row in df.iterrows():
        word = row['text'].lower()

        second_word = df.iloc[i+1]['text'].lower()

        initial_fuzz = fuzz.ratio(word, stripped_text)
        second_fuzz = fuzz.ratio((word+second_word), stripped_text)
        
        if word == "endofdataframe":
            break
            
        if initial_fuzz >= 80:
            if initial_fuzz < second_fuzz:
                results.append([(word+second_word), (row["left"], row["top"], row["height"], row["width"]),second_fuzz])
                
            else:
                results.append([word, (row["left"], row["top"], row["height"], row["width"]), initial_fuzz])


        if second_fuzz >= 80 and initial_fuzz < 80:
            results.append([(word+second_word), (row["left"], row["top"], row["height"], row["width"]),second_fuzz])
                
                
        else:
            if word in split_text[0] or split_text[0] in word: #may need to do partial_ratio(split_text[0], word)
                word = word
                second_word = word + df.iloc[i+1]['text'].lower()
                
                r1 = fuzz.ratio(word, stripped_text)
                r2 = fuzz.ratio(second_word, stripped_text)
                
                j = i + 2 #sets j to current word in df
                count = 0
                
                while r2 >= r1:
                    word = second_word
                    second_word = word + df.iloc[j]['text'].lower()
                    
                    r1 = fuzz.ratio(word, stripped_text)
                    r2 = fuzz.ratio(second_word, stripped_text)
                    
                    if r2 >= 80:
                        results.append([second_word, (row["left"], row["top"], row["height"], row["width"]), r2])
                    
                    word = second_word
                    #print(word)
                
                    count += 1
                    j+=1
                
    return results


def find_best_phrases(phrase_data, text):
    results = []
    
    for i, row in enumerate(phrase_data):
        r = fuzz.ratio(row[0], text)
        phrase_data[i].append(r)
        
    for row in phrase_data:
        if row[-1] >= 70:
            results.append(row)
            
    return results


def get_images_paths(folder_path):
    image_paths = []
    for file in os.listdir(folder_path):
        if ".png" in file:
            image_paths.append(file)
        
    return image_paths


def process_textual_data(data):
    """Find the best match to target and return coors"""
    tmp_res = []
    max_fuzz = 0
    max_coors = 0
    monitor_loc = 1
    
    for item in data:
        monitor_num = item[-1]
        item_max_fuzz = 0
        item_max_coors = 0
        
        for match in item:
            if isinstance(match,str):
                print(match)
                continue
            
            if match[-1] > item_max_fuzz:
                item_max_fuzz = match[-1]
                item_max_coors = match[-2]
                
        if item_max_fuzz > max_fuzz:
            max_fuzz = item_max_fuzz
            max_coors = item_max_coors
            monitor_loc = monitor_num
        
    return process_coordinates(max_coors, monitor_loc)



def process_coordinates(coors, monitor_num):
    print(coors)
    x = coors[0] + (coors[-1] // 1.5)
    y = coors[1] + (coors[-2] // 1.5)
    
    #Check if monitor two and adjust coors
    if monitor_num == "2":
        monitor_data = get_monitors()[1]
        
        x += monitor_data.x + x
        
    return x, y
        

def get_coordinates(target, folder_path):
    #Setup PyTesseract
    current_dir = os.getcwd()
    path_to_tesseract = r"C:\Users\sbuca\Documents\pierre\autokm\Tesseract-OCR\tesseract.exe" #will fail in jupyter folder

    pytesseract.tesseract_cmd = path_to_tesseract
    
    target = target.lower()

    #Extract Raw Data From The Screen
    dfs = []
    multithread_image_processing(folder_path, dfs) 
    
    
    #Dynamic wait to let all threads finish
    while len(dfs) < (len(get_images_paths(folder_path))*2):
        time.sleep(0.05)
    

    #Finds all instances of our target (coors and monitor too)
    data = []
    for df, monitor_num in dfs:
        phrase_data = extract_text_lines(df)
        text_data = locate_text(df, target)

        #Get coor and fuzz data for the lines method
        best_phrases = find_best_phrases(phrase_data, target)

        results = best_phrases + text_data

        if results != []:
            results.append(monitor_num)
            data.append(results)
            
            
    coors = process_textual_data(data)
    return coors

In [10]:
get_coordinates("songs that hit like", r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots")

2
2
(24, 622, 10, 35)


(2014.0, 628.0)

In [32]:
if __name__ == "__main__":
    phrase_coordinates(target, folder_path)
    process_image(file_path, df_list, alternative=False, show_image=False)

Monitor(x=0, y=0, width=1920, height=1080, width_mm=579, height_mm=336, name='\\\\.\\DISPLAY1', is_primary=True)
Monitor(x=1920, y=0, width=1920, height=1080, width_mm=520, height_mm=320, name='\\\\.\\DISPLAY2', is_primary=False)


In [37]:
get_monitors()[1].y

0

In [41]:
xxppl = []
text = "songs that hit like"

f_path = r"C:\Users\sbuca\Documents\pierre\music_files\autogui_screenshots"

multithread_image_processing(f_path, xxppl) 

In [42]:
data = []

for df, monitor_num in xxppl:
    phrase_data = extract_text_lines(df)
    text_data = locate_text(df, text)
    
    #Get coor and fuzz data for the lines method
    best_phrases = find_best_phrases(phrase_data)
    
    results = best_phrases + text_data
    
    if results != []:
        results.append(monitor_num)
        data.append(results)


In [43]:
def process_textual_data(data):
    """Find the best match to target and return coors"""
    tmp_res = []
    max_fuzz = 0
    max_coors = 0
    monitor_loc = 1
    
    for item in data:
        monitor_num = item[-1]
        item_max_fuzz = 0
        item_max_coors = 0
        
        for match in item:
            if isinstance(match,str):
                print(match)
                continue
            
            if match[-1] > item_max_fuzz:
                item_max_fuzz = match[-1]
                item_max_coors = match[-2]
                
        if item_max_fuzz > max_fuzz:
            max_fuzz = item_max_fuzz
            max_coors = item_max_coors
            monitor_loc = monitor_num
        
    return process_coordinates(max_coors, monitor_loc)

In [44]:
process_textual_data(data)

2
2


(2014.0, 628.0)

In [28]:
data

[[['songs that hit like clarity', (86.0, 620.0, 11.0, 26.0), 83],
  ['songsthathit', (24, 622, 10, 35), 86],
  ['songsthathitlike', (24, 622, 10, 35), 100],
  ['songsthathitlikeclarity', (24, 622, 10, 35), 82],
  '1'],
 [['songs that hit like clarity', (86.0, 620.0, 11.0, 26.0), 83],
  ['songsthathit', (24, 622, 10, 35), 86],
  ['songsthathitlike', (24, 622, 10, 35), 100],
  ['songsthathitlikeclarity', (24, 622, 10, 35), 82],
  '1']]

In [45]:
pyautogui.moveTo(2014.0, 628.0)

In [70]:
pop = extract_text_lines(df)

In [64]:
pop

[['x', (1893.0, 9.0, 10.0, 10.0), 0],
 ['sbucarion', (1656.0, 26.0, 10.0, 66.0), 21],
 ['ed', (511.0, 11.0, 40.0, 364.0), 10],
 ['home', (64.0, 62.0, 10.0, 37.0), 17],
 ['artists', (641.0, 83.0, 10.0, 40.0), 31],
 ['playlists', (718.0, 83.0, 13.0, 50.0), 21],
 ['profiles', (805.0, 83.0, 10.0, 46.0), 30],
 ['podcasts shows', (926.0, 83.0, 10.0, 48.0), 30],
 ['audiobooks', (1039.0, 83.0, 10.0, 75.0), 21],
 ['search', (63.0, 102.0, 10.0, 45.0), 24],
 ['all aioums j songs', (502.0, 73.0, 30.0, 29.0), 27],
 ['your library', (80.0, 142.0, 12.0, 36.0), 26],
 ['5', (1287.0, 143.0, 7.0, 11.0), 0],
 ['0', (25.0, 137.0, 20.0, 20.0), 0],
 ['i', (1287.0, 151.0, 21.0, 11.0), 10],
 ['8888', (1521.0, 159.0, 34.0, 120.0), 0],
 ['3', (1788.0, 179.0, 19.0, 23.0), 0],
 ['9', (1736.0, 199.0, 11.0, 9.0), 0],
 ['ae', (1798.0, 200.0, 12.0, 22.0), 19],
 ['ap', (1813.0, 176.0, 31.0, 33.0), 10],
 ['create playlist', (88.0, 206.0, 12.0, 44.0), 29],
 ['si', (1767.0, 182.0, 31.0, 20.0), 19],
 ['888e', (1519.0, 205.

In [56]:
fuzz.ratio("songs that hit like", "your library")

26

In [73]:
ghp = locate_text(df, text)

In [59]:
hmf = [5,6,7,8]
hmf.append([1,2,3,4])

In [60]:
hmf

[5, 6, 7, 8, [1, 2, 3, 4]]

In [61]:
[1,2,3,4] + [5,6,7,8]

[1, 2, 3, 4, 5, 6, 7, 8]

In [71]:
xlm = best_phrases(pop)

In [75]:
xlm

[['songs that hit like clarity', (86.0, 620.0, 11.0, 26.0), 83]]

In [74]:
ghp

[['songsthathit', (24, 622, 10, 35), 86],
 ['songsthathitlike', (24, 622, 10, 35), 100],
 ['songsthathitlikeclarity', (24, 622, 10, 35), 82]]

In [76]:
ghp + xlm

[['songsthathit', (24, 622, 10, 35), 86],
 ['songsthathitlike', (24, 622, 10, 35), 100],
 ['songsthathitlikeclarity', (24, 622, 10, 35), 82],
 ['songs that hit like clarity', (86.0, 620.0, 11.0, 26.0), 83]]