# In [None]:
import tkinter as tk
from tkinter import filedialog
import cv2
import pytesseract
import pandas as pd
import re
import os

# Setting the path to the Tesseract executable.
# Resolution order:
#   1. the TESSERACT_CMD environment variable, when set (lets the script run
#      on machines where Tesseract is installed somewhere else);
#   2. the standard Windows install location below, when that file exists;
#   3. otherwise pytesseract's own default is left untouched, so a
#      `tesseract` binary reachable through PATH can still be picked up.
_DEFAULT_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
_tesseract_cmd_override = os.environ.get("TESSERACT_CMD")
if _tesseract_cmd_override:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd_override
elif os.path.isfile(_DEFAULT_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _DEFAULT_TESSERACT_CMD

def process_image(image_path):
    """Run OCR on an image file and return the recognised text.

    The image is cleaned up before being handed to Tesseract: it is
    converted to grayscale, lightly blurred, binarised with Otsu's method,
    de-speckled with a morphological opening and finally inverted back so
    the result is dark text on a light background.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The text returned by ``pytesseract.image_to_string``.

    Raises:
        ValueError: If OpenCV cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error would surface later as an opaque
        # OpenCV assertion inside cvtColor.
        raise ValueError(f"Could not read image file: {image_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)

    # Inverse binary threshold: text becomes white on black, which is the
    # polarity the morphological opening below operates on.
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # A 3x3 opening removes isolated white specks (noise) from the mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

    # Flip back to black text on a white background for Tesseract.
    invert = 255 - opening

    # --psm 6: treat the image as a single uniform block of text.
    text = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
    return text

def process_text(ocr_text):
    """Extract ride statistics from OCR output.

    Each known field is searched for case-insensitively with a regular
    expression; fields that are not found are simply left out of the result.

    Args:
        ocr_text: Raw text produced by the OCR step. ``None`` or an empty
            string yields an empty result.

    Returns:
        A dict mapping field names ("Time Summary", "Distance", "Avg Speed",
        "Max Altitude", "Avg Altitude" and, when an ``Avg Altitude A : B``
        pair is present, "Elevation Gain") to cleaned-up string values.
    """
    if not ocr_text:
        return {}

    # Regex patterns to extract relevant data.
    # The original literal listed "Avg Speed" twice; Python keeps only the
    # last value of a duplicated key, so the pattern below is the one that
    # was always in effect. It sits in the slot of the first occurrence so
    # the key (and therefore column) order is unchanged.
    patterns = {
        "Time Summary": r"Time Summary\s*([\d\w\s]+?)(?=\n|$)",
        "Distance": r"Distance\s*([\d\.]+mile)",
        "Avg Speed": r"Avg Speed\s*([\d\w\s:.]+?)(?=\n|$)",
        # NOTE(review): this searches for the "Avg Altitude" label, not
        # "Max Altitude". It may be deliberate (tuned to how the OCR lays
        # out the two readings) or a copy-paste slip -- confirm against
        # real OCR output before changing it.
        "Max Altitude": r"Avg Altitude\s*([\d,]+)",
        "Avg Altitude": r"Avg Altitude\s*([\d,]+)",
    }

    # Extracting and cleaning the data
    extracted_data = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, ocr_text, re.IGNORECASE)
        if not match:
            continue
        value = match.group(1).strip()
        if key == "Avg Speed":
            # Drop a leading decimal number, then repair the known OCR
            # misreading of the unit and join the remaining parts with dots.
            value = re.sub(r"^\d+\.\d+\s*", "", value)
            value = value.replace("Sinpn", "mph").replace(" ", ".").replace("..", "")
        elif key in ("Max Altitude", "Avg Altitude"):
            # The capture group only admits digits and commas, so no
            # further cleaning is needed before adding the unit.
            value = value + " ft"
        extracted_data[key] = value

    # Further process for elevation gain: when the altitude line carries two
    # readings separated by a colon, the second one replaces "Avg Altitude"
    # and their difference is reported as "Elevation Gain".
    pair = re.search(r"Avg Altitude\s*([\d,]+)\s*:\s*([\d,]+)", ocr_text, re.IGNORECASE)
    if pair:
        first_reading = pair.group(1).replace(",", "")
        second_reading = pair.group(2).replace(",", "")
        # A capture made up solely of commas leaves an empty string, which
        # int() would reject; skip the computation in that case.
        if first_reading and second_reading:
            extracted_data["Avg Altitude"] = second_reading + " ft"
            gain = int(second_reading) - int(first_reading)
            extracted_data["Elevation Gain"] = f"{gain} ft"

    return extracted_data

# DataFrame to store all data cumulatively: upload_action() adds one row per
# processed image, and save_csv() replaces it with a fresh empty frame once
# the contents have been written to disk.
cumulative_data_df = pd.DataFrame()

def upload_action():
    """Let the user pick images, OCR them and show the accumulated results.

    Every selected file is run through ``process_image`` and
    ``process_text``; the extracted fields plus the file's base name (under
    the "Image" key) become one new row of the module-level
    ``cumulative_data_df``. The text widget is then refreshed with the
    whole table.
    """
    global cumulative_data_df
    file_paths = filedialog.askopenfilenames()

    new_rows = []
    for path in file_paths:
        data = process_text(process_image(path))
        data['Image'] = os.path.basename(path)
        new_rows.append(data)

    if new_rows:
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # build one frame for the whole batch and concatenate it once.
        new_rows_df = pd.DataFrame(new_rows)
        if cumulative_data_df.empty:
            # Nothing accumulated yet: adopt the batch directly rather than
            # concatenating with an empty frame.
            cumulative_data_df = new_rows_df
        else:
            cumulative_data_df = pd.concat(
                [cumulative_data_df, new_rows_df], ignore_index=True
            )

    result_text.delete('1.0', tk.END)  # Clear existing text
    # Display dataframe as string in text widget
    result_text.insert(tk.END, cumulative_data_df.to_string(index=False))

def save_csv():
    """Export the accumulated results to a CSV file chosen by the user.

    Opens a "save as" dialog; if the user cancels, nothing happens.
    Otherwise the module-level ``cumulative_data_df`` is written without its
    index, a confirmation is printed, and the frame is reset to empty.
    """
    global cumulative_data_df

    save_path = filedialog.asksaveasfilename(
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv")],
    )
    if not save_path:
        # Dialog dismissed without choosing a file.
        return

    cumulative_data_df.to_csv(save_path, index=False)
    print(f"Data saved to {save_path}")

    # Start a fresh collection once the current one has been saved.
    cumulative_data_df = pd.DataFrame()

# Main application window
root = tk.Tk()
root.title("EV_IMAGE_TEXT_Extractor")

# Build the widgets first: the two action buttons, then the text area that
# upload_action() writes the results table into.
upload_btn = tk.Button(master=root, text="Upload Images", command=upload_action)
save_btn = tk.Button(master=root, text="Save CSV", command=save_csv)
result_text = tk.Text(master=root, height=20, width=80)

# Lay them out top to bottom in the same order they were created.
upload_btn.pack()
save_btn.pack()
result_text.pack()

# Hand control to Tk's event loop.
root.mainloop()
