In [None]:
import os
import time
import glob
import re
import numpy as np
import pandas as pd
import cv2
import pytesseract
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from datetime import datetime
from IPython.display import clear_output

# Trained Keras classifier used to recognise fresh vs. rotten fruit
model_fruit = load_model('fruit_classification_model1.h5')

# Class labels; a prediction's arg-max index is looked up in this list
categories = [
    'freshapples',
    'freshbanana',
    'freshoranges',
    'rottenapples',
    'rottenbanana',
    'rottenoranges',
]

# Paths: the folder polled for new images and the CSV that receives OCR results
image_dir = "captured_images"
CSV_FILE = 'product_details.csv'

# Field names of one extracted product record (also the CSV header)
columns = [
    'Brand',
    'Product_Name',
    'Net_Weight',
    'MRP',
    'Mfg_Date',
    'Exp_Date',
    'Category',
    'Lot_No',
    'Misc',
]

# Load previously saved records, or start a fresh CSV containing only the
# header row. A zero-byte file makes read_csv raise EmptyDataError, which is
# treated the same as a missing file: there is no data yet.
try:
    df = pd.read_csv(CSV_FILE)
except (FileNotFoundError, pd.errors.EmptyDataError):
    df = pd.DataFrame(columns=columns)
    df.to_csv(CSV_FILE, index=False)

def preprocess_image(image):
    """Binarize an image with Otsu thresholding to improve OCR results.

    Args:
        image: Image array such as the one returned by ``cv2.imread``; either
            a colour (BGR) image or an already single-channel 2-D image.

    Returns:
        The thresholded single-channel image produced by ``cv2.threshold``.

    Raises:
        ValueError: If ``image`` is ``None``, which is what ``cv2.imread``
            returns when a file is missing or cannot be decoded.
    """
    if image is None:
        raise ValueError(
            "Cannot preprocess image: got None (file missing or unreadable)."
        )

    # A 2-D array is already grayscale; converting it again would fail.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is derived from the image histogram and
    # the value passed here is ignored, so 0 is used as an explicit placeholder.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return thresh

def extract_details(text):
    """Extract product details from OCR output.

    The raw text and the parsed fields are printed for inspection. Fields that
    cannot be found are left as empty strings.

    Args:
        text: Text returned by the OCR engine for one product label.

    Returns:
        dict: One entry per name in ``columns``. ``Net_Weight``, ``MRP``,
        ``Exp_Date`` and ``Mfg_Date`` are filled from regex matches;
        ``Brand`` and ``Product_Name`` are the first and second non-blank
        lines of ``text``.
    """
    data = {col: '' for col in columns}  # Initialize with empty fields

    # Display the full extracted text
    print("\n--- Full Extracted Text ---\n", text)

    # Quantity may carry a decimal part (e.g. "1.5 kg"); group 2 is the
    # quantity together with its unit.
    net_weight = re.search(
        r'(Net\s*Weight|NET WT|Wt)\s*[:\-\s]*(\d+(?:\.\d+)?\s*(?:g|kg|ml|l))',
        text,
        re.IGNORECASE,
    )
    # Accept an optional "₹" or "Rs." prefix and prices with or without a
    # decimal part (e.g. "MRP: 50", "MRP Rs. 49.5", "MRP ₹120.00").
    mrp = re.search(
        r'MRP[\s:]*(?:₹|Rs\.?)?\s*(\d+(?:\.\d{1,2})?)',
        text,
        re.IGNORECASE,
    )
    exp_date = re.search(
        r'(Exp\.?|Expiry)\s*Date[:\-\s]*(\d{1,2}\s\w+\s\d{4})',
        text,
        re.IGNORECASE,
    )
    mfg_date = re.search(
        r'(Mfg\.?|Manufacturing)\s*Date[:\-\s]*(\d{1,2}\s\w+\s\d{4})',
        text,
        re.IGNORECASE,
    )

    if net_weight:
        data['Net_Weight'] = net_weight.group(2)
    if mrp:
        data['MRP'] = mrp.group(1)
    if exp_date:
        data['Exp_Date'] = exp_date.group(2)
        check_expiry(data['Exp_Date'])
    if mfg_date:
        data['Mfg_Date'] = mfg_date.group(2)

    # OCR output frequently starts with blank lines; skip them so the brand
    # is the first line that actually contains text.
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if lines:
        data['Brand'] = lines[0]
    if len(lines) > 1:
        data['Product_Name'] = lines[1]

    # Display the extracted details
    print("\n--- Extracted Product Details ---")
    for key, value in data.items():
        print(f"{key}: {value}")

    return data

def check_expiry(expiry_date):
    """Print a warning if the given expiry date lies in the past.

    Args:
        expiry_date: Date string in "day month year" form. The month may be
            abbreviated or spelled out, e.g. ``"05 Jan 2025"`` or
            ``"5 January 2025"``.

    Returns:
        None. Prints "Warning: This product is expired!" when the date is
        before today, and "Invalid expiry date format." when the string
        cannot be parsed with either supported format.
    """
    exp = None
    # Try the abbreviated month first, then the full month name.
    for fmt in ("%d %b %Y", "%d %B %Y"):
        try:
            exp = datetime.strptime(expiry_date, fmt)
        except ValueError:
            continue
        break

    if exp is None:
        print("Invalid expiry date format.")
        return

    # Compare calendar dates rather than timestamps: the parsed value is at
    # midnight, so a product expiring today would otherwise count as expired.
    if exp.date() < datetime.today().date():
        print("Warning: This product is expired!")

def save_to_csv(data):
    """Append one record to the module-level table and rewrite the CSV file.

    Args:
        data: Mapping of column name to value for a single product.
    """
    global df
    # Wrap the record in a one-row frame so it can be concatenated
    df = pd.concat([df, pd.DataFrame([data])], ignore_index=True)
    df.to_csv(CSV_FILE, index=False)
    print("Data saved to CSV successfully!")

def load_image_for_prediction(img_path, target_size=(100, 100)):
    """Read an image file and turn it into a one-element batch for the model.

    Args:
        img_path: Path of the image file to load.
        target_size: Size the image is loaded at; defaults to ``(100, 100)``.

    Returns:
        The image array with a leading axis added and divided by 255.0.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=target_size))
    batch = np.expand_dims(pixels, axis=0)
    return batch / 255.0  # Normalize

def predict_fruit_category(model, img_path):
    """Classify a fruit image and report the winning label with its score.

    Args:
        model: Object whose ``predict`` method is called on the image batch.
        img_path: Path of the image file to classify.

    Returns:
        tuple: ``(label, score)`` where ``label`` is the entry of
        ``categories`` at the arg-max index and ``score`` is the model output
        at that index.
    """
    scores = model.predict(load_image_for_prediction(img_path))
    best = np.argmax(scores)
    return categories[best], scores[0][best]

def monitor_directory():
    """Poll ``image_dir`` for new ``*.jpg`` files and process each one once.

    Every new image is first passed to the fruit classifier. When the top
    class score exceeds 0.5 the prediction is printed and the image is shown;
    otherwise the image is binarized, run through Tesseract, and the parsed
    product details are saved to the CSV file.

    Images that cannot be read are retried on later polls (the file may still
    be in the middle of being written) and skipped after a few failed
    attempts, so a single bad file cannot stop the monitor.

    The loop never returns; interrupt the process to stop it.
    """
    print("Monitoring the directory for new images...")
    processed_files = set()  # Paths that need no further attention
    read_failures = {}  # path -> number of failed cv2.imread attempts
    max_read_attempts = 3

    while True:
        image_files = glob.glob(os.path.join(image_dir, "*.jpg"))
        new_files = [path for path in image_files if path not in processed_files]

        for img_path in new_files:
            print(f"Processing image: {img_path}")

            # Load the image
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports failure by returning None, not by raising.
                attempts = read_failures.get(img_path, 0) + 1
                read_failures[img_path] = attempts
                if attempts >= max_read_attempts:
                    print(f"Skipping unreadable image: {img_path}")
                    processed_files.add(img_path)
                continue

            # Perform fruit classification
            result, probability = predict_fruit_category(model_fruit, img_path)

            # If classified as a fruit with high confidence
            if probability > 0.5:
                print(f"The fruit is: {result} with confidence: {probability:.2f}")
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                plt.title(f'Prediction: {result}, Confidence: {probability:.2f}')
                plt.axis('off')
                plt.show()
            else:
                # Perform OCR extraction
                processed_img = preprocess_image(img)
                text = pytesseract.image_to_string(processed_img)
                product_data = extract_details(text)
                save_to_csv(product_data)

            # Mark file as processed
            processed_files.add(img_path)

            # Clear the output for better monitoring
            clear_output(wait=True)
            time.sleep(3)  # Delay before next iteration

        # Only report an idle poll when nothing new was actually found
        if not new_files:
            print("No new images found. Checking again...")
        time.sleep(1)

# Start the polling loop only when this code runs as the entry point
# (``__name__`` is "__main__"); monitor_directory() does not return.
if __name__ == "__main__":
    monitor_directory()


In [4]:
import os
import time
import glob
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from pytesseract import image_to_string
from IPython.display import clear_output

# File locations: the saved classifier and the CSV that receives OCR output
FRUIT_MODEL_PATH = 'fruit_classification_model1.h5'
CSV_FILE = "product_data.csv"

# Field names of one product record (used as the CSV header)
columns = [
    'Brand',
    'Product_Name',
    'Net_Weight',
    'MRP',
    'Mfg_Date',
    'Exp_Date',
    'Category',
    'Lot_No',
    'Misc',
]

# Fruit classifier and its class labels; a prediction's arg-max index is
# looked up in this list
model_fruit = load_model(FRUIT_MODEL_PATH)
categories = [
    'freshapples',
    'freshbanana',
    'freshoranges',
    'rottenapples',
    'rottenbanana',
    'rottenoranges',
]

def load_image_for_prediction(img_path, target_size=(100, 100)):
    """Read an image file and turn it into a one-element batch for the model.

    Args:
        img_path: Path of the image file to load.
        target_size: Size the image is loaded at; defaults to ``(100, 100)``.

    Returns:
        The image array divided by 255.0, with a leading axis added.
    """
    loaded = image.load_img(img_path, target_size=target_size)
    scaled = image.img_to_array(loaded) / 255.0
    batch = np.expand_dims(scaled, axis=0)
    return batch

def predict_fruit_category(model, img_path):
    """Classify a fruit image and report the winning label with its score.

    Args:
        model: Object whose ``predict`` method is called on the image batch.
        img_path: Path of the image file to classify.

    Returns:
        tuple: ``(label, score)`` where ``label`` is the entry of
        ``categories`` at the arg-max index and ``score`` is the model output
        at that index.
    """
    scores = model.predict(load_image_for_prediction(img_path))
    top = np.argmax(scores)
    label = categories[top]
    confidence = scores[0][top]
    return label, confidence

def extract_text_from_image(img_path):
    """Run Tesseract OCR on the image stored at ``img_path``.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The text returned by ``image_to_string`` for that image.
    """
    return image_to_string(cv2.imread(img_path))

def is_fruit_image(img_path, threshold=0.6):
    """Tell whether the fruit model is confident enough about an image.

    Args:
        img_path: Path of the image file to classify.
        threshold: Score the top prediction must exceed; defaults to 0.6.

    Returns:
        bool: ``True`` when the top prediction's score is strictly greater
        than ``threshold``.
    """
    label_and_score = predict_fruit_category(model_fruit, img_path)
    return label_and_score[1] > threshold

def save_data_to_csv(data, csv_file=CSV_FILE):
    """Append one record to a CSV file, creating the file when needed.

    The existing file (if any) is read, the record is added as a new last
    row, and the whole table is written back.

    Args:
        data: Mapping of column name to value for a single product.
        csv_file: Path of the CSV file; defaults to ``CSV_FILE``.
    """
    df = None
    if os.path.exists(csv_file) and os.path.getsize(csv_file) > 0:
        try:
            # Read every cell as text and keep blanks as '' so the round trip
            # does not reformat stored values (e.g. "007" -> 7, "50.00" -> 50.0).
            df = pd.read_csv(csv_file, dtype=str, keep_default_na=False)
        except pd.errors.EmptyDataError:
            # File has bytes but no parsable columns (e.g. only a newline)
            df = None
    if df is None:
        df = pd.DataFrame(columns=columns)

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    df = pd.concat([df, pd.DataFrame([data])], ignore_index=True)
    df.to_csv(csv_file, index=False)

# Directory monitoring logic: poll the capture folder forever and handle each
# new "image-*.jpg" exactly once. Interrupt the kernel/process to stop.
image_dir = "captured_images"
print("Monitoring the directory for new images...")

# Paths already handled. Files are left on disk, so without this record every
# poll would reprocess all images and append duplicate rows to the CSV.
processed_files = set()

while True:
    image_files = glob.glob(os.path.join(image_dir, "image-*.jpg"))
    new_files = [path for path in image_files if path not in processed_files]

    if new_files:
        for img_path in new_files:
            print(f"Processing image: {img_path}")

            if is_fruit_image(img_path):
                # Fruit prediction logic
                result, probability = predict_fruit_category(model_fruit, img_path)
                print(f"The fruit is: {result} with confidence: {probability:.2f}")

                img = cv2.imread(img_path)
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                plt.title(f'Prediction: {result}, Confidence: {probability:.2f}')
                plt.axis('off')
                plt.show()

            else:
                # Label extraction logic
                text = extract_text_from_image(img_path)
                print("--- Extracted Text ---")
                print(text)

                # Save relevant data to CSV (update logic to extract meaningful data from text)
                data = {'Brand': '', 'Product_Name': '', 'Net_Weight': '', 'MRP': '',
                        'Mfg_Date': '', 'Exp_Date': '', 'Category': '', 'Lot_No': '', 'Misc': text}
                save_data_to_csv(data)

            # Remember the file so later polls skip it, then clear the output
            processed_files.add(img_path)
            clear_output(wait=True)
            time.sleep(3)

    else:
        print("No new images found. Checking again...")
        time.sleep(1)


KeyboardInterrupt: 