# Setup and Installations

In [1]:
# Install required packages
!pip install google-cloud-vision google-generativeai opencv-python pillow

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import Libraries

In [2]:
import cv2
import numpy as np
from google.cloud import vision
import google.generativeai as genai
import re
import time
import os
import json
import pandas as pd
from difflib import SequenceMatcher
from google.colab import files
import glob

# Bengali Pipeline Class

In [3]:
class BengaliAnalyzer:
    def __init__(self):
        self.vision_client = None
        self.gemini_model = None
        self.results_summary = []

    def setup_apis(self, credentials_path, gemini_api_key):
        """Setup Google Vision and Gemini APIs"""
        try:
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
            self.vision_client = vision.ImageAnnotatorClient()

            genai.configure(api_key=gemini_api_key)
            self.gemini_model = genai.GenerativeModel('gemini-1.5-flash')

            print("APIs setup complete!")
            return True
        except Exception as e:
            print(f"Setup failed: {e}")
            return False

    def preprocess_image(self, image_path):
        """Image preprocessing"""
        img = cv2.imread(image_path)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        denoised = cv2.fastNlMeansDenoising(gray)
        binary = cv2.adaptiveThreshold(denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

        _, encoded = cv2.imencode('.png', binary)
        return encoded.tobytes()

    def extract_text(self, image_bytes):
        """Extract text using Google Vision API"""
        try:
            image = vision.Image(content=image_bytes)
            response = self.vision_client.text_detection(image=image)

            if response.text_annotations:
                return response.text_annotations[0].description.strip()
            return ""
        except Exception as e:
            print(f"Vision API error: {e}")
            return ""

    def find_broken_words(self, text):
        """Find potentially broken words"""
        broken_words = []
        words = text.split()

        for word in words:
            has_bengali = bool(re.search(r'[\u0980-\u09FF]', word))
            has_english = bool(re.search(r'[a-zA-Z]', word))

            if (has_bengali and has_english) or (len(word) == 1 and has_bengali):
                broken_words.append(word)

        return broken_words

    def correct_text(self, original_text, broken_words):
        """Correct text using Gemini"""
        if not broken_words:
            return original_text

        try:
            prompt = f"""Fix OCR errors in this Bengali text:
"{original_text}"

Broken words: {', '.join(broken_words)}

Return only the corrected Bengali text:"""

            response = self.gemini_model.generate_content(prompt)
            corrected = response.text.strip()

            if len(corrected) > 0 and len(corrected) < len(original_text) * 2:
                return corrected
            return original_text

        except Exception as e:
            print(f"Gemini error: {e}")
            return original_text

    def calculate_metrics(self, extracted_text, ground_truth):
        """Calculate accuracy metrics"""
        if not ground_truth.strip():
            return 0.0, 0.0, 0, 0, 0, 0

        # Clean texts
        extracted_clean = re.sub(r'\s+', ' ', extracted_text.strip())
        ground_truth_clean = re.sub(r'\s+', ' ', ground_truth.strip())

        # Character accuracy
        char_accuracy = SequenceMatcher(None, extracted_clean, ground_truth_clean).ratio() * 100

        # Word accuracy
        extracted_words = extracted_clean.split()
        ground_truth_words = ground_truth_clean.split()

        correct_words = 0
        for word in ground_truth_words:
            if word in extracted_words:
                correct_words += 1

        word_accuracy = (correct_words / len(ground_truth_words)) * 100 if ground_truth_words else 0

        # Character counts
        correct_chars = int(len(ground_truth_clean) * char_accuracy / 100)
        total_chars = len(ground_truth_clean)

        return char_accuracy, word_accuracy, correct_words, len(ground_truth_words), correct_chars, total_chars



# Configuration and Setup

In [None]:
# Initialize analyzer
analyzer = BengaliAnalyzer()

# Upload credentials (run this once)
print("Upload your Google Cloud credentials JSON file:")
uploaded_creds = files.upload()
creds_filename = list(uploaded_creds.keys())[0]

# Enter API key
gemini_key = input("Enter your Gemini API key: ")

# Setup APIs
analyzer.setup_apis(creds_filename, gemini_key)

# Define paths
DRIVE_BASE_PATH = "/content/drive/MyDrive"  # Adjust if your drive structure is different
DATASETS_PATH = f"{DRIVE_BASE_PATH}/BT"  # Folder containing D1, D2, D3, D4, D5
OUTPUT_PATH = f"{DRIVE_BASE_PATH}/BT/REC_OUTPUTS"

# Create output directory
os.makedirs(OUTPUT_PATH, exist_ok=True)

print(f"Looking for datasets in: {DATASETS_PATH}")
print(f"Results will be saved to: {OUTPUT_PATH}")

# Dataset Processing Function

In [5]:
def process_dataset(dataset_name, dataset_path, analyzer):
    """Process a single dataset (D1, D2, etc.)"""

    print(f"\n{'='*60}")
    print(f"Processing Dataset: {dataset_name}")
    print(f"{'='*60}")

    # Define expected file names
    expected_files = {
        'clean': 'Clean_Handwriting.png',
        'fast': 'Fast_Writing.png',
        'ruled': 'Ruled_Paper.png',
        'ground_truth': 'Typed_GroundTruth.txt'
    }

    # Check if all required files exist
    missing_files = []
    file_paths = {}

    for file_type, filename in expected_files.items():
        file_path = os.path.join(dataset_path, filename)
        if os.path.exists(file_path):
            file_paths[file_type] = file_path
        else:
            missing_files.append(filename)

    if missing_files:
        print(f"Missing files in {dataset_name}: {', '.join(missing_files)}")
        return None

    # Read ground truth
    with open(file_paths['ground_truth'], 'r', encoding='utf-8') as f:
        ground_truth = f.read().strip()

    print(f"Ground truth loaded: {len(ground_truth)} characters")

    # Process each image type
    image_types = ['clean', 'fast', 'ruled']
    dataset_results = []

    for image_type in image_types:
        print(f"\nProcessing {image_type} handwriting...")

        # Process image
        image_bytes = analyzer.preprocess_image(file_paths[image_type])
        extracted_text = analyzer.extract_text(image_bytes)

        if extracted_text:
            broken_words = analyzer.find_broken_words(extracted_text)
            corrected_text = analyzer.correct_text(extracted_text, broken_words)
            corrected_text = re.sub(r'\s+', ' ', corrected_text).strip()
        else:
            corrected_text = ""

        # Calculate metrics
        char_acc, word_acc, correct_words, total_words, correct_chars, total_chars = \
            analyzer.calculate_metrics(corrected_text, ground_truth)

        # Save individual result
        output_filename = f"{dataset_name}_{image_type}_output.txt"
        output_filepath = os.path.join(OUTPUT_PATH, output_filename)

        with open(output_filepath, 'w', encoding='utf-8') as f:
            f.write(corrected_text)

        # Store results
        result = {
            'dataset': dataset_name,
            'image_type': image_type,
            'extracted_text': corrected_text,
            'char_accuracy': char_acc,
            'word_accuracy': word_acc,
            'correct_words': correct_words,
            'total_words': total_words,
            'correct_chars': correct_chars,
            'total_chars': total_chars,
            'output_file': output_filename
        }

        dataset_results.append(result)

        print(f"{image_type}: {char_acc:.1f}% char, {word_acc:.1f}% word accuracy")

        time.sleep(1)  # Rate limiting

    return dataset_results

# Process All Datasets

In [6]:
# Process all datasets
all_results = []

for i in range(1, 6):  # D1 to D5
    dataset_name = f"D{i}"
    dataset_path = os.path.join(DATASETS_PATH, dataset_name)

    if os.path.exists(dataset_path):
        results = process_dataset(dataset_name, dataset_path, analyzer)
        if results:
            all_results.extend(results)
    else:
        print(f"Dataset {dataset_name} not found at {dataset_path}")

print(f"\nProcessed {len(all_results)} image-dataset combinations")


Processing Dataset: D1
Ground truth loaded: 1106 characters

Processing clean handwriting...
clean: 48.9% char, 51.0% word accuracy

Processing fast handwriting...
fast: 29.9% char, 47.1% word accuracy

Processing ruled handwriting...
ruled: 62.2% char, 47.1% word accuracy

Processing Dataset: D2
Ground truth loaded: 756 characters

Processing clean handwriting...
clean: 3.6% char, 0.9% word accuracy

Processing fast handwriting...
fast: 4.6% char, 0.9% word accuracy

Processing ruled handwriting...
ruled: 5.5% char, 0.9% word accuracy

Processing Dataset: D3
Ground truth loaded: 692 characters

Processing clean handwriting...
clean: 3.6% char, 2.7% word accuracy

Processing fast handwriting...
fast: 3.3% char, 2.7% word accuracy

Processing ruled handwriting...
ruled: 3.1% char, 2.7% word accuracy

Processing Dataset: D4
Ground truth loaded: 674 characters

Processing clean handwriting...
clean: 69.6% char, 56.5% word accuracy

Processing fast handwriting...
fast: 57.3% char, 56.5% w

# Generate Comparison Table

In [7]:
def create_comparison_table(results):
    """Create comprehensive comparison table"""

    # Create DataFrame
    df = pd.DataFrame(results)

    if df.empty:
        print("No results to display")
        return None

    # Create detailed table
    detailed_table = df[['dataset', 'image_type', 'char_accuracy', 'word_accuracy',
                        'correct_words', 'total_words', 'correct_chars', 'total_chars']].copy()

    # Round percentages
    detailed_table['char_accuracy'] = detailed_table['char_accuracy'].round(1)
    detailed_table['word_accuracy'] = detailed_table['word_accuracy'].round(1)

    # Create summary by dataset
    summary_by_dataset = df.groupby('dataset').agg({
        'char_accuracy': 'mean',
        'word_accuracy': 'mean',
        'correct_words': 'sum',
        'total_words': 'sum',
        'correct_chars': 'sum',
        'total_chars': 'sum'
    }).round(1)

    # Create summary by image type
    summary_by_type = df.groupby('image_type').agg({
        'char_accuracy': 'mean',
        'word_accuracy': 'mean',
        'correct_words': 'sum',
        'total_words': 'sum',
        'correct_chars': 'sum',
        'total_chars': 'sum'
    }).round(1)

    # Overall summary
    overall_summary = {
        'total_datasets': df['dataset'].nunique(),
        'total_images': len(df),
        'avg_char_accuracy': df['char_accuracy'].mean(),
        'avg_word_accuracy': df['word_accuracy'].mean(),
        'total_correct_words': df['correct_words'].sum(),
        'total_words': df['total_words'].sum(),
        'total_correct_chars': df['correct_chars'].sum(),
        'total_chars': df['total_chars'].sum()
    }

    return detailed_table, summary_by_dataset, summary_by_type, overall_summary

# Generate tables
detailed_table, summary_by_dataset, summary_by_type, overall_summary = create_comparison_table(all_results)



# Display Results

In [8]:
print("DETAILED RESULTS TABLE")
print("="*80)
print(detailed_table.to_string(index=False))

print("\n\nSUMMARY BY DATASET")
print("="*60)
print(summary_by_dataset.to_string())

print("\n\nSUMMARY BY HANDWRITING TYPE")
print("="*60)
print(summary_by_type.to_string())

print("\n\nOVERALL PERFORMANCE SUMMARY")
print("="*50)
print(f"Total Datasets Processed: {overall_summary['total_datasets']}")
print(f"Total Images Processed: {overall_summary['total_images']}")
print(f"Average Character Accuracy: {overall_summary['avg_char_accuracy']:.1f}%")
print(f"Average Word Accuracy: {overall_summary['avg_word_accuracy']:.1f}%")
print(f"Total Words Recognized: {overall_summary['total_correct_words']}/{overall_summary['total_words']} ({overall_summary['total_correct_words']/overall_summary['total_words']*100:.1f}%)")
print(f"Total Characters Recognized: {overall_summary['total_correct_chars']}/{overall_summary['total_chars']} ({overall_summary['total_correct_chars']/overall_summary['total_chars']*100:.1f}%)")


DETAILED RESULTS TABLE
dataset image_type  char_accuracy  word_accuracy  correct_words  total_words  correct_chars  total_chars
     D1      clean           48.9           51.0             79          155            538         1101
     D1       fast           29.9           47.1             73          155            329         1101
     D1      ruled           62.2           47.1             73          155            684         1101
     D2      clean            3.6            0.9              1          106             27          756
     D2       fast            4.6            0.9              1          106             34          756
     D2      ruled            5.5            0.9              1          106             41          756
     D3      clean            3.6            2.7              3          111             24          688
     D3       fast            3.3            2.7              3          111             22          688
     D3      ruled            3.

# Save Results to Drive

In [None]:
# Save detailed results to CSV
detailed_table.to_csv(f"{OUTPUT_PATH}/detailed_results.csv", index=False)

# Save summary tables
summary_by_dataset.to_csv(f"{OUTPUT_PATH}/summary_by_dataset.csv")
summary_by_type.to_csv(f"{OUTPUT_PATH}/summary_by_type.csv")

# Save comprehensive report
report_path = f"{OUTPUT_PATH}/comprehensive_report.txt"
with open(report_path, 'w', encoding='utf-8') as f:
    f.write("BENGALI HANDWRITING RECOGNITION - COMPREHENSIVE ANALYSIS\n")
    f.write("="*70 + "\n\n")

    f.write("DETAILED RESULTS:\n")
    f.write("-"*40 + "\n")
    f.write(detailed_table.to_string(index=False))
    f.write("\n\n")

    f.write("SUMMARY BY DATASET:\n")
    f.write("-"*40 + "\n")
    f.write(summary_by_dataset.to_string())
    f.write("\n\n")

    f.write("SUMMARY BY HANDWRITING TYPE:\n")
    f.write("-"*40 + "\n")
    f.write(summary_by_type.to_string())
    f.write("\n\n")

    f.write("OVERALL PERFORMANCE:\n")
    f.write("-"*40 + "\n")
    f.write(f"Total Datasets: {overall_summary['total_datasets']}\n")
    f.write(f"Total Images: {overall_summary['total_images']}\n")
    f.write(f"Avg Character Accuracy: {overall_summary['avg_char_accuracy']:.1f}%\n")
    f.write(f"Avg Word Accuracy: {overall_summary['avg_word_accuracy']:.1f}%\n")
    f.write(f"Total Words: {overall_summary['total_correct_words']}/{overall_summary['total_words']} ({overall_summary['total_correct_words']/overall_summary['total_words']*100:.1f}%)\n")
    f.write(f"Total Characters: {overall_summary['total_correct_chars']}/{overall_summary['total_chars']} ({overall_summary['total_correct_chars']/overall_summary['total_chars']*100:.1f}%)\n")

print(f"\nResults saved to Google Drive:")
print(f"   Individual outputs: {OUTPUT_PATH}/")
print(f"   Detailed results: {OUTPUT_PATH}/detailed_results.csv")
print(f"   Dataset summary: {OUTPUT_PATH}/summary_by_dataset.csv")
print(f"   Type summary: {OUTPUT_PATH}/summary_by_type.csv")
print(f"   Full report: {OUTPUT_PATH}/comprehensive_report.txt")

print("\nMulti-dataset analysis completed successfully!")