# 📷 OCR Script - Membaca Text dari Gambar
### Menggunakan Tesseract OCR (Tanpa AI/LLM)
### Support: Bahasa Indonesia & English

---

**Cara Pakai:**
1. Jalankan Cell 1-3 untuk setup
2. Jalankan Cell 4 untuk upload dan proses gambar
3. Hasil akan ditampilkan dan bisa di-download

---

## 📦 Cell 1: Instalasi Dependencies
Jalankan cell ini **sekali** di awal

In [None]:
print("=" * 50)
print("üì¶ INSTALASI DEPENDENCIES")
print("=" * 50)

# Install Tesseract OCR dan bahasa Indonesia + English
!apt-get update -qq
!apt-get install -y tesseract-ocr tesseract-ocr-ind tesseract-ocr-eng -qq

# Install Python packages
!pip install pytesseract pillow -q

print("\n‚úÖ Instalasi selesai!")
print("‚úÖ Tesseract OCR + Bahasa Indonesia & English terinstall")

## 🔧 Cell 2: Import dan Verifikasi

In [None]:
import pytesseract
from PIL import Image
from google.colab import files
from datetime import datetime
import os
import io

# Verification banner.
print("=" * 50)
print("üîß VERIFIKASI INSTALASI")
print("=" * 50)

# Report the Tesseract version that pytesseract is talking to.
tesseract_version = pytesseract.get_tesseract_version()
print(f"‚úÖ Tesseract versi: {tesseract_version}")

# List every language pack Tesseract reports as installed.
available_langs = pytesseract.get_languages()
print(f"‚úÖ Bahasa tersedia: {', '.join(available_langs)}")

# Check the two languages this notebook relies on. A missing pack is called
# out explicitly instead of being silently skipped, so a failed install is
# visible here rather than surfacing later as an OCR error.
for lang_code, lang_label in (("ind", "Indonesia"), ("eng", "English")):
    if lang_code in available_langs:
        print(f"‚úÖ Bahasa {lang_label}: Tersedia")
    else:
        print(f"‚ö†Ô∏è Bahasa {lang_label}: TIDAK tersedia - jalankan ulang Cell 1")

## ⚙️ Cell 3: Fungsi OCR

In [None]:
def extract_text(image, languages="ind+eng"):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image: Image to read; handed unchanged to
            ``pytesseract.image_to_string``.
        languages: Tesseract language specification passed as ``lang``
            (defaults to ``"ind+eng"``).

    Returns:
        The OCR output with leading and trailing whitespace stripped.
    """
    # Tesseract CLI flags, passed through verbatim: OCR engine mode 3 and
    # page segmentation mode 6.
    ocr_options = {"lang": languages, "config": r'--oem 3 --psm 6'}
    raw_text = pytesseract.image_to_string(image, **ocr_options)
    return raw_text.strip()


def process_and_save(image, filename="uploaded_image", save_to_drive=False):
    """Run OCR on an image and write the result to a timestamped text file.

    Args:
        image: Image object to OCR. Its ``size`` attribute is read as
            ``(width, height)`` and the object is passed to ``extract_text``.
        filename: Name of the source image. It is written to the report
            header as given; only its base name (directory part and
            extension removed) is used to build the output file name.
        save_to_drive: When true, write under ``/content/drive/MyDrive``
            instead of ``/content``.

    Returns:
        A ``(extracted_text, output_path)`` tuple.

    Raises:
        FileNotFoundError: If the output directory does not exist, for
            example ``save_to_drive=True`` while Google Drive is not mounted.
    """
    print("\n" + "=" * 50)
    print("‚è≥ MEMPROSES GAMBAR...")
    print("=" * 50)

    # Validate the destination before doing any OCR work, so a missing
    # directory fails fast with an explicit message instead of a bare
    # open() error after the image has already been processed.
    output_dir = "/content/drive/MyDrive" if save_to_drive else "/content"
    if not os.path.isdir(output_dir):
        hint = " (Google Drive belum di-mount?)" if save_to_drive else ""
        raise FileNotFoundError(
            f"Folder output tidak ditemukan: {output_dir}{hint}"
        )

    # Use whichever of Indonesian/English is installed, in that order;
    # fall back to "eng" when neither is reported.
    available = pytesseract.get_languages()
    languages = "+".join(
        code for code in ("ind", "eng") if code in available
    ) or "eng"

    width, height = image.size[0], image.size[1]
    print(f"üåê Bahasa: {languages}")
    print(f"üì∑ Gambar: {filename}")
    print(f"üìê Ukuran: {width} x {height} pixels")

    extracted_text = extract_text(image, languages)

    # One clock reading feeds both the file name and the report header so
    # the two timestamps can never disagree.
    finished_at = datetime.now()
    timestamp = finished_at.strftime("%Y%m%d_%H%M%S")

    # basename() drops any directory part so a name such as "dir/a.png"
    # cannot point the output into a non-existent sub-directory.
    base_name = os.path.splitext(os.path.basename(filename))[0]
    output_filename = f"ocr_result_{base_name}_{timestamp}.txt"
    output_path = f"{output_dir}/{output_filename}"

    header_lines = [
        "OCR Result - Google Colab",
        "=" * 50,
        f"Source: {filename}",
        f"Processed: {finished_at.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Languages: {languages}",
        f"Image Size: {width} x {height}",
        "=" * 50,
    ]
    with open(output_path, 'w', encoding='utf-8') as f:
        # Header block, one blank line, then the OCR text.
        f.write("\n".join(header_lines) + "\n\n" + extracted_text)

    print("\n‚úÖ SELESAI!")
    print(f"üìÑ Output disimpan: {output_path}")

    return extracted_text, output_path


def display_result(text):
    """Print the OCR text inside a banner, followed by simple statistics.

    Args:
        text: OCR output to show. A falsy value (such as an empty string)
            prints a "no text detected" placeholder and no statistics.
    """
    rule = "=" * 50
    body = text if text else "(Tidak ada text yang terdeteksi)"
    print("\n" + rule, "üìù HASIL OCR", rule, body, rule, sep="\n")

    if not text:
        return

    # Counts are taken on the text exactly as given: characters via len(),
    # words via whitespace splitting, lines via newline splitting.
    stats = (
        ("Karakter", len(text)),
        ("Kata", len(text.split())),
        ("Baris", len(text.split('\n'))),
    )
    print("\nüìä Statistik:")
    for label, value in stats:
        print(f"   ‚Ä¢ {label}: {value}")


# Signal that this cell ran to completion, i.e. the helper functions above
# are now defined.
print("‚úÖ Fungsi OCR siap digunakan!")

---
## 🚀 Cell 4: Upload & Proses Gambar
Jalankan cell ini untuk **upload gambar dan ekstrak text**

In [None]:
print("=" * 50)
print("üì§ UPLOAD GAMBAR")
print("=" * 50)
print("Pilih file gambar (PNG, JPG, JPEG, BMP, TIFF, WEBP)")
print()

# Open the Colab upload widget; the result is iterated below as
# file name -> raw bytes.
uploaded = files.upload()

if uploaded:
    for filename, content in uploaded.items():
        print(f"\nüì∑ File diterima: {filename}")

        # Decode the uploaded bytes. A file Pillow cannot open is reported
        # and skipped so it does not abort the remaining uploads.
        try:
            image = Image.open(io.BytesIO(content))
        except OSError as e:
            print(f"‚ùå Bukan file gambar yang valid: {filename} ({e})")
            continue

        # Run OCR and write the result file.
        text, output_path = process_and_save(image, filename)

        # Show the recognised text and its statistics.
        display_result(text)

        # Offer the result file as a browser download.
        print("\n" + "=" * 50)
        print("üì• DOWNLOAD HASIL")
        print("=" * 50)
        files.download(output_path)
else:
    print("‚ö†Ô∏è Tidak ada file yang diupload")

---
# 📂 OPSI TAMBAHAN
Cell-cell di bawah ini bersifat **opsional**

## 💾 Opsi A: Simpan ke Google Drive
Jalankan cell ini untuk menyimpan hasil langsung ke Google Drive

In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Confirmation plus a reminder of where result files will be written.
print(
    "\n‚úÖ Google Drive terhubung!",
    "üìÅ File akan disimpan ke: /content/drive/MyDrive/",
    sep="\n",
)

In [None]:
# Upload images and store each OCR result on Google Drive.
print("üì§ UPLOAD GAMBAR (Simpan ke Google Drive)")
print()

uploaded = files.upload()

if not uploaded:
    # Same notice the main upload cell prints when nothing was selected.
    print("‚ö†Ô∏è Tidak ada file yang diupload")
else:
    for filename, content in uploaded.items():
        # A file Pillow cannot open is reported and skipped so the remaining
        # uploads are still processed.
        try:
            image = Image.open(io.BytesIO(content))
        except OSError as e:
            print(f"‚ùå Bukan file gambar yang valid: {filename} ({e})")
            continue

        text, output_path = process_and_save(image, filename, save_to_drive=True)
        display_result(text)
        print(f"\n‚úÖ Hasil disimpan di Google Drive: {output_path}")

## 🌐 Opsi B: Proses Gambar dari URL

In [None]:
import requests
from io import BytesIO

# Replace the URL below with the address of your own image.
image_url = "https://example.com/your-image.png"  # <-- replace this URL

print(f"üì• Mengunduh gambar dari: {image_url}")

try:
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server and the cell would appear to hang.
    response = requests.get(image_url, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into exceptions.
    response.raise_for_status()

    image = Image.open(BytesIO(response.content))
    text, output_path = process_and_save(image, "image_from_url")
    display_result(text)
    files.download(output_path)
except Exception as e:
    # Top-level boundary of an interactive cell: show a one-line message for
    # any download, decode or OCR failure instead of a traceback.
    print(f"‚ùå Error: {e}")

## 📚 Opsi C: Proses Banyak Gambar Sekaligus

In [None]:
print("=" * 50)
print("üì§ UPLOAD BANYAK GAMBAR")
print("=" * 50)
print("Pilih beberapa file gambar sekaligus")
print()

uploaded = files.upload()

all_results = []  # one dict per successfully processed image
skipped = []      # names of uploads Pillow could not open

for i, (filename, content) in enumerate(uploaded.items(), 1):
    print(f"\n[{i}/{len(uploaded)}] Memproses: {filename}")

    # One unreadable file must not abort the whole batch: report it,
    # remember it for the summary, and move on.
    try:
        image = Image.open(io.BytesIO(content))
    except OSError as e:
        print(f"‚ùå Bukan file gambar yang valid: {filename} ({e})")
        skipped.append(filename)
        continue

    text, output_path = process_and_save(image, filename)

    all_results.append({
        'filename': filename,
        'text': text,
        'output': output_path
    })

print("\n" + "=" * 50)
print(f"‚úÖ Selesai memproses {len(all_results)} gambar!")
if skipped:
    print(f"‚ö†Ô∏è Dilewati: {', '.join(skipped)}")
print("=" * 50)

# Offer every result file as a browser download.
print("\nüì• Downloading semua hasil...")
for result in all_results:
    files.download(result['output'])

## 📁 Opsi D: Proses Gambar dari Google Drive

In [None]:
# Mount Google Drive first (in case it is not mounted yet).
from google.colab import drive
drive.mount('/content/drive')

# Replace the path below with the path of your image on Google Drive.
image_path = "/content/drive/MyDrive/gambar_saya.png"  # <-- replace this path

print(f"üì∑ Memproses: {image_path}")

# Only opening the input image is guarded: the "file not found" message below
# names image_path, so it must not be printed for a FileNotFoundError raised
# while writing the output file.
try:
    image = Image.open(image_path)
except FileNotFoundError:
    print(f"‚ùå File tidak ditemukan: {image_path}")
    print("   Pastikan path sudah benar!")
else:
    text, output_path = process_and_save(
        image,
        os.path.basename(image_path),
        save_to_drive=True
    )
    display_result(text)
    print(f"\n‚úÖ Hasil disimpan di: {output_path}")