In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score

In [2]:
!pip install pdf2image Pillow numpy opencv-python



In [3]:
import zipfile
import os
from PIL import Image
import numpy as np

def extract_and_load_images(zip_file_path):
    """Extract JPEG images from a zip file and load them into a list."""
    images = []
    
    # Create a temporary directory to extract images
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Extract all files to a temporary folder
        zip_ref.extractall('temp_images')
        extracted_files = zip_ref.namelist()

        # Load images from the extracted files
        for file_name in extracted_files:
            if file_name.endswith('.jpg') or file_name.endswith('.jpeg'):
                img_path = os.path.join('temp_images', file_name)
                img = Image.open(img_path)
                img = img.resize((128, 128))  # Resize image to uniform size
                images.append(np.array(img))

    # Clean up the temporary directory if needed
    # Uncomment the next lines to delete the temp directory after loading images
    # import shutil
    # shutil.rmtree('temp_images')
    
    return np.array(images)

# Example usage
zip_file_path = 'ilovepdf_pages-to-jpg.zip'  # Replace with your zip file path
images = extract_and_load_images(zip_file_path)

print(f'Loaded {len(images)} images from the zip file.')

Loaded 6 images from the zip file.


In [4]:
import zipfile
import os
from PIL import Image
import numpy as np

def extract_and_load_images(zip_file_path, img_size=(128, 128)):
    """Extract JPEG images from a zip file and load them into a list."""
    images = []
    
    # Create a temporary directory to extract images
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Extract all files to a temporary folder
        zip_ref.extractall('temp_images')
        extracted_files = zip_ref.namelist()

        # Load images from the extracted files
        for file_name in extracted_files:
            if file_name.endswith('.jpg') or file_name.endswith('.jpeg'):
                img_path = os.path.join('temp_images', file_name)
                img = Image.open(img_path)
                img = img.resize(img_size)  # Resize image to uniform size
                images.append(np.array(img))

    return np.array(images)

def extract_and_load_masks(zip_file_path, img_size=(128, 128)):
    """Extract binary masks from a zip file and load them into a list."""
    masks = []
    
    # Create a temporary directory to extract masks
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall('temp_masks')
        extracted_files = zip_ref.namelist()

        # Load masks from the extracted files
        for file_name in extracted_files:
            if file_name.endswith('.jpg') or file_name.endswith('.jpeg'):
                mask_path = os.path.join('temp_masks', file_name)
                mask = Image.open(mask_path).convert('L')  # Convert to grayscale
                mask = mask.resize(img_size)  # Resize mask to uniform size
                mask = np.array(mask) > 127  # Binarize the mask (0 or 1)
                masks.append(mask.astype(np.float32))

    return np.array(masks)

def load_dataset(images_zip_path, masks_zip_path, img_size=(128, 128)):
    """Load images and binary masks from zip files."""
    X = extract_and_load_images(images_zip_path, img_size)
    y = extract_and_load_masks(masks_zip_path, img_size)
    return X, y

# Example usage
images_zip_path = 'ilovepdf_pages-to-jpg.zip'   # Replace with your zip file path for images
masks_zip_path = 'ilovepdf_pages-to-jpg.zip'      # Replace with your zip file path for masks

X, y = load_dataset(images_zip_path, masks_zip_path)

print(f'Loaded {len(X)} images and {len(y)} masks.')


Loaded 6 images and 6 masks.


In [5]:
# Example shape
X = np.random.rand(100, 128, 128, 3)  # Example image shape (dummy data)
y = np.random.randint(0, 2, (100, 128, 128, 1))  # Example mask shape (dummy data)

# Split dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Create a CRNN model
def create_crnn_model(input_shape):
    inputs = layers.Input(shape=input_shape)
    
    # Encoder (Downsampling path)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    
    # Decoder (Upsampling path)
    x = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(x)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    
    # Output layer
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return model


In [7]:
# Build and train the model
model = create_crnn_model((128, 128, 3))
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=16)

# Predictions
predictions = model.predict(X_val)
predictions = (predictions > 0.5).astype(int)

# Flatten predictions and ground truths for metric calculations
y_val_flat = y_val.flatten()
predictions_flat = predictions.flatten()

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2s/step - accuracy: 0.5008 - loss: 0.6932 - val_accuracy: 0.4999 - val_loss: 0.6932
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 633ms/step - accuracy: 0.5016 - loss: 0.6931 - val_accuracy: 0.5016 - val_loss: 0.6931
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 652ms/step - accuracy: 0.5022 - loss: 0.6931 - val_accuracy: 0.4996 - val_loss: 0.6932
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 802ms/step - accuracy: 0.5024 - loss: 0.6931 - val_accuracy: 0.5008 - val_loss: 0.6932
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 580ms/step - accuracy: 0.5022 - loss: 0.6931 - val_accuracy: 0.5008 - val_loss: 0.6931
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 567ms/step - accuracy: 0.5031 - loss: 0.6931 - val_accuracy: 0.5008 - val_loss: 0.6932
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━

In [8]:
# Calculate evaluation metrics
precision = precision_score(y_val_flat, predictions_flat)
recall = recall_score(y_val_flat, predictions_flat)
f1 = f1_score(y_val_flat, predictions_flat)
iou = jaccard_score(y_val_flat, predictions_flat)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"IoU: {iou:.2f}")

Precision: 0.50
Recall: 0.22
F1 Score: 0.31
IoU: 0.18
