In [2]:
import glob
import os
import io
import sys
import shutil
import glob
import sqlite3
import numpy as np
import torch
import matplotlib.pyplot as plt
from pathlib import Path
from time import time
from PIL import Image, ImageFilter, ImageChops
import mediapy as media

In [8]:
# Delete working image, and create a new one
!rm -rf working
!mkdir working
!mkdir working/tmp_input

file_paths = [
	"662B5822-D943-4826-B322-926A8C7645C1",
	"72763945-4493-4F65-9E8F-80D8F3D2CA20",
	"371467AC-1C97-4ECF-81CB-C4381F7CE73C",
	"0AC6D7F9-C85D-418C-A21C-5934530D9782",
	"4BE8D61F-93F4-42C3-9014-7CB0C6B090FB",
]

for file_i, file_path in enumerate(file_paths):
	connection = sqlite3.connect(f'./split_dbs/db_session_{file_path}.sqlite')

	# Get all data
	cursor = connection.cursor()
	cursor.execute("SELECT image_blob FROM maptodon_raw WHERE ROWID % 10 = 0")
	results = cursor.fetchall()

	# Process the search results
	image_hashes = {}
	if len(results) > 0:
		for i, row in enumerate(results):
			image_data = row[0]

			# Save to working directory
			image = Image.open(io.BytesIO(image_data))

			image.save(f"./working/tmp_input/image{file_i}_{i:05d}.png")
		print("Saved " + str(len(results)) + " images to working directory.")
	else:
		print("No results found.")

	# Close the database connection
	connection.close()

Saved 135 images to working directory.
Saved 179 images to working directory.
Saved 116 images to working directory.
Saved 92 images to working directory.
Saved 75 images to working directory.


In [9]:
from PIL import Image
import numpy as np
import os

def is_blurry(image, threshold):
    """
    Determine if the image is blurry using the variance of the Laplacian operator.
    :param image: The image as a numpy array.
    :param threshold: The threshold to determine if the image is blurry.
    :return: True if the image is blurry, False otherwise.
    """
    kernel = np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]])
    laplacian = np.abs(np.convolve(image.flatten(), kernel.flatten(), 'same'))
    variance = np.var(laplacian)
    return variance < threshold

def delete_blurry_images(directory, threshold):
    """
    Delete all blurry images in the specified directory.
    :param directory: The directory where the images are stored.
    :param threshold: The threshold to determine if an image is blurry.
    """
    for filename in os.listdir(directory):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            file_path = os.path.join(directory, filename)
            image = Image.open(file_path).convert('L')  # convert image to grayscale
            image = np.array(image)
            
            if is_blurry(image, threshold):
                os.remove(file_path)
                print(f"Deleted {filename} as it was blurry.")

# Example usage:
# Set your own values for these variables
image_directory = "./working/tmp_input"
blur_threshold = 700  # Adjust the threshold as per your requirement

delete_blurry_images(image_directory, blur_threshold)


Deleted image2_00095.png as it was blurry.
Deleted image2_00081.png as it was blurry.
Deleted image2_00042.png as it was blurry.
Deleted image1_00062.png as it was blurry.
Deleted image1_00076.png as it was blurry.
Deleted image4_00051.png as it was blurry.
Deleted image4_00045.png as it was blurry.
Deleted image3_00009.png as it was blurry.
Deleted image1_00116.png as it was blurry.
Deleted image3_00035.png as it was blurry.
Deleted image0_00029.png as it was blurry.
Deleted image0_00028.png as it was blurry.
Deleted image3_00034.png as it was blurry.
Deleted image1_00103.png as it was blurry.
Deleted image3_00008.png as it was blurry.
Deleted image1_00117.png as it was blurry.
Deleted image4_00044.png as it was blurry.
Deleted image4_00050.png as it was blurry.
Deleted image1_00077.png as it was blurry.
Deleted image1_00063.png as it was blurry.
Deleted image2_00043.png as it was blurry.
Deleted image2_00080.png as it was blurry.
Deleted image2_00041.png as it was blurry.
Deleted ima

In [10]:
images = glob.glob("./working/tmp_input/*.png")
print("Found " + str(len(images)) + " images.")

Found 191 images.


In [11]:
!rm -rf ./working/input
!mkdir ./working/input

images = glob.glob("./working/tmp_input/*.png")

# Maximum number of images to copy
M = 100
step = len(images) // M

for i, image in enumerate(images):
	if i % step == 0:
		shutil.copy(image, f"./working/input/{i:05d}.png")
images = glob.glob("./working/input/*.png")
print(f"There are {len(images)} images in the working directory.")


There are 191 images in the working directory.


In [12]:
!zip -r ./working/input.zip ./working/input

  adding: working/input/ (stored 0%)
  adding: working/input/00126.png (deflated 0%)
  adding: working/input/00132.png (deflated 0%)
  adding: working/input/00091.png (deflated 0%)
  adding: working/input/00085.png (deflated 0%)
  adding: working/input/00052.png (deflated 0%)
  adding: working/input/00046.png (deflated 0%)
  adding: working/input/00047.png (deflated 0%)
  adding: working/input/00053.png (deflated 0%)
  adding: working/input/00084.png (deflated 0%)
  adding: working/input/00090.png (deflated 0%)
  adding: working/input/00133.png (deflated 0%)
  adding: working/input/00127.png (deflated 0%)
  adding: working/input/00131.png (deflated 0%)
  adding: working/input/00125.png (deflated 0%)
  adding: working/input/00119.png (deflated 0%)
  adding: working/input/00086.png (deflated 0%)
  adding: working/input/00092.png (deflated 0%)
  adding: working/input/00045.png (deflated 0%)
  adding: working/input/00051.png (deflated 0%)
  adding: working/input/00079.png (deflated 0%)
  a