In [None]:
import os
import numpy as np
import csv
import shutil
import matplotlib.pyplot as plt

from PIL import Image
from tqdm import tqdm
from train_dataset import img_segmention,img_patch


# Config

In [None]:
# Root folder of the dataset; the notebook reads images from its "train" subfolder.
source_folder = "dataset/mini_testdatensatz"

# Size handed to img_patch before segmentation, and shape of the global mask.
im_size = [192, 192]

# Accepted range of mask sizes (number of non-zero mask pixels).
# Images whose mask lies outside [min_pix, max_pix] are removed in "Clear II".
min_pix, max_pix = 300, 450

# Clear Dataset I

In [None]:
# Clear Dataset I: segment every training image, record the size of each mask
# and accumulate all masks into one global mask. Images for which this fails
# are deleted from the source dataset.
source_folder_train = source_folder + "/train"

mask_size = [[], []]  # [mask pixel counts, matching filenames]
global_mask = np.zeros((im_size[0], im_size[1]))

for filename in tqdm(os.listdir(source_folder_train)):
    # Only image files are processed; everything else is left untouched.
    if not filename.endswith((".jpg", ".jpeg", ".png")):
        continue

    image_path = os.path.join(source_folder_train, filename)
    segmentation_error = None

    # The context manager releases the file handle before a possible
    # os.remove(); an open handle blocks deletion on Windows.
    with Image.open(image_path) as img:
        img_resized = img_patch(img, im_size)
        try:
            _, mask, _ = img_segmention(img_resized)
            num_ones = np.count_nonzero(mask)
            updated_global_mask = np.add(global_mask, mask)
        except Exception as err:
            # `except Exception` instead of a bare `except`: interrupting the
            # kernel (KeyboardInterrupt) must never delete an image.
            segmentation_error = err

    if segmentation_error is not None:
        os.remove(image_path)
        print("The image", filename, "caused an error during img_segmention and has been deleted")
        print("Reason:", repr(segmentation_error))
        continue

    # Results are recorded only after every step succeeded, so a deleted image
    # can never remain listed in mask_size.
    mask_size[0].append(num_ones)
    mask_size[1].append(filename)
    global_mask = updated_global_mask

# Sort the list mask_size

In [None]:
# Pair every mask size with its filename so both stay aligned while sorting.
pairs = list(zip(mask_size[0], mask_size[1]))

# Order the pairs by mask size, smallest first. The sort is stable, so files
# with the same mask size keep their original relative order.
sorted_pairs = list(pairs)
sorted_pairs.sort(key=lambda pair: pair[0])

# Split the sorted pairs back into two parallel lists.
sorted_mask_size = [size for size, _ in sorted_pairs]
sorted_filenames = [name for _, name in sorted_pairs]

mask_size_with_filename = [sorted_mask_size, sorted_filenames]

# Global Mask --> average of all masks

In [None]:
# argmax returns a position in the flattened array; unravel_index converts it
# back into (row, column) coordinates of the 2-D global mask.
max_index = global_mask.argmax()
max_row, max_col = np.unravel_index(max_index, global_mask.shape)
print(
    "The maximum value of the global mask is",
    global_mask[max_row, max_col],
    "at row",
    max_row,
    "and column",
    max_col,
)

In [None]:
# Stretch the accumulated mask linearly to the range 0..255 so it can be
# stored as an 8-bit grayscale image.
global_mask_min = global_mask.min()
global_mask_range = global_mask.max() - global_mask_min
if global_mask_range > 0:
    global_mask_adjust = (global_mask - global_mask_min) * (255 / global_mask_range)
else:
    # A constant mask (e.g. no image was segmented) would give 0/0 = NaN,
    # which cannot be cast to uint8 meaningfully; save a black image instead.
    global_mask_adjust = np.zeros_like(global_mask)

# Save the global mask as an image; create the output folder on a fresh checkout.
os.makedirs("analyse_dataset", exist_ok=True)
global_mask_img = Image.fromarray(global_mask_adjust.astype(np.uint8))
global_mask_img.save("analyse_dataset/global_mask.png")


# Write in csv

In [None]:
# Write the sorted (mask size, filename) pairs to a CSV file with a header row.
os.makedirs('analyse_dataset', exist_ok=True)

# newline='' is required by the csv module: without it the writer's own line
# endings get translated again on Windows, producing an empty row after every
# record, which the reading cells cannot parse.
with open('analyse_dataset/mask_size.csv', mode='w', newline='') as file:
    writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Mask Size', 'Filename'])
    writer.writerows(zip(mask_size_with_filename[0], mask_size_with_filename[1]))

# Mean and Standard Deviation before Clear II

In [None]:
# Load the mask sizes (first CSV column) of all images before "Clear II".
pixel_counts = []
with open('analyse_dataset/mask_size.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    pixel_counts.extend(int(row[0]) for row in reader)

# Summary statistics of the mask sizes.
mean, std = np.mean(pixel_counts), np.std(pixel_counts)

print("Mean:", mean)
print("Standard Deviation:", std)

# Histogram before Clear II

In [None]:
# Read the mask sizes (pixel counts) and the matching filenames from the CSV file.
pixel_counts = []
filenames = []
with open('analyse_dataset/mask_size.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    for row in reader:
        if not row:  # tolerate blank lines in the file
            continue
        pixel_counts.append(int(row[0]))
        filenames.append(row[1])

# Distinct mask sizes (ascending) and the number of images having each size.
# Computed once with np.unique instead of rescanning the list per value.
unique_values, value_frequencies = np.unique(pixel_counts, return_counts=True)
unique_counts = unique_values.tolist()
histogram_data = value_frequencies.tolist()

# Histogram of the mask sizes with one bin per distinct size. At least one
# bin is requested because plt.hist rejects bins=0 when there is no data.
plt.hist(pixel_counts, bins=max(1, len(unique_counts)))
# The dashed red lines mark the accepted range [min_pix, max_pix].
plt.axvline(x=min_pix, color='r', linestyle='--')
plt.axvline(x=max_pix, color='r', linestyle='--')
plt.xlabel('Anzahl der Pixel')
plt.ylabel('Anzahl der Bilder')
plt.show()

In [None]:
# Read the mask sizes (first CSV column) from the CSV file.
with open('analyse_dataset/mask_size.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    pixel_counts = [int(row[0]) for row in reader if row]

# Count the images whose mask is smaller than min_pix or larger than max_pix.
below = sum(1 for count in pixel_counts if count < min_pix)
above = sum(1 for count in pixel_counts if count > max_pix)

# Print the results. The second message used to say "weniger als" (less than)
# although it reports the masks that are larger than max_pix.
print("Anzahl der Werte mit weniger als",str(min_pix),"Pixel:", below)
print("Anzahl der Werte mit mehr als",str(max_pix),"Pixel:", above)

# Copy Dataset

In [None]:
# The cleared dataset lives next to the original, with a "_cleared" suffix.
destination_folder = f"{source_folder}_cleared"

# copytree refuses to write into an existing folder, so a copy left over from
# a previous run is removed first.
stale_copy_present = os.path.exists(destination_folder)
if stale_copy_present:
    shutil.rmtree(destination_folder)

shutil.copytree(source_folder, destination_folder)
    

# Clear II (only Copy Dataset is cleared)

In [None]:
# Clear II: delete every image of the copied dataset whose mask size lies
# outside [min_pix, max_pix] and write the remaining rows to a second CSV file.
with open('analyse_dataset/mask_size.csv', 'r', newline='') as inp, \
        open('analyse_dataset/mask_size_cleared.csv', 'w', newline='') as out:
    reader = csv.reader(inp)
    writer = csv.writer(out)

    # Copy the header row. The cells that read mask_size_cleared.csv skip the
    # first line; without a header they would silently drop the first image.
    writer.writerow(next(reader))

    for row in reader:
        if not row:  # tolerate blank lines in the input file
            continue

        size = int(row[0])
        if min_pix <= size <= max_pix:
            writer.writerow(row)
            continue

        reason = "small" if size < min_pix else "large"
        try:
            os.remove(os.path.join(destination_folder, "train", row[1]))
            print("The mask oft the image", str(row[1]), "was too " + reason + ". The image has been deleted")
        except FileNotFoundError:
            # The file is already gone, e.g. because this cell ran before.
            print("The dataset has already been cleared")
        except OSError as err:
            # Any other failure (permissions, locked file, ...) is reported
            # as what it is instead of being mistaken for "already cleared".
            print("The image", str(row[1]), "could not be deleted:", err)


# Mean and Standard Deviation after Clear II

In [None]:
# Load the mask sizes (first CSV column) of the images kept by "Clear II".
pixel_counts = []
with open('analyse_dataset/mask_size_cleared.csv', 'r') as f:
    reader = csv.reader(f)
    next(reader)  # the first line is skipped
    pixel_counts.extend(int(row[0]) for row in reader)

# Summary statistics of the remaining mask sizes.
mean, std = np.mean(pixel_counts), np.std(pixel_counts)

print("Mean:", mean)
print("Standard Deviation:", std)

# Histogram after Clear II

In [None]:
# Read the mask sizes (pixel counts) and the matching filenames of the
# cleared dataset from the CSV file.
pixel_counts = []
filenames = []
with open('analyse_dataset/mask_size_cleared.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the first line; no error if the file is empty
    for row in reader:
        if not row:  # tolerate blank lines in the file
            continue
        pixel_counts.append(int(row[0]))
        filenames.append(row[1])

# Distinct mask sizes (ascending) and the number of images having each size.
# Computed once with np.unique instead of rescanning the list per value.
unique_values, value_frequencies = np.unique(pixel_counts, return_counts=True)
unique_counts = unique_values.tolist()
histogram_data = value_frequencies.tolist()

# Histogram of the mask sizes with one bin per distinct size. At least one
# bin is requested because plt.hist rejects bins=0 when there is no data.
plt.hist(pixel_counts, bins=max(1, len(unique_counts)))
# The dashed red lines mark the accepted range [min_pix, max_pix].
plt.axvline(x=min_pix, color='r', linestyle='--')
plt.axvline(x=max_pix, color='r', linestyle='--')
plt.xlabel('Anzahl der Pixel')
plt.ylabel('Anzahl der Bilder')
plt.show()

# Testmask --> only one mask to test images

In [None]:
# Test mask: segment one image of the cleared dataset and save both the mask
# and the masked image for visual inspection.
# The path is derived from destination_folder and the size is taken from the
# config cell, so changing the configuration cannot leave this cell behind.
path = os.path.join(destination_folder, 'train', 'picture_1.png')

# The context manager closes the image file once everything that may still
# need it (patching, segmentation, array conversion) has run.
with Image.open(path) as img:
    img_resized = img_patch(img, im_size)
    # Perform image segmentation on the patched image
    _, mask, _ = img_segmention(img_resized)
    img_resized_np = np.array(img_resized)

# Cast before scaling so that a boolean mask also becomes 0/255 rather than
# 0/1. NOTE(review): the dtype returned by img_segmention is not visible
# here - confirm.
mask = np.asarray(mask).astype(np.uint8)
mask[mask == 1] = 255

mask_img = Image.fromarray(mask)
mask_img.save("analyse_dataset/test_mask_img.png")

# Keep the pixels inside the mask and leave everything else black.
foreground = mask != 0
cut = np.zeros_like(img_resized_np)
cut[foreground] = img_resized_np[foreground]
cut_img = Image.fromarray(cut.astype(np.uint8))
cut_img.save("analyse_dataset/test_cut_img.png")
