In [1]:
import cv2
import numpy as np
import os

def binarize_image(image_path, min_area=100):
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Binarize the image using Otsu's thresholding
    _, binary_image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    # Find contours of the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Create an empty mask to draw the filtered contours
    filtered_mask = np.zeros_like(binary_image)
    
    # Draw contours that have an area larger than the min_area
    for contour in contours:
        if cv2.contourArea(contour) > min_area:
            cv2.drawContours(filtered_mask, [contour], -1, 255, thickness=cv2.FILLED)
    
    # Apply the mask to the binary image
    filtered_image = cv2.bitwise_and(binary_image, binary_image, mask=filtered_mask)
    
    return filtered_image

def process_images(input_dir, output_dir, min_area=100):
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Loop through all files in the input directory
    for filename in os.listdir(input_dir):
        # Construct full file path
        file_path = os.path.join(input_dir, filename)
        
        # Check if the file is an image (optional: you can add more file extensions)
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tif', '.bmp')):
            print(f"Processing {filename}...")
            
            # Binarize and filter the image
            filtered_image = binarize_image(file_path, min_area)
            
            # Save the filtered image in the output directory
            output_path = os.path.join(output_dir, filename)
            cv2.imwrite(output_path, filtered_image)
    
    print("Processing complete.")

# Example usage
input_directory = "/kaggle/input/chromosomedetect2/54816/24_chromosomes_object/JEPG"
output_directory = '/kaggle/working/Binary_Images'
process_images(input_directory, output_directory)

Processing 1100303.jpg...
Processing 1060512.jpg...
Processing 1057232.jpg...
Processing 1080212.jpg...
Processing unknow015.jpg...
Processing 1053093.jpg...
Processing 1057461.jpg...
Processing 1050244.jpg...
Processing 1055471.jpg...
Processing 1054581.jpg...
Processing 1054082.jpg...
Processing 1101136.jpg...
Processing 1052932.jpg...
Processing 1055904.jpg...
Processing 1100094.jpg...
Processing 1051802.jpg...
Processing 1050273.jpg...
Processing 1055281.jpg...
Processing 1058044.jpg...
Processing 1055542.jpg...
Processing 104172.jpg...
Processing unknow003.jpg...
Processing 1056871.jpg...
Processing 1071823.jpg...
Processing 1057664.jpg...
Processing 1056312.jpg...
Processing 1050623.jpg...
Processing 1055461.jpg...
Processing 1080014.jpg...
Processing 1050151.jpg...
Processing 1054121.jpg...
Processing 1091491.jpg...
Processing 1050253_1.jpg...
Processing 1060741.jpg...
Processing 1100443.jpg...
Processing 1055873.jpg...
Processing unknow011.jpg...
Processing 1071943.jpg...
Proce

In [2]:
import numpy
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.datasets import mnist

2024-06-14 15:17:00.603199: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-14 15:17:00.603291: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-14 15:17:00.723116: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
#median filter 

import os
import cv2

# Directory path containing the images
input_directory = '/kaggle/working/Binary_Images'
output_directory = '/kaggle/working/Denoised_Images'

# Create the output directory if it doesn't exist
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Function to apply median filter and save denoised images
def denoise_images(input_dir, output_dir):
    # Iterate over all files in the directory
    for filename in os.listdir(input_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            # Construct full file paths
            input_filepath = os.path.join(input_dir, filename)
            output_filepath = os.path.join(output_dir, filename)
            
            # Load the image
            noisy_image = cv2.imread(input_filepath)
            
            if noisy_image is not None:
                # Apply median filter
                denoised_image = cv2.medianBlur(noisy_image, 5)  # Adjust kernel size as needed
                
                # Save the denoised image
                cv2.imwrite(output_filepath, denoised_image)
                print(f"Denoised image saved: {output_filepath}")
            else:
                print(f"Failed to read image: {input_filepath}")

# Call the function to denoise images and save them in the specified directory
denoise_images(input_directory, output_directory)

Denoised image saved: /kaggle/working/Denoised_Images/1053541.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1052971.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1056944.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1070723.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1055992.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1070693.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1055771.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1051461.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1080154.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1056412.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1050803.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1054191.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1071474.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1071964.jpg
Denoised image saved: /kaggle/working/Denoised_Images/1052352.jpg
Denoised i

In [4]:
import pandas as pd
import json
import xml.etree.ElementTree as ET
import os
def convert_voc_to_coco(xml_paths, label2id, output_json):
    coco_json = []
    for xml_path in xml_paths:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        image_id = root.find('filename').text
        width = int(root.find('size').find('width').text)
        height = int(root.find('size').find('height').text)
        coco_json.append({
            'image_id': image_id,
            'width': width,
            'height': height,
            'annotations': []
        })
        for obj in root.findall('object'):
            obj_id = obj.find('name').text
            obj_id = label2id.get(obj_id, -1)
            coco_json[-1]['annotations'].append({
                'image_id': image_id,
                'category_id': obj_id,
                'bbox': [
                    float(obj.find('bndbox').find('xmin').text),
                    float(obj.find('bndbox').find('ymin').text),
                    float(obj.find('bndbox').find('xmax').text) - float(obj.find('bndbox').find('xmin').text),
                    float(obj.find('bndbox').find('ymax').text) - float(obj.find('bndbox').find('ymin').text)
                ]
            })
    with open(output_json, 'w') as f:
        json.dump(coco_json, f)

def load_coco_json_to_df(coco_json):
    df = pd.read_json(coco_json)
    return df

if __name__ == '__main__':
    xml_dir = '/kaggle/input/chromosomedetect2/54816/24_chromosomes_object/annotations'
    label2id = {'chromosomes': 1}  # Replace with your label mapping
    output_json = '/kaggle/working/path_to_your_output_json.json'

    xml_paths = [f'{xml_dir}/{file}' for file in os.listdir(xml_dir) if file.endswith('.xml')]
    convert_voc_to_coco(xml_paths, label2id, output_json)

    coco_json = output_json
    df = load_coco_json_to_df(coco_json)
    print(df)

           image_id  width  height  \
0       1100571.jpg    431     544   
1        103193.jpg    647     860   
2        104681.jpg    716     875   
3     unknow022.jpg    615     585   
4       1100573.jpg    737     885   
...             ...    ...     ...   
4995    1054364.jpg    626     711   
4996    1054281.jpg    704     965   
4997    1091063.jpg    639     591   
4998    1100351.jpg   1320     739   
4999    1070523.jpg    707     880   

                                            annotations  
0     [{'image_id': '1100571.jpg', 'category_id': -1...  
1     [{'image_id': '103193.jpg', 'category_id': -1,...  
2     [{'image_id': '104681.jpg', 'category_id': -1,...  
3     [{'image_id': 'unknow022.jpg', 'category_id': ...  
4     [{'image_id': '1100573.jpg', 'category_id': -1...  
...                                                 ...  
4995  [{'image_id': '1054364.jpg', 'category_id': -1...  
4996  [{'image_id': '1054281.jpg', 'category_id': -1...  
4997  [{'image_id': '

In [5]:
datas = df.drop("annotations",axis=1)
datas

Unnamed: 0,image_id,width,height
0,1100571.jpg,431,544
1,103193.jpg,647,860
2,104681.jpg,716,875
3,unknow022.jpg,615,585
4,1100573.jpg,737,885
...,...,...,...
4995,1054364.jpg,626,711
4996,1054281.jpg,704,965
4997,1091063.jpg,639,591
4998,1100351.jpg,1320,739


In [6]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Directory containing XML files
directory = '/kaggle/input/chromosomedetect2/54816/24_chromosomes_object/annotations'

# List to hold all data
all_data = []

# Loop through all files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.xml'):
        # Parse the XML file
        tree = ET.parse(os.path.join(directory, filename))
        root = tree.getroot()

        # Extract the object details
        for obj in root.findall('object'):
            name = obj.find('name').text
            pose = obj.find('pose').text
            truncated = int(obj.find('truncated').text)
            difficult = int(obj.find('difficult').text)
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            
            all_data.append({
                'filename': filename,
                'name': name,
                'pose': pose,
                'truncated': truncated,
                'difficult': difficult,
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax
            })

# Create a DataFrame
annotations = pd.DataFrame(all_data)

# Display the DataFrame
print(annotations)


           filename name         pose  truncated  difficult  xmin  ymin  xmax  \
0       1100571.xml   A1  Unspecified          0          0   187   195   289   
1       1100571.xml   A1  Unspecified          0          0    59   309   142   
2       1100571.xml   A2  Unspecified          0          0   129   274   264   
3       1100571.xml   A2  Unspecified          0          0   181    37   238   
4       1100571.xml   A3  Unspecified          0          0   148   104   264   
...             ...  ...          ...        ...        ...   ...   ...   ...   
229847  1070523.xml  G21  Unspecified          0          0   480   487   512   
229848  1070523.xml  G22  Unspecified          0          0   380   726   440   
229849  1070523.xml  G22  Unspecified          0          0   550   467   592   
229850  1070523.xml    X  Unspecified          0          0    23   316    99   
229851  1070523.xml    Y  Unspecified          0          0   517   640   577   

        ymax  
0        309

In [7]:
annotations = pd.DataFrame(annotations)
annotations['image'] = annotations['filename'].str.replace('.xml', '.jpg')
annotations['output'] =  "/kaggle/working/Denoised_Images/"+ annotations['image']
annotations.head()

Unnamed: 0,filename,name,pose,truncated,difficult,xmin,ymin,xmax,ymax,image,output
0,1100571.xml,A1,Unspecified,0,0,187,195,289,309,1100571.jpg,/kaggle/working/Denoised_Images/1100571.jpg
1,1100571.xml,A1,Unspecified,0,0,59,309,142,444,1100571.jpg,/kaggle/working/Denoised_Images/1100571.jpg
2,1100571.xml,A2,Unspecified,0,0,129,274,264,329,1100571.jpg,/kaggle/working/Denoised_Images/1100571.jpg
3,1100571.xml,A2,Unspecified,0,0,181,37,238,174,1100571.jpg,/kaggle/working/Denoised_Images/1100571.jpg
4,1100571.xml,A3,Unspecified,0,0,148,104,264,169,1100571.jpg,/kaggle/working/Denoised_Images/1100571.jpg


In [8]:
annotations['name'].unique()

array(['A1', 'A2', 'A3', 'B4', 'B5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11',
       'C12', 'D13', 'D14', 'D15', 'E16', 'E17', 'E18', 'F19', 'F20',
       'G21', 'G22', 'X', 'Y'], dtype=object)

In [9]:
import pandas as pd
import cv2
import os

# Read the CSV file
data = annotations
data['output'] =  "/kaggle/working/Denoised_Images/"+ annotations['image']
# Base directory to save cropped images
base_output_dir = '/kaggle/working/Final'

# Create the base output directory if it doesn't exist
if not os.path.exists(base_output_dir):
    os.makedirs(base_output_dir)

# Iterate through unique name values and create subdirectories
for name in data['name'].unique():
    name_output_dir = os.path.join(base_output_dir, name)
    os.makedirs(name_output_dir, exist_ok=True)

# Iterate through the data and crop images
for idx, row in data.iterrows():
    image_path = row['output']
    xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']
    name = row['name']
    
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read image {image_path}")
        continue
    
    # Validate coordinates
    if xmin < 0 or ymin < 0 or xmax > image.shape[1] or ymax > image.shape[0]:
        print(f"Error: Invalid coordinates for cropping: {xmin}, {ymin}, {xmax}, {ymax}")
        continue
    
    # Crop the image
    cropped_image = image[ymin:ymax, xmin:xmax]
    
    # Check if cropped image is valid
    if cropped_image.size == 0:
        print(f"Error: Cropped image is empty for {image_path} with coordinates: {xmin}, {ymin}, {xmax}, {ymax}")
        continue
    
    # Get the filename from the image path
    image_filename = os.path.basename(image_path)
    
    # Save the cropped image in the corresponding subdirectory
    name_output_path = os.path.join(base_output_dir, name, image_filename)
    cv2.imwrite(name_output_path, cropped_image)

print("Cropping completed.")


Error: Invalid coordinates for cropping: 237, -1, 291, 102
Error: Invalid coordinates for cropping: -1, 240, 84, 303
Error: Invalid coordinates for cropping: 403, -1, 506, 76
Error: Invalid coordinates for cropping: 393, -1, 478, 82
Error: Invalid coordinates for cropping: 328, -1, 416, 48
Error: Invalid coordinates for cropping: 359, -1, 423, 87
Error: Invalid coordinates for cropping: 1096, 56, 1218, 139
Error: Invalid coordinates for cropping: 267, -1, 324, 150
Error: Invalid coordinates for cropping: 653, -1, 704, 127
Error: Invalid coordinates for cropping: 397, -1, 499, 89
Error: Invalid coordinates for cropping: -1, 113, 60, 143
Error: Invalid coordinates for cropping: 266, -1, 316, 180
Error: Invalid coordinates for cropping: -1, 377, 64, 501
Error: Invalid coordinates for cropping: 405, -1, 512, 108
Error: Invalid coordinates for cropping: -1, 475, 48, 521
Error: Invalid coordinates for cropping: 897, 257, 1066, 315
Error: Invalid coordinates for cropping: 895, 48, 991, 145
Er

In [10]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback
import time

# Directory paths
input_directory = r'/kaggle/working/Final'

# Parameters for image resizing and model training
img_height, img_width = 100, 100
img_channels = 3  # Assuming RGB images
batch_size = 128
epochs = 100

# Create a data generator for training and validation
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Create training and validation generators
train_generator = train_datagen.flow_from_directory(
    input_directory,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    input_directory,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

# Custom callback to print epoch number and estimate time remaining
class TimeHistory(Callback):
    def on_train_begin(self, logs=None):
        self.times = []

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_time_start = time.time()
        print(f"\nStarting epoch {epoch + 1}/{epochs}")

    def on_epoch_end(self, epoch, logs=None):
        epoch_time = time.time() - self.epoch_time_start
        self.times.append(epoch_time)
        avg_time_per_epoch = np.mean(self.times)
        remaining_epochs = epochs - (epoch + 1)
        remaining_time = remaining_epochs * avg_time_per_epoch
        print(f"Epoch {epoch + 1}/{epochs} completed in {epoch_time:.2f} seconds.")
        print(f"Estimated time remaining: {remaining_time // 60:.0f} minutes {remaining_time % 60:.0f} seconds.")

# Define the model
model = Sequential()

# Add Convolutional layers
model.add(Conv2D(256, (5, 5), activation='relu', input_shape=(img_height, img_width, img_channels)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))

# Add Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add Dropout layer
model.add(Dropout(0.5))

# Add another Convolutional layer
model.add(Conv2D(256, (3, 3), activation='relu'))

# Add another Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add final Convolutional layer
model.add(Conv2D(128, (3, 3), activation='relu'))

# Add Flatten layer
model.add(Flatten())

# Add Dense layers
model.add(Dense(120, activation='relu'))  # 120 neurons in the fully connected layer
model.add(Dense(train_generator.num_classes, activation='softmax'))  # Output layer with softmax activation for multi-class classification

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Create an instance of the callback
time_callback = TimeHistory()

# Train the model using the generator
model.fit(train_generator, epochs=epochs, validation_data=validation_generator, callbacks=[time_callback])

# Evaluate the model
loss, accuracy = model.evaluate(validation_generator)
print(f'Test accuracy: {accuracy}')


Found 94016 images belonging to 24 classes.
Found 23503 images belonging to 24 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting epoch 1/100
Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1718379007.608519      83 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482ms/step - accuracy: 0.1610 - loss: 2.6562Epoch 1/100 completed in 502.03 seconds.
Estimated time remaining: 828 minutes 21 seconds.
[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 546ms/step - accuracy: 0.1611 - loss: 2.6559 - val_accuracy: 0.2387 - val_loss: 2.3068

Starting epoch 2/100
Epoch 2/100
[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 413ms/step - accuracy: 0.2946 - loss: 2.0906Epoch 2/100 completed in 327.33 seconds.
Estimated time remaining: 677 minutes 19 seconds.
[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 445ms/step - accuracy: 0.2946 - loss: 2.0906 - val_accuracy: 0.2908 - val_loss: 2.1008

Starting epoch 3/100
Epoch 3/100
[1m735/735[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 413ms/step - accuracy: 0.3615 - loss: 1.8936Epoch 3/100 completed in 327.35 seconds.
Estimated time remaining: 623 minutes 20 seconds.
[1m735/735[0m [3