## Libraries

In [1]:
# necessary libraries
import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from keras import layers
from keras import models
from keras import optimizers

from keras.models import Model, Sequential
from keras import layers

import tensorflow as tf

# Check GPU 

In [None]:
 tf.config.list_physical_devices('GPU')

In [None]:
# Print every local device reported by TensorFlow's device_lib helper.
# NOTE(review): tensorflow.python.* is an internal module path - confirm it exists in the installed version.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

# Creating Train and Validation Folders

### 1. Create Folders from one  main folder 

This method creates train and validation folders from a single main folder that contains one subfolder of images per class. <br>
You can change the split percentage in this line -->  split_point = int(0.7 * len(files))  (0.7 means 70% of each class goes to training) <br>
<br>
Example

    main folder:
        flowers:
            rose/
            daisy/
            dandelion/

    new folder:
        flowers2:
            train:
                rose/
                daisy/
                dandelion/
            validation:
                rose/
                daisy/
                dandelion/

In [None]:
import os
import shutil
import random

# Original path: folder that contains one sub-folder of images per class
original_dataset_dir = "datasets/dl_datasets/flowers"

# New root folder that will hold the train/ and validation/ folders
base_dir = "datasets/dl_datasets/flowers2"

# New folders for the training and validation sets.
# exist_ok=True keeps the cell re-runnable (os.mkdir raised FileExistsError on a second run).
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)

# Class names; each one must be a sub-folder of original_dataset_dir
classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

# Dedicated, seeded generator: together with the sorted listing below, re-running the
# cell reproduces the same split, so a file can never end up in both train and validation.
rng = random.Random(42)

for cls in classes:
    cls_dir = os.path.join(original_dataset_dir, cls)

    # os.listdir returns entries in arbitrary order -> sort for reproducibility.
    # Keep regular files only; shutil.copyfile cannot copy a directory.
    files = sorted(
        name for name in os.listdir(cls_dir)
        if os.path.isfile(os.path.join(cls_dir, name))
    )
    rng.shuffle(files)

    # split dataset to training and validation
    split_point = int(0.7 * len(files))   # training percentage is 0.7 --> %70
    train_files = files[:split_point]
    validation_files = files[split_point:]

    train_cls_dir = os.path.join(train_dir, cls)
    validation_cls_dir = os.path.join(validation_dir, cls)

    # Create each class folder and copy its share of the files into it
    for target_dir, selected_files in (
        (train_cls_dir, train_files),
        (validation_cls_dir, validation_files),
    ):
        os.makedirs(target_dir, exist_ok=True)
        for file in selected_files:
            src = os.path.join(cls_dir, file)
            dst = os.path.join(target_dir, file)
            shutil.copyfile(src, dst)

### 2. Create Folders from CSV File

In this method there is one main folder that contains all images directly, without subfolders. <br>
Class names are defined by a CSV file (one column for the file name, the other for the label name). <br>
<br>
Example
    
    images:
        animal1.png
        animal2.png
        animal3.png
    
    labels.csv -->  filename        label
                    animal1.png     dog
                    animal2.png     dog
                    animal3.png     cat

<br>
In the end you have 2 folders:

    train:
        dog:
            animal1.png
            animal2.png
        cat:
            animal3.png 

    validation:
        dog:
            animal5.png
            animal6.png
        cat:
            animal24.png 
            

In [None]:
import os
import shutil
import pandas as pd
import random

# Read the CSV file (it must provide a "filename" column and a "label" column)
csv_file_path = "datasets/dl_datasets/butterfly1/Training_set.csv"
data = pd.read_csv(csv_file_path)

# Root directory for the new train/ and validation/ folders (replace with your own).
# Built with os.path.join so the path is valid on every OS, not only on Windows.
root_directory = os.path.join("datasets", "dl_datasets", "butterfly2")

# This folder contains all images (flat, no sub-folders)
image_dir = os.path.join("datasets", "dl_datasets", "butterfly1", "train")

train_directory = os.path.join(root_directory, "train")
val_directory = os.path.join(root_directory, "validation")

os.makedirs(train_directory, exist_ok=True)
os.makedirs(val_directory, exist_ok=True)

# Create one sub-folder per label in both splits.
# (The original read from an undefined name `train_csv`; the labels live in `data`.)
labels = data["label"].value_counts().index

for label in labels:
    os.makedirs(os.path.join(train_directory, label), exist_ok=True)
    os.makedirs(os.path.join(val_directory, label), exist_ok=True)

# Shuffle with a fixed seed: the folders are created with exist_ok=True, so an unseeded
# shuffle would copy a different split on every re-run and mix train and validation images.
shuffled_data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Split the data into 70% train and 30% validation
val_data_count = int(len(shuffled_data) * 0.3)
validation_data = shuffled_data[:val_data_count]
train_data = shuffled_data[val_data_count:]


def copy_images(rows, split_directory):
    """Copy every image listed in `rows` into `split_directory/<label>/`.

    Args:
        rows: DataFrame with a "filename" and a "label" column.
        split_directory: root folder of the split (train or validation).

    A row that cannot be copied is reported together with the reason and
    skipped, so one bad row does not stop the remaining copies.
    """
    for file_name, label in zip(rows["filename"], rows["label"]):
        try:
            # images folder, all images are in here
            source_path = os.path.join(image_dir, file_name)
            # new destination
            destination_directory = os.path.join(split_directory, label)
            shutil.copy(source_path, destination_directory)
        except (OSError, TypeError) as error:
            # OSError: missing/unreadable file; TypeError: empty (NaN) filename or label
            print(f"file error: {file_name!r} (label {label!r}): {error}")


# Copy train data
copy_images(train_data, train_directory)

# Copy validation data
copy_images(validation_data, val_directory)


# Create Datasets ( Augmented - Not Augmented )

In [None]:
# Directories of the training and validation images
training_dir="datasets/seg_train/seg_train"
validation_dir="datasets/seg_test/seg_test"


def prep_data(augmented, batch_size=16, target_size=(180, 180), class_mode="sparse"):
    """Build the training and validation generators from the dataset folders.

    Args:
        augmented: if truthy, the training images are randomly rotated, shifted,
            sheared, zoomed and horizontally flipped; otherwise they are only
            rescaled. Example: prep_data(True).
        batch_size: number of images per batch (default 16).
        target_size: (height, width) every image is resized to (default (180, 180)).
        class_mode: label format passed to flow_from_directory (default "sparse").

    Returns:
        (train_set, validation_set): iterators reading from `training_dir` and
        `validation_dir`. Validation images are only rescaled, never augmented.
    """
    if augmented:
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=40,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)
    else:
        train_datagen = ImageDataGenerator(rescale=1./255)

    # The validation generator is the same in both modes: rescaling only
    validation_datagen = ImageDataGenerator(rescale=1./255)

    # Options shared by both sets so they always stay in sync
    flow_options = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode)

    # training set
    train_set = train_datagen.flow_from_directory(training_dir, **flow_options)

    # validation set
    validation_set = validation_datagen.flow_from_directory(validation_dir, **flow_options)

    return train_set , validation_set

# Visualization Function for History of Model

In [None]:
# Visualization helper: accuracy and loss curves of a training run
def visualize(history):
    """Plot training vs. validation accuracy and loss, side by side.

    Args:
        history: object whose `.history` mapping holds the per-epoch lists
            'accuracy', 'val_accuracy', 'loss' and 'val_loss'.

    Shows one figure with two panels (accuracy on the left, loss on the
    right); returns nothing.
    """
    recorded = history.history
    # Read all four series first, so a missing metric fails before a figure is created
    train_acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
    train_loss, val_loss = recorded['loss'], recorded['val_loss']
    epoch_numbers = range(1, len(train_acc) + 1)  # epochs are numbered from 1

    panels = (
        (train_acc, 'Training acc', val_acc, 'Validation acc',
         'Training and validation accuracy'),
        (train_loss, 'Training loss', val_loss, 'Validation loss',
         'Training and validation loss'),
    )

    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (train_series, train_label, val_series, val_label, title) in zip(axs, panels):
        ax.plot(epoch_numbers, train_series, 'r', label=train_label)
        ax.plot(epoch_numbers, val_series, 'b', label=val_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# Visualization of Class Distribution

In [None]:
import os

# Directories of the training and validation sets.
# `training_dir` and `validation_dir` must already be defined by an earlier cell.
train_dir = training_dir


def count_images_per_class(dataset_dir):
    """Return {class_folder_name: number_of_entries} for every sub-folder of dataset_dir.

    Entries of dataset_dir that are not directories are ignored. Folders are
    visited in sorted order so that the training and validation dictionaries
    (and the plots built from them) list the classes in the same order.
    """
    counts = {}
    for class_folder in sorted(os.listdir(dataset_dir)):
        class_path = os.path.join(dataset_dir, class_folder)
        if os.path.isdir(class_path):
            counts[class_folder] = len(os.listdir(class_path))
    return counts


# Class distribution of the training set
train_class_counts = count_images_per_class(train_dir)

# Class distribution of the validation set
validation_class_counts = count_images_per_class(validation_dir)

print("Training set Distribution:")
print(train_class_counts)

print("Validation set Distribution:")
print(validation_class_counts)

In [None]:
import matplotlib.pyplot as plt

# One bar chart per split, side by side: training on the left, validation on the right
plt.figure(figsize=(15, 6))

distributions = (
    ('Training set Distribution', train_class_counts),
    ('Validation set Distribution', validation_class_counts),
)

for position, (title, class_counts) in enumerate(distributions, start=1):
    # Subplot `position` of a 1 x 2 grid
    plt.subplot(1, 2, position)
    plt.bar(class_counts.keys(), class_counts.values())
    plt.title(title)
    plt.xlabel('Classes')
    plt.ylabel('Sample Numbers')
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

# Visualization of Example Images from Dataset

In [None]:
# Not augmented dataset (call prep_data(True) for augmented training images)
train_set, validation_set = prep_data(False)

# Fetch one batch of images and labels.
# The built-in next() uses the standard iterator protocol; the `.next()` method
# is not available on the directory iterator in recent Keras releases.
images, labels = next(train_set)

# class_indices maps class name -> index; invert it to look up a name by index
class_names = {index: name for name, index in train_set.class_indices.items()}

In [None]:
# Show the first four images of the batch, each titled with its class name
fig, axes = plt.subplots(1, 4, figsize=(15, 5))

for i, ax in enumerate(axes):
    ax.imshow(images[i])
    # labels hold class indices; convert to int before the dictionary lookup
    ax.set_title(f"{class_names[int(labels[i])]}")
    ax.axis('off')

plt.tight_layout()
plt.show()