In [2]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

def load_images(image_folder, target_size=(224,224)):
    images = []
    filenames = []
    for filename in os.listdir(image_folder):
        if filename.endswith(".jpg") or filename.endswith(".png"):  # Add other file types if needed
            img_path = os.path.join(image_folder, filename)
            img = load_img(img_path, target_size=target_size)
            img_array = img_to_array(img)
            img_array = preprocess_input(img_array)  # Preprocess the image like VGG16 model expects
            images.append(img_array)
            filenames.append(filename)
    return np.array(images), filenames

images, filenames = load_images('letters')


In [9]:
import os
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np

# Load the pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

def extract_features(image_path):
    img = load_img(image_path, target_size=(224, 224))  # Load and resize image
    img_array = img_to_array(img)  # Convert image to array
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize the image
    features = model.predict(img_array)  # Get features
    features_flatten = features.flatten()  # Flatten the features
    return features_flatten

def process_folder(folder_path):
    features_list = []
    filenames = []
    
    for file in os.listdir(folder_path):
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):  # check for image files
            file_path = os.path.join(folder_path, file)
            features = extract_features(file_path)
            features_list.append(features)
            filenames.append(file)

    return features_list, filenames

# Path to your 'letters' folder
folder_path = 'letters'
features, filenames = process_folder(folder_path)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [1]:
# Test script using a simple scikit-learn function
from sklearn.datasets import make_blobs
from sklearn.cluster import DBSCAN

# Generate sample data
X, _ = make_blobs(n_samples=10, centers=3, n_features=2, random_state=42)

# Try clustering
dbscan = DBSCAN(eps=1.0, min_samples=2)
print(dbscan.fit_predict(X))


[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]


In [7]:
from sklearn.cluster import KMeans

# Assuming you want to cluster into 36 groups as previously mentioned
kmeans = KMeans(n_clusters=36, random_state=0)
clusters = kmeans.fit_predict(features)

# Optionally, you might want to save the cluster labels with filenames to review
clustered_data = list(zip(filenames, clusters))
for data in clustered_data:
    print(data)


In [8]:
import shutil

output_folder = 'output'
for cluster_id in range(k):
    cluster_folder = os.path.join(output_folder, f'cluster_{cluster_id}')
    os.makedirs(cluster_folder, exist_ok=True)

for file, cluster_id in zip(filenames, clusters):
    src_path = os.path.join('letters', file)
    dst_path = os.path.join(output_folder, f'cluster_{cluster_id}', file)
    shutil.copy(src_path, dst_path)


In [10]:
import os
import shutil
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np
from sklearn.cluster import KMeans

# Load the pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

def extract_features(image_path):
    img = load_img(image_path, target_size=(224, 224))  # Load and resize image
    img_array = img_to_array(img)  # Convert image to array
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize the image
    features = model.predict(img_array)  # Get features
    features_flatten = features.flatten()  # Flatten the features
    return features_flatten

def process_folder(folder_path):
    features_list = []
    filenames = []
    
    for file in os.listdir(folder_path):
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):  # check for image files
            file_path = os.path.join(folder_path, file)
            features = extract_features(file_path)
            features_list.append(features)
            filenames.append(file_path)  # save full path for easier file management

    return features_list, filenames

def cluster_images(features, filenames, n_clusters=36):
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    clusters = kmeans.fit_predict(features)
    return clusters, filenames

def save_clustered_images(clusters, filenames, folder_path):
    for index, cluster in enumerate(clusters):
        cluster_folder = os.path.join(folder_path, f'cluster_{cluster}')
        os.makedirs(cluster_folder, exist_ok=True)  # Create a folder for the cluster if it doesn't exist
        file_path = filenames[index]
        shutil.copy(file_path, os.path.join(cluster_folder, os.path.basename(file_path)))

# Path to your 'letters' folder
input_folder_path = 'letters'
output_folder_path = 'output'

# Process the images to extract features
features, filenames = process_folder(input_folder_path)

# Cluster the images based on extracted features
clusters, filenames = cluster_images(features, filenames)

# Save images to their respective cluster directories
save_clustered_images(clusters, filenames, output_folder_path)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms

In [1]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import os
import shutil
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

# Load the saved model
model = load_model('final_model.keras')

def classify_and_move_images(source_dir, target_dir, model, target_size):
    for subdir, dirs, files in os.walk(source_dir):
        for file in files:
            filepath = os.path.join(subdir, file)
            try:
                img = load_and_prepare_image(filepath, target_size)
                prediction = model.predict(img)
                predicted_class_index = np.argmax(prediction, axis=1)
                # predicted_class_name = index_to_class[predicted_class_index[0]]

                # Create target directory if not already exists
                new_dir = os.path.join(target_dir, str(predicted_class_index))
                if not os.path.exists(new_dir):
                    os.makedirs(new_dir)

                # Copy the image to the new directory
                shutil.copy(filepath, new_dir)
                print(f"Copied {file} to {new_dir}")

            except Exception as e:
                print(f"Failed to process {file}: {str(e)}")

# Example usage
source_directory = r'letters'  # path to the 'letters' folder
target_directory = r'letters/let'  # path to the new folder structure

classify_and_move_images(source_directory, target_directory, model, target_size=(34, 80))


Failed to process AAIHTXKVJ_0.png: name 'load_and_prepare_image' is not defined
Failed to process AAIHTXKVJ_1.png: name 'load_and_prepare_image' is not defined
Failed to process AAIHTXKVJ_2.png: name 'load_and_prepare_image' is not defined
Failed to process AAIHTXKVJ_3.png: name 'load_and_prepare_image' is not defined
Failed to process AAIHTXKVJ_4.png: name 'load_and_prepare_image' is not defined
Failed to process AAIHTXKVJ_5.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_0.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_1.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_2.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_3.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_4.png: name 'load_and_prepare_image' is not defined
Failed to process ADBPLHDOH_5.png: name 'load_and_prepare_image' is not defined
Failed to process AEIKVMSCL_0.png: name 

In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

def create_datagen():
    train_datagen = ImageDataGenerator(
        rescale=1.0/255.0,
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        validation_split=0.2  # Using 20% of the data for validation
    )
    return train_datagen

def create_generators(base_dir, input_shape, batch_size):
    datagen = create_datagen()
    
    train_generator = datagen.flow_from_directory(
        base_dir,  # Directly use base_dir without '/train'
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='categorical',
        subset='training'
    )
    validation_generator = datagen.flow_from_directory(
        base_dir,  # Same here, use base_dir
        target_size=input_shape,
        batch_size=batch_size,
        class_mode='categorical',
        subset='validation'
    )
    return train_generator, validation_generator



def define_model(num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(34, 80, 3)),  # Adjusted input_shape
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(100, activation='relu', kernel_initializer='he_uniform'),
        Dense(num_classes, activation='softmax')
    ])
    # compile model
    opt = SGD(learning_rate=0.001, momentum=0.01)  # Changed 'lr' to 'learning_rate'
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model



# Define parameters
base_dir = 'supervisedLetters'
input_shape = (34, 80)  # Adjust based on your dataset
batch_size = 1
num_classes = 36  # Update this based on your actual class count

# Prepare data generators
train_gen, val_gen = create_generators(base_dir, input_shape, batch_size)
print(train_gen.class_indices)

# Define the model
model = define_model(num_classes)

# Directory to save the models
model_dir = 'models2'
os.makedirs(model_dir, exist_ok=True)  # Create the directory if it doesn't exist

# Create a checkpoint callback to save best model each epoch with a unique name
checkpoint_cb = ModelCheckpoint(
    os.path.join(model_dir, 'model_epoch_{epoch:02d}_loss_{val_loss:.8f}.keras'),
    save_best_only=False,  # Change this to False to save models at each epoch regardless of validation loss
    monitor='val_loss',  # Decide what to monitor; here we use validation loss
    mode='min',  # Save models with the minimum validation loss
    verbose=1  # Print out messages when saving models
)

# Fit model
model.fit(
    train_gen,
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    epochs=1000,
    callbacks=[checkpoint_cb]
)


Found 6085 images belonging to 36 classes.
Found 1503 images belonging to 36 classes.
{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35}


  super().__init__(


Epoch 1/1000
[1m   8/6085[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m44s[0m 7ms/step - accuracy: 0.0000e+00 - loss: 5.6019   

  self._warn_if_super_not_called()


[1m6081/6085[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.0322 - loss: 3.6087
Epoch 1: saving model to models2\model_epoch_01_loss_3.47153354.keras
[1m6085/6085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 7ms/step - accuracy: 0.0322 - loss: 3.6087 - val_accuracy: 0.0739 - val_loss: 3.4715
Epoch 2/1000

Epoch 2: saving model to models2\model_epoch_02_loss_0.00000000.keras
[1m6085/6085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 3/1000
[1m   1/6085[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:46[0m 57ms/step - accuracy: 0.0000e+00 - loss: 3.7216

  self.gen.throw(typ, value, traceback)


[1m6081/6085[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.0828 - loss: 3.4030
Epoch 3: saving model to models2\model_epoch_03_loss_3.06914616.keras
[1m6085/6085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 7ms/step - accuracy: 0.0828 - loss: 3.4029 - val_accuracy: 0.1677 - val_loss: 3.0691
Epoch 4/1000

Epoch 4: saving model to models2\model_epoch_04_loss_0.00000000.keras
[1m6085/6085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00
Epoch 5/1000
[1m6082/6085[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.1946 - loss: 2.8918
Epoch 5: saving model to models2\model_epoch_05_loss_2.26709414.keras
[1m6085/6085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 7ms/step - accuracy: 0.1946 - loss: 2.8917 - val_accuracy: 0.3214 - val_loss: 2.2671
Epoch 6/1000

Epoch 6: saving model to models2\model_epoch_06_l

<keras.src.callbacks.history.History at 0x1e383b45710>