In [1]:
from sklearn.preprocessing import LabelBinarizer
from tensorflow import keras
from keras.utils import plot_model

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, Dropout

2023-12-13 21:00:27.246860: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Label names of the ten digit classes that are dropped from every split.
# The leading space is intentional: it matches the column headers as they
# are read from the CSV files.
classes_to_exclude = {' Numeric-' + str(digit) for digit in range(10)}

print(classes_to_exclude)


{' Numeric-1', ' Numeric-4', ' Numeric-2', ' Numeric-7', ' Numeric-8', ' Numeric-3', ' Numeric-9', ' Numeric-5', ' Numeric-6', ' Numeric-0'}


In [3]:
# --- Training split -------------------------------------------------------
# Folder holding the training images and the CSV that labels them.
image_dir = 'data/train'
csv_file = 'data/train_classes.csv'

labels_df = pd.read_csv(csv_file)

# Every column after the first is treated as a per-class indicator; the
# name of the column holding the row maximum becomes the image's label.
# NOTE(review): idxmax returns the first column on ties, so a row with no
# 1 at all would silently be labelled with the first class - confirm every
# row has exactly one class set. Adjust the iloc slice if the CSV layout
# changes.
train_indicator_cols = labels_df.iloc[:, 1:]
labels_df['label_column'] = train_indicator_cols.idxmax(axis=1)

# Discard the rows whose label is one of the excluded (numeric) classes.
train_keep_mask = ~labels_df['label_column'].isin(classes_to_exclude)
labels_df = labels_df[train_keep_mask]

class_counts = labels_df['label_column'].value_counts()

print("-----------------Classwise Training Data------------------")
print(class_counts)
print("Total: " + str(len(labels_df)))

-----------------Classwise Training Data------------------
label_column
 Alphabet-A    651
 Alphabet-J    606
 Alphabet-H    594
 Alphabet-S    591
 Alphabet-M    582
 Alphabet-K    573
 Alphabet-B    573
 Alphabet-C    570
 Alphabet-N    558
 Alphabet-T    555
 Alphabet-F    555
 Alphabet-P    546
 Alphabet-L    540
 Alphabet-U    522
 Alphabet-Q    519
 Alphabet-I    507
 Alphabet-g    504
 Alphabet-R    495
 Alphabet-Z    492
 Alphabet-Y    486
 Alphabet-E    474
 Alphabet-V    471
 Alphabet-X    471
 Alphabet-W    468
 Alphabet-D    393
 Alphabet-O    393
Name: count, dtype: int64
Total: 13689


In [4]:
# --- Test split -----------------------------------------------------------
# Folder holding the test images and the CSV that labels them.
test_image_dir = 'data/test'
test_csv_file = 'data/test_classes.csv'

test_labels_df = pd.read_csv(test_csv_file)

# Every column after the first is treated as a per-class indicator; the
# name of the column holding the row maximum becomes the image's label.
# Adjust the iloc slice if the CSV layout changes.
test_indicator_cols = test_labels_df.iloc[:, 1:]
test_labels_df['label_column'] = test_indicator_cols.idxmax(axis=1)

# Discard the rows whose label is one of the excluded (numeric) classes.
test_keep_mask = ~test_labels_df['label_column'].isin(classes_to_exclude)
test_labels_df = test_labels_df[test_keep_mask]

# Note: this rebinds the same `class_counts` name used for the other splits.
class_counts = test_labels_df['label_column'].value_counts()

print("-----------------Classwise Testing Data------------------")
print(class_counts)
print("Total: " + str(len(test_labels_df)))

-----------------Classwise Testing Data------------------
label_column
 Alphabet-F    71
 Alphabet-B    71
 Alphabet-V    70
 Alphabet-I    66
 Alphabet-g    65
 Alphabet-W    63
 Alphabet-T    62
 Alphabet-N    62
 Alphabet-C    61
 Alphabet-Q    61
 Alphabet-P    59
 Alphabet-M    59
 Alphabet-E    58
 Alphabet-R    58
 Alphabet-L    56
 Alphabet-U    55
 Alphabet-H    55
 Alphabet-A    54
 Alphabet-K    53
 Alphabet-J    53
 Alphabet-Y    52
 Alphabet-S    50
 Alphabet-D    46
 Alphabet-X    45
 Alphabet-O    41
 Alphabet-Z    40
Name: count, dtype: int64
Total: 1486


In [5]:
# --- Validation split -----------------------------------------------------
# Folder holding the validation images and the CSV that labels them.
valid_image_dir = 'data/valid'
valid_csv_file = 'data/valid_classes.csv'

valid_labels_df = pd.read_csv(valid_csv_file)

# Every column after the first is treated as a per-class indicator; the
# name of the column holding the row maximum becomes the image's label.
# Adjust the iloc slice if the CSV layout changes.
valid_indicator_cols = valid_labels_df.iloc[:, 1:]
valid_labels_df['label_column'] = valid_indicator_cols.idxmax(axis=1)

# Discard the rows whose label is one of the excluded (numeric) classes.
valid_keep_mask = ~valid_labels_df['label_column'].isin(classes_to_exclude)
valid_labels_df = valid_labels_df[valid_keep_mask]

# Note: this rebinds the same `class_counts` name used for the other splits.
class_counts = valid_labels_df['label_column'].value_counts()

print("-----------------Classwise Validation Data------------------")
print(class_counts)
print("Total: " + str(len(valid_labels_df)))

-----------------Classwise Validation Data------------------
label_column
 Alphabet-g    77
 Alphabet-L    77
 Alphabet-K    73
 Alphabet-I    72
 Alphabet-P    68
 Alphabet-S    68
 Alphabet-W    66
 Alphabet-R    63
 Alphabet-E    61
 Alphabet-A    60
 Alphabet-Q    60
 Alphabet-F    59
 Alphabet-B    59
 Alphabet-M    58
 Alphabet-H    57
 Alphabet-C    55
 Alphabet-V    54
 Alphabet-U    53
 Alphabet-Z    51
 Alphabet-N    48
 Alphabet-T    48
 Alphabet-X    47
 Alphabet-Y    47
 Alphabet-J    42
 Alphabet-D    40
 Alphabet-O    39
Name: count, dtype: int64
Total: 1502


In [6]:
# Training-time preprocessing: scale pixel values by 1/255 and apply random
# shear / zoom / horizontal-flip augmentation.
image_generator = ImageDataGenerator(rescale=1./255,
                                     shear_range=0.2,
                                     zoom_range=0.2,
                                     horizontal_flip=True)

# Stream augmented training batches straight from the labelled dataframe.
data_generator = image_generator.flow_from_dataframe(
    dataframe=labels_df,
    directory=image_dir,
    x_col='filename',           # Column containing image filenames in the CSV
    y_col='label_column',       # Column containing the derived class label
    target_size=(64, 64),       # Images are resized to 64x64
    batch_size=32,
    class_mode='categorical'    # One-hot targets for multi-class classification
)

# Validation-time preprocessing: rescale only. Validation images must NOT be
# augmented, otherwise val_loss / val_accuracy are measured on randomly
# distorted inputs and fluctuate from epoch to epoch.
valid_image_generator = ImageDataGenerator(rescale=1./255)

# Fix: build the validation stream from `valid_image_generator`; the original
# cell used the augmenting `image_generator` and left this one unused.
valid_generator = valid_image_generator.flow_from_dataframe(
    dataframe=valid_labels_df,
    directory=valid_image_dir,
    x_col='filename',           # Column containing image filenames in the CSV
    y_col='label_column',       # Column containing the derived class label
    # Reuse the training class order so a given class always maps to the
    # same output index, even if the validation split is missing a class.
    classes=list(data_generator.class_indices),
    target_size=(64, 64),       # Must match the training target size
    batch_size=32,
    class_mode='categorical'    # One-hot targets for multi-class classification
)



Found 13689 validated image filenames belonging to 26 classes.
Found 1502 validated image filenames belonging to 26 classes.


In [7]:
# Preview the first five rows of the filtered test labels.
test_labels_df.head(5)

Unnamed: 0,filename,Alphabet-A,Alphabet-B,Alphabet-C,Alphabet-D,Alphabet-E,Alphabet-F,Alphabet-H,Alphabet-I,Alphabet-J,...,Numeric-1,Numeric-2,Numeric-3,Numeric-4,Numeric-5,Numeric-6,Numeric-7,Numeric-8,Numeric-9,label_column
0,1150_jpg.rf.00bc375275d4ead3f5e898b56d50f69f.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alphabet-g
3,518_jpg.rf.034742535312128d301bf3248c430c8e.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alphabet-R
4,314_jpg.rf.007b6fc65dc29625f60ce63cfda19021.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alphabet-P
5,989_jpg.rf.0083ae042bd9056c29948df39929b6d2.jpg,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alphabet-B
7,1190_jpg.rf.031015b90bbb3ef3f78768bdadc2a2b2.jpg,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Alphabet-V


In [9]:
# CNN Model
# Size the output layer from the classes the training generator actually
# discovered instead of hard-coding 26, so the network stays consistent with
# the data if the excluded-class filter ever changes.
num_classes = len(data_generator.class_indices)

classifier = Sequential()

# First convolutional block. The input shape matches the generators'
# target_size of (64, 64) with 3 colour channels.
classifier.add(Convolution2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Second convolutional block
classifier.add(Convolution2D(64, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Third convolutional block
classifier.add(Convolution2D(128, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))

# Flatten the feature maps into a vector for the dense head
classifier.add(Flatten())

# Classification head: one softmax unit per class
classifier.add(Dense(256, activation='relu'))
classifier.add(Dense(num_classes, activation='softmax'))

# Categorical cross-entropy pairs with the generators' one-hot
# ('categorical') targets.
classifier.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

# NOTE: fit() returns a training History object, not a model. The name
# `model` is kept so any later cell that reads it keeps working; the trained
# network itself is `classifier`.
model = classifier.fit(
        data_generator,
        epochs=25,
        validation_data = valid_generator,
      )

2023-12-13 21:00:39.958845: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Epoch 1/25


2023-12-13 21:00:40.438095: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-12-13 21:03:33.794668: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [10]:
# Persist the trained network to disk in the current working directory.
saved_model_path = "isl_classifier_26_classes_KV_2.h5"
classifier.save(saved_model_path)