In [None]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

In [None]:
import kagglehub
import os
import pandas as pd

# Download the latest version of the dataset; the returned value is the local
# directory that holds the extracted files.
dataset_dir = kagglehub.dataset_download("dhruvildave/english-handwritten-characters-dataset")

# Find the CSV file within the dataset directory (first match in listing order).
# Resolving it through next() means a missing CSV raises immediately instead of
# leaving csv_file_path undefined or pointing at a value from an earlier run.
csv_filename = next(
    (filename for filename in os.listdir(dataset_dir) if filename.endswith(".csv")),
    None,
)
if csv_filename is None:
    raise FileNotFoundError(f"No .csv file found in dataset directory: {dataset_dir}")
csv_file_path = os.path.join(dataset_dir, csv_filename)

# Read the CSV file
df = pd.read_csv(csv_file_path)

print("Path to dataset files:", csv_file_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/dhruvildave/english-handwritten-characters-dataset?dataset_version_number=3...


100%|██████████| 13.1M/13.1M [00:01<00:00, 8.11MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/dhruvildave/english-handwritten-characters-dataset/versions/3/english.csv


In [None]:
# Preview the first 10 rows of the metadata table loaded from the CSV.
df.head(10)

Unnamed: 0,image,label
0,Img/img001-001.png,0
1,Img/img001-002.png,0
2,Img/img001-003.png,0
3,Img/img001-004.png,0
4,Img/img001-005.png,0
5,Img/img001-006.png,0
6,Img/img001-007.png,0
7,Img/img001-008.png,0
8,Img/img001-009.png,0
9,Img/img001-010.png,0


In [None]:
# Size every image is resized to on load (passed as target_size to load_img).
img_size = (32, 32)

In [None]:
import os

def load_and_preprocess_image(image_path):
    """Load one dataset image as a grayscale array scaled by 1/255.

    Args:
        image_path: Image path relative to the folder that contains the CSV
            (the global ``csv_file_path``), e.g. a value from ``df['image']``.

    Returns:
        The ``img_to_array`` result for the image, resized to the global
        ``img_size`` and divided by 255.0.
    """
    # Paths in the CSV are resolved against the directory holding the CSV itself.
    full_path = os.path.join(os.path.dirname(csv_file_path), image_path)
    grayscale_img = load_img(full_path, color_mode='grayscale', target_size=img_size)
    pixels = img_to_array(grayscale_img)
    return pixels / 255.0

In [None]:
# Pull the image-path and label columns out of the DataFrame as plain arrays.
image_paths, labels = df['image'].values, df['label'].values


In [None]:
# Fit the encoder on the raw labels and map each one to its integer class id.
# The fitted encoder is kept so predictions can be mapped back to labels later.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(y=labels)

In [None]:
# Load and preprocess every image, then stack the results into one array.
imgs = np.array(list(map(load_and_preprocess_image, image_paths)))

In [None]:
# One-hot encode by indexing an identity matrix with the integer class ids:
# row i of the result is the identity row belonging to sample i's class.
num_label = np.unique(labels_encoded).size
one_hot_labels = np.identity(num_label)[labels_encoded]

In [None]:
# Hold out 10% of the samples for validation. Stratifying on the integer class
# ids keeps every class represented in both splits in proportion; without it a
# random 10% draw can leave some classes with few or no validation samples.
X_train, X_val, y_train, y_val = train_test_split(
    imgs,
    one_hot_labels,
    test_size=0.1,
    random_state=42,
    stratify=labels_encoded,
)

In [None]:
# Random geometric augmentation (rotation, shifts, zoom) applied on the fly to
# the training batches produced by datagen.flow().
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)
# No datagen.fit(X_train) call: fit() only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled here, so it would just copy the training array for nothing.

In [None]:
# Define the model: three Conv -> BatchNorm -> MaxPool stages followed by a
# dense classifier head with one softmax output per class.
model = Sequential([
    # Declare the input with an explicit Input layer rather than passing
    # input_shape to the first Conv2D, which Keras warns against. The spatial
    # size comes from img_size so the model always matches the loaded images;
    # the trailing 1 is the single grayscale channel.
    tf.keras.Input(shape=(*img_size, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_label, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Targets are one-hot vectors and the output layer is a softmax, hence
# categorical cross-entropy; accuracy is tracked as the reporting metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [None]:
# Callbacks -- all three watch the validation loss.
# Stop once val_loss has gone 5 epochs without improving and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Write the model to best_model.keras only when val_loss improves.
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    save_best_only=True,
)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.2,
    min_lr=1e-5,
)

In [None]:
# Fit for up to 50 epochs on augmented batches of 32 drawn from the training
# split; validation runs on the hold-out arrays as-is, without augmentation.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
    epochs=50,
)

Epoch 1/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.7639 - loss: 0.7202 - val_accuracy: 0.7977 - val_loss: 0.6444 - learning_rate: 1.0000e-05
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.7836 - loss: 0.6750 - val_accuracy: 0.7977 - val_loss: 0.6469 - learning_rate: 1.0000e-05
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7774 - loss: 0.6535 - val_accuracy: 0.7977 - val_loss: 0.6471 - learning_rate: 1.0000e-05
Epoch 4/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7747 - loss: 0.6563 - val_accuracy: 0.8035 - val_loss: 0.6472 - learning_rate: 1.0000e-05
Epoch 5/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7771 - loss: 0.6693 - val_accuracy: 0.8006 - val_loss: 0.6483 - learning_rate: 1.0000e-05
Epoch 6/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
# Score the trained model on the hold-out split and report both numbers.
loss, accuracy = model.evaluate(X_val, y_val)
print(f'Validation Loss: {loss}', f'Validation Accuracy: {accuracy}', sep='\n')

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7691 - loss: 0.7269 
Validation Loss: 0.6444424986839294
Validation Accuracy: 0.7976539731025696


In [None]:
# Persist the fitted label encoder so predicted class ids can be mapped back to
# the original labels outside this notebook.
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [1]:
# Create a new Git repository in the current working directory.
!git init


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [2]:
# Stage everything under the current working directory.
!git add .

In [4]:
# NOTE: git refuses to commit ("Author identity unknown") until user.name and
# user.email are configured, e.g. with `git config user.email ...`.
!git commit -m "initials"

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@8a5bcae1b087.(none)')


In [5]:
# Register the GitHub remote, rename the current branch to main, and push it.
# NOTE: the push fails with "src refspec main does not match any" when the
# repository has no commits yet, so the commit step must succeed first.
!git remote add origin https://github.com/kailai-13/Character_classification_note_vook.git
!git branch -M main
!git push -u origin main

error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/kailai-13/Character_classification_note_vook.git'
[m