In [None]:

# Install the Kaggle command-line client (quiet mode suppresses pip's progress output).
!pip install kaggle --quiet

# 1.2 Mount Google Drive (for saving the model/results)
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import kagglehub

# Kaggle "<owner>/<dataset>" handle of the handwriting dataset used in this notebook.
DATASET_HANDLE = "michaelfink0923/synthetic-dyslexia-handwriting-dataset"

# Fetch the latest published version; kagglehub returns the directory it extracted to.
path = kagglehub.dataset_download(DATASET_HANDLE)

# Show where the files ended up.
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/michaelfink0923/synthetic-dyslexia-handwriting-dataset?dataset_version_number=1...


100%|██████████| 120M/120M [00:03<00:00, 36.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/michaelfink0923/synthetic-dyslexia-handwriting-dataset/versions/1


In [None]:
# Make the Kaggle API token available to the `kaggle` CLI:
# create the config directory (no error if it already exists) ...
!mkdir -p ~/.kaggle
# ... copy the token file from the mounted Google Drive ...
!cp "/content/drive/MyDrive/Colab Notebooks/kaggle.json" ~/.kaggle/
# ... and make it readable/writable by the owner only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Download the dataset identified by its slug (-d) into /content (-p) and unzip it there.
!kaggle datasets download -d michaelfink0923/synthetic-dyslexia-handwriting-dataset -p /content --unzip


Dataset URL: https://www.kaggle.com/datasets/michaelfink0923/synthetic-dyslexia-handwriting-dataset
License(s): apache-2.0
Downloading synthetic-dyslexia-handwriting-dataset.zip to /content
  0% 0.00/120M [00:00<?, ?B/s]
100% 120M/120M [00:00<00:00, 1.63GB/s]


In [None]:
import cv2, os, glob
from pathlib import Path
import shutil
import numpy as np
from tqdm import tqdm

# Input/output locations for the cropping step.
IMG_DIR = "/content/kaggle/working/synthdata/images/train"   # downloaded images folder (verify this path)
LAB_DIR = "/content/kaggle/working/synthdata/labels/train"   # labels folder
OUT_DIR = "/content/crops_naug"  # no augmentation; the crops are written here
os.makedirs(OUT_DIR, exist_ok=True)

# If the class index -> name mapping is unknown, it is a good idea to read a sample label file first.
# If the Kaggle page documents the class definitions, adjust the names accordingly.
# Otherwise folders are created with numeric class names (class_0, class_1, ...).

def yolo_to_bbox(line, img_w, img_h):
    """Convert one YOLO-style label line into a class id and a pixel box.

    The line is split on whitespace; the first field is the class id and the
    next four are the box centre (x, y) and size (w, h), which are scaled by
    ``img_w`` / ``img_h`` (i.e. treated as fractions of the image size).

    Args:
        line: Text of a single label line.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``(cls, x1, y1, x2, y2)`` with every coordinate clamped to the valid
        pixel index range ``[0, img_w - 1]`` / ``[0, img_h - 1]``, or ``None``
        when the line has fewer than five fields or a field is not a finite
        number.
    """
    parts = line.strip().split()
    if len(parts) < 5:
        return None
    try:
        # Go through float() so ids written as "1.0" are accepted too.
        cls = int(float(parts[0]))
        xc, yc, w, h = map(float, parts[1:5])
        x1 = int((xc - w/2) * img_w)
        y1 = int((yc - h/2) * img_h)
        x2 = int((xc + w/2) * img_w)
        y2 = int((yc + h/2) * img_h)
    except (ValueError, OverflowError):
        # Non-numeric text, NaN or inf: treat like any other malformed line.
        return None

    def _clamp(value, size):
        # Keep the coordinate inside [0, size - 1]. Clamping BOTH ends matters:
        # a negative end coordinate would be read as a "from the end" index
        # when the caller slices the image with it.
        return max(0, min(size - 1, value))

    x1 = _clamp(x1, img_w); y1 = _clamp(y1, img_h)
    x2 = _clamp(x2, img_w); y2 = _clamp(y2, img_h)
    return cls, x1, y1, x2, y2

# Discover which class ids occur in the label files, then create one output
# folder per id (class_<id>) under OUT_DIR.
all_classes = set()
for lab in glob.glob(os.path.join(LAB_DIR, "*.txt")):
    with open(lab, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # blank line
                continue
            try:
                # Parse via float() first, exactly as yolo_to_bbox does, so an
                # id written as "1.0" is accepted here as well.
                all_classes.add(int(float(fields[0])))
            except (ValueError, OverflowError):
                # First field is not a usable number: ignore this line.
                continue
classes_sorted = sorted(all_classes)
print("Found classes:", classes_sorted)

# make folders
for c in classes_sorted:
    os.makedirs(os.path.join(OUT_DIR, f"class_{c}"), exist_ok=True)

# Crop every labelled box out of its image, resize it to 128x128 and save it
# as a PNG inside the folder of its class.
for img_path in tqdm(sorted(glob.glob(os.path.join(IMG_DIR, "*")))):
    img_name = os.path.basename(img_path)
    stem = os.path.splitext(img_name)[0]
    # The label file shares the image's base name, with a .txt extension.
    label_path = os.path.join(LAB_DIR, stem + ".txt")
    if not os.path.exists(label_path):
        continue
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode.
        continue
    h, w = img.shape[:2]
    with open(label_path, 'r') as f:
        for i, line in enumerate(f):
            if line.strip() == "":
                continue
            parsed = yolo_to_bbox(line, w, h)
            if parsed is None:
                continue
            cls, x1, y1, x2, y2 = parsed
            # Skip degenerate boxes up front. This also rejects a negative end
            # coordinate, which the slice below would otherwise interpret as
            # an index counted from the far edge of the image.
            if x2 <= x1 or y2 <= y1:
                continue
            crop = img[y1:y2, x1:x2]
            if crop.size == 0:
                continue
            # Bring every crop to the fixed 128x128 size.
            try:
                crop = cv2.resize(crop, (128, 128))
            except cv2.error:
                # Only OpenCV failures are skipped; anything else (including
                # a keyboard interrupt) propagates.
                continue
            # `i` is the line number inside the label file, which keeps the
            # output name unique per box.
            outname = f"{stem}_box{i}_cls{cls}.png"
            cv2.imwrite(os.path.join(OUT_DIR, f"class_{cls}", outname), crop)

print("Crops saved to:", OUT_DIR)


Found classes: [0, 1, 2]


100%|██████████| 1873/1873 [01:00<00:00, 31.11it/s]

Crops saved to: /content/crops_naug





In [None]:
# Count the files located at most two levels below the crop root.
!find /content/crops_naug -maxdepth 2 -type f | wc -l
# Long listing of the crop root (printing at most the first 200 lines).
!ls -lah /content/crops_naug | sed -n '1,200p'


112906
total 5.6M
drwxr-xr-x 5 root root 4.0K Oct  8 05:23 .
drwxr-xr-x 1 root root 4.0K Oct  8 05:23 ..
drwxr-xr-x 2 root root 2.0M Oct  8 05:24 class_0
drwxr-xr-x 2 root root 1.9M Oct  8 05:24 class_1
drwxr-xr-x 2 root root 1.8M Oct  8 05:24 class_2


In [None]:
DATA_DIR = "/content/crops_naug"  # root folder that holds the crops (one class_<id> sub-folder per class)


In [None]:
import tensorflow as tf
import os

DATA_DIR = "/content/crops_naug"

# Keep only real, non-hidden sub-directories
class_dirs = []
for entry in os.listdir(DATA_DIR):
    if entry.startswith("."):
        continue
    if os.path.isdir(os.path.join(DATA_DIR, entry)):
        class_dirs.append(entry)
print("Kullanılacak sınıflar:", class_dirs)

# Options common to both subsets. Using the same seed and validation_split in
# both calls is what keeps the training and validation subsets disjoint.
split_options = dict(
    labels='inferred',
    label_mode='categorical',
    batch_size=32,
    image_size=(128,128),
    validation_split=0.2,
    seed=42,
)

# 80% of the files
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR, subset='training', **split_options
)

# remaining 20%
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR, subset='validation', **split_options
)


Kullanılacak sınıflar: ['class_2', 'class_1', 'class_0']
Found 112906 files belonging to 3 classes.
Using 90325 files for training.
Found 112906 files belonging to 3 classes.
Using 22581 files for validation.


In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

num_classes = 3  # class_0, class_1, class_2

# ImageNet-pretrained EfficientNet-B0 without its classification head.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(128,128,3),
)

# Fine-tune only the last 20 layers: unfreeze the whole backbone first, then
# freeze everything except that tail again.
base_model.trainable = True
layers_to_freeze = base_model.layers[:-20]
for layer in layers_to_freeze:
    layer.trainable = False

# Classification head on top of the backbone. The backbone is called with
# training=False, so it runs in inference mode inside this model.
inputs = Input(shape=(128,128,3))
x = tf.keras.applications.efficientnet.preprocess_input(inputs)
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

checkpoint_path = "/content/efficientnet_dyslexia_baseline.h5"

# All three callbacks react to the validation loss.
best_model_saver = ModelCheckpoint(
    filepath=checkpoint_path, monitor='val_loss', save_best_only=True
)
early_stopper = EarlyStopping(
    monitor='val_loss', patience=6, restore_best_weights=True
)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

callbacks = [best_model_saver, early_stopper, lr_reducer]

# Train for at most 12 epochs, validating on val_ds after each one.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=12,
    callbacks=callbacks,
)


Epoch 1/12
[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.5286 - loss: 0.9512



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 32ms/step - accuracy: 0.5286 - loss: 0.9511 - val_accuracy: 0.7012 - val_loss: 0.6793 - learning_rate: 1.0000e-04
Epoch 2/12
[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.6782 - loss: 0.7119



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 23ms/step - accuracy: 0.6782 - loss: 0.7119 - val_accuracy: 0.7228 - val_loss: 0.6308 - learning_rate: 1.0000e-04
Epoch 3/12
[1m2820/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.6966 - loss: 0.6749



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 22ms/step - accuracy: 0.6967 - loss: 0.6749 - val_accuracy: 0.7345 - val_loss: 0.6067 - learning_rate: 1.0000e-04
Epoch 4/12
[1m2820/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.7088 - loss: 0.6543



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 25ms/step - accuracy: 0.7088 - loss: 0.6543 - val_accuracy: 0.7401 - val_loss: 0.5927 - learning_rate: 1.0000e-04
Epoch 5/12
[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7123 - loss: 0.6464



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 25ms/step - accuracy: 0.7123 - loss: 0.6464 - val_accuracy: 0.7469 - val_loss: 0.5812 - learning_rate: 1.0000e-04
Epoch 6/12
[1m2821/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.7181 - loss: 0.6369



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 22ms/step - accuracy: 0.7181 - loss: 0.6369 - val_accuracy: 0.7506 - val_loss: 0.5732 - learning_rate: 1.0000e-04
Epoch 7/12
[1m2820/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.7199 - loss: 0.6327



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 22ms/step - accuracy: 0.7199 - loss: 0.6327 - val_accuracy: 0.7544 - val_loss: 0.5662 - learning_rate: 1.0000e-04
Epoch 8/12
[1m2821/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.7211 - loss: 0.6277



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 21ms/step - accuracy: 0.7211 - loss: 0.6277 - val_accuracy: 0.7563 - val_loss: 0.5608 - learning_rate: 1.0000e-04
Epoch 9/12
[1m2819/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - accuracy: 0.7235 - loss: 0.6242



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 22ms/step - accuracy: 0.7235 - loss: 0.6242 - val_accuracy: 0.7591 - val_loss: 0.5565 - learning_rate: 1.0000e-04
Epoch 10/12
[1m2820/2823[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.7257 - loss: 0.6213



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 22ms/step - accuracy: 0.7257 - loss: 0.6213 - val_accuracy: 0.7605 - val_loss: 0.5529 - learning_rate: 1.0000e-04
Epoch 11/12
[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7261 - loss: 0.6188



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 23ms/step - accuracy: 0.7261 - loss: 0.6188 - val_accuracy: 0.7615 - val_loss: 0.5503 - learning_rate: 1.0000e-04
Epoch 12/12
[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7261 - loss: 0.6173



[1m2823/2823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 26ms/step - accuracy: 0.7261 - loss: 0.6173 - val_accuracy: 0.7637 - val_loss: 0.5474 - learning_rate: 1.0000e-04


In [None]:
# List everything (hidden entries included) in /content and in the crop root,
# e.g. to spot stray hidden folders next to the class directories.
!ls -a /content
!ls -a /content/crops_naug


.  ..  .config	crops_naug  drive  .ipynb_checkpoints  kaggle  sample_data
.  ..  class_0	class_1  class_2


In [None]:
# 1️⃣ Kütüphaneler
import tensorflow as tf
import os
import numpy as np
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# 2️⃣ Dataset path and class discovery
DATA_DIR = "/content/crops_naug"
# Keep only real, non-hidden sub-directories. The list is sorted so that
# class_dirs[i] is the class that receives label index i: flow_from_directory
# orders the class sub-folders alphanumerically, whereas os.listdir returns
# them in arbitrary order. class_dirs is later used as axis labels and report
# names, so the order must match.
class_dirs = sorted(
    d for d in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, d)) and not d.startswith(".")
)
print("Kullanılacak sınıflar:", class_dirs)

# 3️⃣ Image size and batch size
IMG_SIZE = (224,224)
BATCH_SIZE = 16

# 4️⃣ Safe augmentation (mild enough not to distort slanted handwriting)
efficientnet_preprocess = tf.keras.applications.efficientnet.preprocess_input

# Training images get small brightness / zoom / shift perturbations.
train_datagen = ImageDataGenerator(
    preprocessing_function=efficientnet_preprocess,
    validation_split=0.2,
    brightness_range=[0.8,1.2],
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
)

# Validation images are only preprocessed, never augmented.
val_datagen = ImageDataGenerator(
    preprocessing_function=efficientnet_preprocess,
    validation_split=0.2,
)

# Settings shared by both directory iterators.
flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    DATA_DIR, subset='training', shuffle=True, **flow_options
)

# shuffle=False keeps the validation batches in a fixed order.
val_generator = val_datagen.flow_from_directory(
    DATA_DIR, subset='validation', shuffle=False, **flow_options
)

# 5️⃣ EfficientNet-B0 model
num_classes = len(class_dirs)

# Pretrained backbone, frozen completely for the first training stage.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
)
base_model.trainable = False

# Head: preprocessing -> backbone (inference mode) -> pooling -> dropout -> softmax.
inputs = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
x = tf.keras.applications.efficientnet.preprocess_input(inputs)
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
outputs = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# 6️⃣ Callbacks (all of them watch the validation loss)
checkpoint_path = "/content/efficientnet_dyslexia_safe_aug.h5"
best_model_saver = ModelCheckpoint(
    filepath=checkpoint_path, monitor='val_loss', save_best_only=True
)
early_stopper = EarlyStopping(
    monitor='val_loss', patience=6, restore_best_weights=True
)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
callbacks = [best_model_saver, early_stopper, lr_reducer]

# 7️⃣ Train the model (backbone frozen)
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=12,
    callbacks=callbacks,
)

# 8️⃣ Fine-tune (unfreeze the last 20 backbone layers)
base_model.trainable = True
layers_to_freeze = base_model.layers[:-20]
for layer in layers_to_freeze:
    layer.trainable = False

# Recompile after changing the trainable flags, now with a 10x smaller
# learning rate than the frozen stage.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history_ft = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=10,
    callbacks=callbacks,
)

# 9️⃣ Confusion matrix and classification report
# Class names ordered by the label index the generator assigned to them, so
# row/column i of the matrix really is class_names[i].
class_names = sorted(val_generator.class_indices, key=val_generator.class_indices.get)
label_ids = list(range(len(class_names)))

# Walk over the validation batches exactly once. Iterating the generator with
# a plain `for ... in val_generator` would never stop, because Keras directory
# iterators loop indefinitely; indexing by batch number is bounded.
y_true, y_pred = [], []
for batch_idx in range(len(val_generator)):
    images, labels = val_generator[batch_idx]
    preds = model.predict(images, verbose=0)  # verbose=0: no progress bar per batch
    y_true.extend(np.argmax(labels, axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

# Passing `labels` keeps the matrix square even if a class is never seen.
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

# 10️⃣ Grad-CAM örneği (1 görsel)
import matplotlib.cm as cm

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for the first sample of ``img_array``.

    The named layer is looked up among the model's own layers first. If it is
    not there, it is looked up one level down, inside any layer that is itself
    a Keras model (e.g. a pretrained backbone called as a single layer);
    ``model.get_layer`` alone only sees the top-level layers. In the nested
    case the outer model's layers are applied one after another in
    ``model.layers`` order, so the outer model is assumed to be a simple chain
    of layers -- TODO confirm for models with branches.

    Args:
        img_array: Batch of inputs fed to the model; only the first sample's
            activations are used for the heatmap.
        model: The Keras model to explain.
        last_conv_layer_name: Name of the layer whose output is weighted.
        pred_index: Index of the output unit to explain. Defaults to the
            highest-scoring unit of the first sample.

    Returns:
        A NumPy array with the heatmap: negative values are clipped to zero and
        the result is divided by its maximum (left at zero if that maximum is
        zero).

    Raises:
        ValueError: If the layer cannot be found, or if no gradient reaches
            its output.
    """
    if any(layer.name == last_conv_layer_name for layer in model.layers):
        # The layer belongs to the model itself: one model with two outputs.
        grad_model = tf.keras.models.Model(
            model.inputs,
            [model.get_layer(last_conv_layer_name).output, model.output],
        )

        def forward(batch):
            return grad_model(batch)
    else:
        # Search one level down, inside layers that are models themselves.
        owner = next(
            (
                layer for layer in model.layers
                if isinstance(layer, tf.keras.Model)
                and any(inner.name == last_conv_layer_name for inner in layer.layers)
            ),
            None,
        )
        if owner is None:
            raise ValueError(f"No such layer: {last_conv_layer_name}")
        # Same sub-model, but it also hands back the requested activation.
        owner_taps = tf.keras.models.Model(
            owner.inputs,
            [owner.get_layer(last_conv_layer_name).output, owner.output],
        )

        def forward(batch):
            conv_out, x = None, batch
            for layer in model.layers:
                if isinstance(layer, tf.keras.layers.InputLayer):
                    continue
                if layer is owner:
                    conv_out, x = owner_taps(x)
                else:
                    x = layer(x)
            return conv_out, x

    with tf.GradientTape() as tape:
        conv_outputs, predictions = forward(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    grads = tape.gradient(class_channel, conv_outputs)
    if grads is None:
        raise ValueError(
            f"No gradient reached the output of layer {last_conv_layer_name!r}"
        )
    # One weight per channel: the gradient averaged over batch and space.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]
    heatmap = tf.squeeze(conv_outputs @ pooled_grads[..., tf.newaxis])
    heatmap = tf.maximum(heatmap, 0)
    # divide_no_nan yields 0 instead of NaN when the maximum is 0.
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# Try it on a single image
# Take the first validation batch; img[0:1] keeps the batch dimension while
# selecting only the first image.
img, label = val_generator[0]
heatmap = make_gradcam_heatmap(img[0:1], model, last_conv_layer_name="top_conv")
plt.matshow(heatmap)
plt.show()


Kullanılacak sınıflar: ['class_2', 'class_1', 'class_0']
Found 90326 images belonging to 3 classes.
Found 22580 images belonging to 3 classes.


  self._warn_if_super_not_called()


Epoch 1/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - accuracy: 0.5665 - loss: 0.8936



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1345s[0m 233ms/step - accuracy: 0.5665 - loss: 0.8936 - val_accuracy: 0.6574 - val_loss: 0.7594 - learning_rate: 1.0000e-04
Epoch 2/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step - accuracy: 0.6765 - loss: 0.7178



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1272s[0m 225ms/step - accuracy: 0.6765 - loss: 0.7178 - val_accuracy: 0.6547 - val_loss: 0.7324 - learning_rate: 1.0000e-04
Epoch 3/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step - accuracy: 0.6922 - loss: 0.6877



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1263s[0m 224ms/step - accuracy: 0.6922 - loss: 0.6877 - val_accuracy: 0.6771 - val_loss: 0.7078 - learning_rate: 1.0000e-04
Epoch 4/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - accuracy: 0.7057 - loss: 0.6649



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1278s[0m 226ms/step - accuracy: 0.7057 - loss: 0.6649 - val_accuracy: 0.6979 - val_loss: 0.6920 - learning_rate: 1.0000e-04
Epoch 5/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step - accuracy: 0.7095 - loss: 0.6555



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1305s[0m 231ms/step - accuracy: 0.7095 - loss: 0.6555 - val_accuracy: 0.6849 - val_loss: 0.6889 - learning_rate: 1.0000e-04
Epoch 6/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step - accuracy: 0.7088 - loss: 0.6583



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1286s[0m 228ms/step - accuracy: 0.7088 - loss: 0.6583 - val_accuracy: 0.6955 - val_loss: 0.6776 - learning_rate: 1.0000e-04
Epoch 7/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - accuracy: 0.7108 - loss: 0.6501



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1297s[0m 230ms/step - accuracy: 0.7108 - loss: 0.6501 - val_accuracy: 0.6983 - val_loss: 0.6729 - learning_rate: 1.0000e-04
Epoch 8/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - accuracy: 0.7132 - loss: 0.6456



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1309s[0m 232ms/step - accuracy: 0.7132 - loss: 0.6456 - val_accuracy: 0.7059 - val_loss: 0.6660 - learning_rate: 1.0000e-04
Epoch 9/12
[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step - accuracy: 0.7136 - loss: 0.6443



[1m5646/5646[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1311s[0m 232ms/step - accuracy: 0.7136 - loss: 0.6443 - val_accuracy: 0.7080 - val_loss: 0.6616 - learning_rate: 1.0000e-04
Epoch 10/12
[1m4347/5646[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m4:36[0m 213ms/step - accuracy: 0.7183 - loss: 0.6385