<a href="https://colab.research.google.com/github/mahsa-2003/AI-challenge/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required libraries
!pip install kagglehub

# Import libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import kagglehub

# Download the chest X-ray pneumonia dataset with kagglehub and report where
# it was stored on disk.
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
print("Dataset path:", path)

# Locations of the train / validation / test splits inside the download.
base_dir = os.path.join(path, 'chest_xray')
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Report how many files each split holds. os.listdir() on a split directory
# only returns its class sub-folders (it printed "2 files" for every split),
# so walk the tree and count the actual files instead.
# NOTE: os.walk() yields nothing for a missing directory, so a wrong path
# shows up here as a count of 0 rather than as an exception.
for split_label, split_dir in (
    ("Train", train_dir),
    ("Validation", val_dir),
    ("Test", test_dir),
):
    file_count = sum(len(files) for _, _, files in os.walk(split_dir))
    print(f"{split_label} directory contains:", file_count, "files")

# Training images: scale pixels to [0, 1] and apply light random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,       # random shear
    zoom_range=0.2,        # random zoom
    horizontal_flip=True,  # random left-right flip
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Both iterators read the class sub-folders from disk with identical settings:
# images resized to 224x224, batches of 32, binary (0/1) labels for the two
# classes (pneumonia / no pneumonia).
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(val_dir, **flow_options)

# Baseline CNN: three conv + max-pool stages followed by a small dense head.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` argument on the first layer, which Keras warns against for
# Sequential models.
model = Sequential([
    tf.keras.Input(shape=(224, 224, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # regularisation against overfitting
    Dense(1, activation='sigmoid'),  # single probability for the binary task
])

# Binary cross-entropy matches the single sigmoid output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary of the model.
model.summary()

# Balanced class weights, computed from the training labels, so that the
# under-represented class contributes as much to the loss as the other one.
class_labels = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
    'balanced',
    classes=class_labels,
    y=train_generator.classes,
)
# Keras takes the weights as a {class_index: weight} mapping; pair each weight
# with its actual label rather than relying on positional order.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}

# Train the baseline CNN.
# len(generator) is the number of batches per pass, counting a final partial
# batch. The previous `samples // batch_size` floors instead, which evaluates
# to 0 validation steps for the 16-image validation split (16 // 32).
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    class_weight=class_weights,  # compensate for class imbalance
)

# Evaluate the baseline model on the held-out test split. The test images go
# through the rescale-only generator (no augmentation) with the same loader
# settings as the other splits.
test_generator = validation_datagen.flow_from_directory(
    directory=test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)

test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test accuracy: {test_acc * 100:.2f}%")

# Plot training vs. validation curves (loss first, then accuracy) to check
# for overfitting; each metric gets its own figure.
for metric_key, metric_title in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(history.history[metric_key], label=f'Train {metric_title}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_title}')
    plt.legend()
    plt.show()

# Transfer learning with a frozen, ImageNet-pretrained ResNet50 backbone.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # keep the pretrained weights fixed

# The data generators deliver RGB pixels scaled to [0, 1], but the ImageNet
# ResNet50 weights expect the output of resnet50.preprocess_input applied to
# 0-255 pixels. Undo the 1/255 scaling and apply that preprocessing inside the
# model, so every caller can keep feeding it the same [0, 1] images.
model_transfer = Sequential([
    tf.keras.Input(shape=(224, 224, 3)),
    tf.keras.layers.Rescaling(255.0),
    tf.keras.layers.Lambda(tf.keras.applications.resnet50.preprocess_input),
    base_model,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid'),  # single probability for the binary task
])

# Compile the transfer-learning model.
model_transfer.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the transfer-learning model (only the new dense head is trainable).
# len(generator) counts a final partial batch, whereas `samples // batch_size`
# floors and yields 0 validation steps for the 16-image validation split.
history_transfer = model_transfer.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    class_weight=class_weights,
)

# Evaluate the transfer-learning model on the test split.
test_loss_transfer, test_acc_transfer = model_transfer.evaluate(test_generator)
print(f"Test accuracy (Transfer Learning): {test_acc_transfer * 100:.2f}%")

# Run one forward pass through the transfer model so that all of its layers
# have been called. A batch drawn from the training iterator serves as the
# input; its labels and the resulting predictions are not used.
warmup_batch = next(train_generator)
sample_image = warmup_batch[0]
_ = model_transfer.predict(sample_image)

# Model interpretation with Grad-CAM
def _find_layer(model, layer_name):
    """Return ``(layer, owner)`` for the layer called ``layer_name``.

    ``owner`` is ``model`` itself when the layer is one of its direct layers,
    or the nested sub-model (one level down) that contains the layer.
    Raises ValueError when the layer is found in neither place.
    """
    try:
        return model.get_layer(layer_name), model
    except ValueError:
        pass  # not a direct layer; look inside nested sub-models below
    for candidate in model.layers:
        if not hasattr(candidate, 'layers'):
            continue
        try:
            return candidate.get_layer(layer_name), candidate
        except ValueError:
            continue
    raise ValueError(
        f"Layer {layer_name!r} was not found in the model or in any of its "
        "directly nested sub-models."
    )


def grad_cam(model, img_path, layer_name, rescale=1.0 / 255):
    """Compute and plot a Grad-CAM heatmap for a single image.

    Args:
        model: Keras model to explain. The target layer may be a direct layer
            of ``model`` or sit inside a sub-model nested in a Sequential
            ``model`` (e.g. a pretrained backbone used as the first stage).
        img_path: Path of the image file; it is resized to 224x224.
        layer_name: Name of the convolutional layer whose activations are
            visualised.
        rescale: Factor applied to the raw 0-255 pixels before the forward
            pass. The default matches the ``rescale=1./255`` used by the data
            generators in this file; pass ``None`` to feed raw pixel values.

    Raises:
        ValueError: If ``layer_name`` cannot be located, or if it is nested in
            a sub-model of a non-Sequential ``model``.
        RuntimeError: If no gradient reaches the target layer.
    """
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    if rescale is not None:
        # Feed the model the same pixel range it was trained on.
        img_array = img_array * rescale
    img_array = tf.convert_to_tensor(img_array, dtype=tf.float32)

    target_layer, owner = _find_layer(model, layer_name)
    nested = owner is not model
    if nested and not isinstance(model, tf.keras.Sequential):
        raise ValueError(
            "Layers nested inside a sub-model are only supported when the "
            "outer model is Sequential."
        )

    # Model that exposes both the target activations and the final output.
    # For a nested layer it has to be built on the sub-model's own graph:
    # the layer's output tensor is not connected to the outer model's input.
    grad_model = tf.keras.models.Model(
        inputs=owner.inputs,
        outputs=[target_layer.output, owner.output],
    )

    with tf.GradientTape() as tape:
        # Watching the input guarantees that every op of the forward pass is
        # recorded, even if the backbone's frozen variables are not watched.
        tape.watch(img_array)
        if nested:
            # Replay the outer Sequential stack by hand, swapping the
            # sub-model for grad_model to capture the target activations.
            position = model.layers.index(owner)
            x = img_array
            for layer in model.layers[:position]:
                x = layer(x)
            conv_output, x = grad_model(x)
            for layer in model.layers[position + 1:]:
                x = layer(x)
            predictions = x
        else:
            conv_output, predictions = grad_model(img_array)
        loss = predictions[0]

    grads = tape.gradient(loss, conv_output)
    if grads is None:
        raise RuntimeError(
            f"No gradient flows from the model output to layer {layer_name!r}."
        )
    # Channel importance: gradient averaged over batch and spatial positions.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each activation channel by its importance and sum over channels.
    heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_output), axis=-1)
    heatmap = np.squeeze(heatmap)
    heatmap = np.maximum(heatmap, 0)  # keep positive evidence only
    peak = np.max(heatmap)
    if peak > 0:
        # Normalise to [0, 1]; an all-zero map is left as is instead of
        # being turned into NaNs by a division by zero.
        heatmap = heatmap / peak

    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()

# Example: Grad-CAM for one pneumonia image from the test split.
# os.listdir() returns entries in arbitrary order and can include non-image
# files, so keep image files only and sort them for a reproducible pick.
pneumonia_dir = os.path.join(test_dir, 'PNEUMONIA')
pneumonia_images = sorted(
    name for name in os.listdir(pneumonia_dir)
    if name.lower().endswith(('.jpeg', '.jpg', '.png', '.bmp'))
)
img_path = os.path.join(pneumonia_dir, pneumonia_images[0])
grad_cam(model_transfer, img_path, 'conv4_block4_3_conv')  # chosen ResNet50 conv layer

Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/chest-xray-pneumonia?dataset_version_number=2...


100%|██████████| 2.29G/2.29G [00:26<00:00, 93.7MB/s]

Extracting files...





Dataset path: /root/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2
Train directory contains: 2 files
Validation directory contains: 2 files
Test directory contains: 2 files
Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m685s[0m 4s/step - accuracy: 0.6582 - loss: 0.8493 - val_accuracy: 0.8125 - val_loss: 0.3474
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m690s[0m 4s/step - accuracy: 0.8717 - loss: 0.2996 - val_accuracy: 0.8125 - val_loss: 0.3234
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m736s[0m 4s/step - accuracy: 0.8886 - loss: 0.2784 - val_accuracy: 0.8750 - val_loss: 0.3317
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m745s[0m 4s/step - accuracy: 0.9026 - loss: 0.2444 - val_accuracy: 0.8750 - val_loss: 0.2544
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m730s[0m 4s/step - accuracy: 0.9043 - loss: 0.2373 - val_accuracy: 0.8750 - val_loss: 0.3437
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m678s[0m 4s/step - accuracy: 0.9021 - loss: 0.2270 - val_accuracy: 0.7500 - val_loss: 0.4382
Epoch 7/10
[1m163/163