In [None]:
# ==============================================================================
# 1. KURULUM VE KÜTÜPHANELER
# ==============================================================================
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from datasets import load_dataset
from sklearn.metrics import classification_report, confusion_matrix
from PIL import Image
from tqdm import tqdm

# Fix every source of randomness (for reproducibility).
# tf.keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow in
# a single call. Seeding only NumPy and TensorFlow leaves Python's `random`
# unseeded; newer Keras versions appear to draw default layer/initializer seeds
# from it, so dropout/augmentation would differ between runs — TODO confirm for
# the installed Keras version.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

print(f"TensorFlow Version: {tf.__version__}")
print("GPU Mevcut mu?:", "EVET" if tf.config.list_physical_devices('GPU') else "HAYIR")


TensorFlow Version: 2.19.0
GPU Mevcut mu?: EVET


In [None]:
# On-disk layout used for the exported copy: <base_dir>/<split>/<class>/
base_dir = "dataset_temp"
splits = ['train', 'val', 'test']
# List position doubles as the integer label id: 0 -> Indoor, 1 -> Outdoor
# (taken from the dataset card — not verified against the dataset features here).
classes = ['Indoor', 'Outdoor']

dataset_name = "prithivMLmods/IndoorOutdoorNet-20K"
print(f"\n--- Veri Seti İndiriliyor: {dataset_name} ---")
# The dataset is published as one piece, so only the "train" split is requested.
hf_dataset = load_dataset(dataset_name, split="train")



--- Veri Seti İndiriliyor: prithivMLmods/IndoorOutdoorNet-20K ---


In [None]:
# ==============================================================================
# 2. DATASET PREPARATION (Hugging Face -> Disk -> TF Dataset)
# ==============================================================================
# Feeding the decoded images to TF straight from memory can bloat RAM, so the
# images are written to a temporary directory tree first:
#     <base_dir>/<split>/<class>/<index>.jpg

for split_name in splits:
    for class_name in classes:
        os.makedirs(os.path.join(base_dir, split_name, class_name), exist_ok=True)

print("--- Veriler Train/Val/Test olarak ayrıştırılıyor ve kaydediliyor ---")
# Shuffle once with a fixed seed, then cut by position: 70% train, 15% val and
# the remainder (~15%) test.
shuffled_data = hf_dataset.shuffle(seed=SEED)
total_size = len(shuffled_data)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)

# Labels are used as integer indices into `classes`
# (assumes 0 = Indoor, 1 = Outdoor as stated by the dataset card — TODO confirm).
for i, item in tqdm(enumerate(shuffled_data), total=total_size):
    image = item['image']
    label = item['label']  # 0 or 1

    # Directory name for this sample's class
    label_name = classes[label]

    # The position in the shuffled order decides the split
    if i < train_size:
        target_split = 'train'
    elif i < train_size + val_size:
        target_split = 'val'
    else:
        target_split = 'test'

    # JPEG cannot store alpha or palette images (e.g. RGBA, LA, P); saving such an
    # image would raise, so normalise everything to RGB before writing.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Save the image
    image.save(os.path.join(base_dir, target_split, label_name, f"{i}.jpg"))





--- Veri Seti İndiriliyor: prithivMLmods/IndoorOutdoorNet-20K ---


README.md: 0.00B [00:00, ?B/s]

datasets/0000.parquet:   0%|          | 0.00/451M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/19998 [00:00<?, ? examples/s]

--- Veriler Train/Val/Test olarak ayrıştırılıyor ve kaydediliyor ---


100%|██████████| 19998/19998 [00:32<00:00, 608.79it/s]


In [None]:
# ==============================================================================
# 3. TENSORFLOW DATA PIPELINE
# ==============================================================================
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE
# The datasets below are already batched, so shuffle() buffers whole batches.
# A buffer of 1000 would exceed the number of training batches and therefore hold
# the entire training set in RAM; keep the buffer to a bounded number of batches.
SHUFFLE_BUFFER_BATCHES = 64

print("\n--- TF Dataset Pipeline Oluşturuluyor ---")
# An explicit seed makes the file order independent of how many random numbers
# were drawn earlier in the session, so re-running this cell is repeatable.
train_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(base_dir, 'train'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    seed=SEED,
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(base_dir, 'val'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    seed=SEED,
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    os.path.join(base_dir, 'test'),
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='binary',
    shuffle=False,  # Never shuffle the test data: predictions must line up with labels
)

# Performance optimisation: cache decoded batches on disk, prefetch the next ones.
# NOTE(review): the cache files persist between runs; delete "training_cache_file*"
# and "val_cache_file*" after changing the data or IMG_SIZE so stale batches are
# not reused — confirm against the tf.data cache documentation.
train_ds = (
    train_ds
    .cache("training_cache_file")
    .shuffle(SHUFFLE_BUFFER_BATCHES)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = val_ds.cache("val_cache_file").prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)


--- TF Dataset Pipeline Oluşturuluyor ---
Found 13998 files belonging to 2 classes.
Found 2999 files belonging to 2 classes.
Found 3001 files belonging to 2 classes.


In [None]:
# ==============================================================================
# 4. MODEL ARCHITECTURE (EfficientNetB0 - Lightweight)
# ==============================================================================
print("\n--- EfficientNetB0 Modeli Hazırlanıyor ---")

# Derive the network input shape from the pipeline's image size so the two
# cannot drift apart when IMG_SIZE is changed.
INPUT_SHAPE = IMG_SIZE + (3,)

# Data augmentation - to reduce overfitting
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
])

# Base model (pre-trained on ImageNet)
base_model = tf.keras.applications.EfficientNetB0(
    input_shape=INPUT_SHAPE,
    include_top=False,  # Drop the original classification head
    weights='imagenet'
)
base_model.trainable = False  # Frozen during the first training phase

# Assemble the full model
inputs = tf.keras.Input(shape=INPUT_SHAPE)
x = data_augmentation(inputs)
# No explicit preprocess_input call: the Keras EfficientNet models are documented
# as including their own input preprocessing — TODO confirm for this Keras version.
# training=False keeps the backbone in inference mode even after it is unfrozen.
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
# Single sigmoid unit: the output is the probability of label 1.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()


--- EfficientNetB0 Modeli Hazırlanıyor ---


In [None]:
# ==============================================================================
# 5. TRAINING (PHASE 1: Feature Extraction)
# ==============================================================================
print("\n--- Aşama 1: Feature Extraction Eğitimi Başlıyor ---")

# Stop when the validation loss has not improved for 3 epochs and roll the model
# back to the weights of its best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Only the new classification head is trained here; the backbone is frozen.
history_1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[early_stop],
)


--- Aşama 1: Feature Extraction Eğitimi Başlıyor ---
Epoch 1/10
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m548s[0m 480ms/step - accuracy: 0.8960 - loss: 0.2534 - val_accuracy: 0.9907 - val_loss: 0.0422
Epoch 2/10


In [None]:
# ==============================================================================
# 6. TRAINING (PHASE 2: Fine-Tuning)
# ==============================================================================
print("\n--- Aşama 2: Fine-Tuning (İnce Ayar) Başlıyor ---")

# Unfreeze the whole backbone, then re-freeze its first 100 layers so that only
# the remaining layers are updated.
base_model.trainable = True
for frozen_layer in base_model.layers[:100]:
    frozen_layer.trainable = False

# Recompile so the changed trainable flags take effect, with a much lower
# learning rate than in phase 1.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[early_stop],
)

In [None]:
# ==============================================================================
# 7. EVALUATION AND METRICS
# ==============================================================================
print("\n--- Test Seti Değerlendirmesi ---")

# Accuracy
loss, accuracy = model.evaluate(test_ds)
print(f"Test Accuracy: {accuracy:.4f}")

# Collect labels and predictions batch by batch so they stay aligned.
# The model is called directly instead of via model.predict(): predict() is meant
# for whole datasets and adds per-call overhead when invoked inside a loop.
true_batches = []
prob_batches = []
for images, labels in test_ds:
    true_batches.append(labels.numpy().ravel())
    prob_batches.append(model(images, training=False).numpy().ravel())

# Labels arrive as floats (label_mode='binary'); cast them to int so that y_true
# and y_pred share one dtype.
y_true = np.concatenate(true_batches).astype(int)
y_pred_probs = np.concatenate(prob_batches)
y_pred = (y_pred_probs > 0.5).astype(int)

# Pin the label ids explicitly so that class names always line up with the rows
# and columns, even if one class is absent from the labels or the predictions.
label_ids = list(range(len(classes)))

# Classification Report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=label_ids, target_names=classes))

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
# ==============================================================================
# 8. SPEED AND SIZE ANALYSIS (BENCHMARKING)
# ==============================================================================
print("\n--- Verimlilik Analizi (Efficiency Benchmarking) ---")

# 1. Model size (Keras .h5 format)
# NOTE(review): this path is relative (no leading slash), so it resolves under the
# current working directory rather than a mounted Google Drive — confirm intent.
# The directory is created first because saving into a missing directory fails.
h5_path = 'content/drive/MyDrive/indoor_outdoor_efficientnet.h5'
os.makedirs(os.path.dirname(h5_path), exist_ok=True)
model.save(h5_path)
file_size = os.path.getsize(h5_path) / (1024 * 1024)
print(f"Orijinal Model Boyutu (H5): {file_size:.2f} MB")

# 2. Inference time
# Average prediction time for a single image.
dummy_input = tf.random.normal([1, 224, 224, 3])
# Warm-up runs (excluded from the measurement)
for _ in range(10):
    _ = model(dummy_input, training=False)

n_runs = 100
# perf_counter is a monotonic, high-resolution clock intended for timing.
start_time = time.perf_counter()
for _ in range(n_runs):
    # .numpy() forces the result to be materialised, so work that the device may
    # still be executing asynchronously is included in the measured time.
    _ = model(dummy_input, training=False).numpy()
end_time = time.perf_counter()
avg_time_ms = ((end_time - start_time) / n_runs) * 1000

# Report the device that is actually available: no CPU placement is requested
# above, so labelling the figure "CPU" is wrong whenever a GPU is present.
device_label = "GPU" if tf.config.list_physical_devices('GPU') else "CPU"
print(f"Ortalama Inference Süresi ({device_label}): {avg_time_ms:.2f} ms")


In [None]:
# ==============================================================================
# 9. MOBILE OPTIMISATION (TFLite + Quantization)
# ==============================================================================
print("\n--- TFLite Dönüştürme ve Quantization ---")

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # Size reduction and speed-up
tflite_quant_model = converter.convert()

# NOTE(review): relative path (no leading slash) — it resolves under the current
# working directory, not a mounted Google Drive; confirm intent.
tflite_path = 'content/drive/MyDrive/indoor_outdoor_quantized.tflite'
# open(..., 'wb') does not create missing parent directories, so create them first.
os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
with open(tflite_path, 'wb') as f:
    f.write(tflite_quant_model)

tflite_size = os.path.getsize(tflite_path) / (1024 * 1024)
print(f"Optimize Edilmiş TFLite Model Boyutu: {tflite_size:.2f} MB")
# `file_size` is the .h5 size in MB computed by the benchmarking cell.
print(f"Boyut Kazancı: {file_size / tflite_size:.1f}x kat küçülme!")

print("\n--- İŞLEM TAMAMLANDI! ---")
# These files can be downloaded and used in the demo:
# 1. indoor_outdoor_efficientnet.h5 (full model)
# 2. indoor_outdoor_quantized.tflite (mobile model)

# Task
Compress the `dataset_temp` folder into a `.zip` file and save it to Google Drive.

## Sıkıştırılmış Dataset Oluştur

### Subtask:
Oluşturulan `dataset_temp` klasörünü (train, val, test alt klasörleriyle birlikte) `.zip` formatında sıkıştır.


**Reasoning**:
To compress the `dataset_temp` directory into a zip file as requested, I will use the `shutil.make_archive` function, after importing the `shutil` module.



In [None]:
import shutil

# Name of the archive without extension; make_archive appends ".zip" itself.
archive_name = 'dataset_temp_archive'
# Directory whose contents become the root of the archive.
root_dir = 'dataset_temp'

print(f"\n--- '{root_dir}' klas\u00f6r\u00fc s\u0131k\u0131\u015ft\u0131r\u0131l\u0131yor ---")
shutil.make_archive(base_name=archive_name, format='zip', root_dir=root_dir)
print(f"'{archive_name}.zip' dosyas\u0131 olu\u015fturuldu.")

## Dataseti Google Drive'a Kaydet

### Subtask:
Sıkıştırılmış `.zip` dosyasını kullanıcının Google Drive'ına kaydet. Kullanıcıdan Drive erişimi için yetkilendirme istenir.


**Reasoning**:
To save the compressed `.zip` file to Google Drive, I first need to import the `drive` module from `google.colab` and then mount Google Drive. Mounting asks the user to authorize access and makes the Drive contents available under `/content/gdrive`.



In [None]:
from google.colab import drive
# Mount the user's Google Drive under /content/gdrive.
drive.mount('/content/gdrive')

# The message previously contained a mis-encoded character ("bađlandı").
print("Google Drive bağlandı.")

Mounted at /content/gdrive
Google Drive bađlandı.


**Reasoning**:
Now that Google Drive is mounted, I will copy the compressed `dataset_temp_archive.zip` file from the Colab working directory to the root of the mounted Drive (`/content/gdrive/My Drive/`). This involves defining the source and destination paths and then using `shutil.copy` for the file transfer.



In [None]:
import shutil

# `archive_name` comes from the cell that created the archive.
zip_filename = f"{archive_name}.zip"
source_path = f"./{zip_filename}"
destination_path = f"/content/gdrive/My Drive/{zip_filename}"

print(f"--- '{zip_filename}' dosyası Google Drive'a kopyalanıyor ---")
shutil.copy(source_path, destination_path)
print(f"'{zip_filename}' dosyası '{destination_path}' konumuna kaydedildi.")

--- 'dataset_temp_archive.zip' dosyası Google Drive'a kopyalanıyor ---
'dataset_temp_archive.zip' dosyası '/content/gdrive/My Drive/dataset_temp_archive.zip' konumuna kaydedildi.


## Final Task

### Subtask:
Veri setinin başarıyla Google Drive'a kaydedildiğini onayla.


## Summary:

### Q&A
Yes, the `dataset_temp` dataset was successfully compressed into `dataset_temp_archive.zip` and saved to Google Drive.

### Data Analysis Key Findings
*   The `dataset_temp` folder was successfully compressed into a `.zip` file named `dataset_temp_archive.zip`.
*   Google Drive was successfully mounted within the Colab environment.
*   The `dataset_temp_archive.zip` file was successfully copied to the user's Google Drive at the path `/content/gdrive/My Drive/dataset_temp_archive.zip`.

### Insights or Next Steps
*   The dataset archive is now stored on Google Drive, so it survives the end of the Colab session and can be restored later without re-downloading and re-splitting the data.
*   The compressed file can be easily shared or transferred for further analysis or deployment.
