# Imports

In [6]:
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from collections import Counter

# Labeling

In [7]:
data_dir = "data"
categories = ["cloudy", "desert", "green_area", "water"]

In [8]:
images = []
labels = []

In [9]:
for category in categories:
    path = os.path.join(data_dir, category)
    # Convert label to numbers
    label = categories.index(category)
    
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=(256, 256))
        img = tf.keras.preprocessing.image.img_to_array(img)
        images.append(img)
        labels.append(label)

In [10]:
images = np.array(images, dtype="float32")
labels = np.array(labels)

In [11]:
images = images / 255.0

In [12]:
print(images.shape)
print(labels.shape)
print(images)
print(labels)
print(categories)

(5631, 256, 256, 3)
(5631,)
[[[[0.5921569  0.56078434 0.46666667]
   [0.5921569  0.5686275  0.47058824]
   [0.59607846 0.5803922  0.47843137]
   ...
   [0.8039216  0.78431374 0.70980394]
   [0.80784315 0.7882353  0.7137255 ]
   [0.80784315 0.7882353  0.7137255 ]]

  [[0.5921569  0.5647059  0.47058824]
   [0.5921569  0.57254905 0.47058824]
   [0.59607846 0.58431375 0.48235294]
   ...
   [0.8039216  0.78431374 0.7058824 ]
   [0.8039216  0.78431374 0.70980394]
   [0.80784315 0.7882353  0.7137255 ]]

  [[0.5921569  0.5686275  0.4745098 ]
   [0.5921569  0.5764706  0.47843137]
   [0.59607846 0.58431375 0.4862745 ]
   ...
   [0.8        0.78039217 0.7058824 ]
   [0.8039216  0.78431374 0.7058824 ]
   [0.8039216  0.78431374 0.70980394]]

  ...

  [[0.8862745  0.8039216  0.80784315]
   [0.8862745  0.8039216  0.80784315]
   [0.8862745  0.8039216  0.80784315]
   ...
   [0.5803922  0.59607846 0.49803922]
   [0.58431375 0.59607846 0.49803922]
   [0.5882353  0.59607846 0.49803922]]

  [[0.8862745  0.

# Splitting the data

In [13]:
X_train, X_temp, y_train, y_temp = train_test_split(images, labels, test_size=0.3, random_state=42, stratify=labels)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

In [14]:
print("Training label distribution:", Counter(y_train))
print("Validation label distribution:", Counter(y_val))
print("Test label distribution:", Counter(y_test))

Training label distribution: Counter({2: 1050, 0: 1050, 3: 1050, 1: 791})
Validation label distribution: Counter({0: 225, 2: 225, 3: 225, 1: 170})
Test label distribution: Counter({2: 225, 3: 225, 0: 225, 1: 170})


In [15]:
buffer_size = 256
batch_size = 32

# Create the tf.data.Dataset objects
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Shuffle, batch, and prefetch the datasets
train_dataset = train_dataset.shuffle(buffer_size).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

# Model

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    tf.keras.layers.MaxPooling2D(),
    
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4, activation='softmax')
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 conv2d_2 (Conv2D)           (None, 123, 123, 64)      36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 61, 61, 64)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 59, 59, 128)       7

# Compile

In [13]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min'
)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

model.compile(
    optimizer=optimizer, 
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# Fit

In [14]:
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=300,
    callbacks=[early_stopping, checkpoint],
    verbose=1
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

# Evaluate and Predict

In [16]:
model = tf.keras.models.load_model('best_model.h5')

In [17]:
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.9786982536315918


In [18]:
predictions = model.predict(test_dataset)

# If you want to convert predictions back to class labels:
predicted_classes = tf.argmax(predictions, axis=1)

# To see the actual predictions and compare with true labels
for i, prediction in enumerate(predicted_classes):
    print(f"Prediction: {categories[prediction]}, True Label: {categories[y_test[i]]}")

Prediction: green_area, True Label: green_area
Prediction: green_area, True Label: green_area
Prediction: water, True Label: water
Prediction: water, True Label: water
Prediction: cloudy, True Label: cloudy
Prediction: desert, True Label: desert
Prediction: desert, True Label: desert
Prediction: desert, True Label: desert
Prediction: cloudy, True Label: cloudy
Prediction: desert, True Label: desert
Prediction: cloudy, True Label: cloudy
Prediction: water, True Label: water
Prediction: desert, True Label: desert
Prediction: cloudy, True Label: cloudy
Prediction: cloudy, True Label: cloudy
Prediction: cloudy, True Label: cloudy
Prediction: water, True Label: water
Prediction: desert, True Label: desert
Prediction: desert, True Label: desert
Prediction: cloudy, True Label: cloudy
Prediction: cloudy, True Label: cloudy
Prediction: desert, True Label: desert
Prediction: green_area, True Label: green_area
Prediction: green_area, True Label: green_area
Prediction: green_area, True Label: gree