In [1]:
import os
import pandas as pd

BASE_PATH = "/kaggle/input/aws-recruitment1/weather classification/dataset"

CLASSES = ["cloudy", "foggy", "rainy", "shine", "sunrise"]

image_paths = []
labels = []

for cls in CLASSES:
    cls_path = os.path.join(BASE_PATH, cls)
    for img in os.listdir(cls_path):
        image_paths.append(os.path.join(cls_path, img))
        labels.append(cls)

df = pd.DataFrame({
    "image_path": image_paths,
    "label": labels
})

print(df.head())
print("\nTotal images:", len(df))


                                          image_path   label
0  /kaggle/input/aws-recruitment1/weather classif...  cloudy
1  /kaggle/input/aws-recruitment1/weather classif...  cloudy
2  /kaggle/input/aws-recruitment1/weather classif...  cloudy
3  /kaggle/input/aws-recruitment1/weather classif...  cloudy
4  /kaggle/input/aws-recruitment1/weather classif...  cloudy

Total images: 1500


In [2]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42
)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))


Train size: 1200
Validation size: 300


In [3]:
import tensorflow as tf

IMG_SIZE = 224
BATCH_SIZE = 16



2026-02-10 19:28:51.542376: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770751731.772974      17 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770751731.842603      17 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770751732.406442      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770751732.406520      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770751732.406524      17 computation_placer.cc:177] computation placer alr

In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

train_augmentation = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.15),
    tf.keras.layers.RandomZoom(0.2),
    tf.keras.layers.RandomContrast(0.2),
])


2026-02-10 19:29:06.579572: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [5]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
label_encoder.fit(train_df["label"])

train_df["label_enc"] = label_encoder.transform(train_df["label"])
val_df["label_enc"] = label_encoder.transform(val_df["label"])

NUM_CLASSES = len(label_encoder.classes_)
print("Classes:", label_encoder.classes_)


Classes: ['cloudy' 'foggy' 'rainy' 'shine' 'sunrise']


In [6]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (train_df["image_path"].values, train_df["label_enc"].values)
)

val_ds = tf.data.Dataset.from_tensor_slices(
    (val_df["image_path"].values, val_df["label_enc"].values)
)


In [7]:
import tensorflow as tf

def load_image(path, label):
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img.set_shape((IMG_SIZE, IMG_SIZE, 3))
    return img, label


In [8]:
import tensorflow as tf

val_preprocessing = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255)
])


In [9]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = (
    tf.data.Dataset.from_tensor_slices(
        (train_df["image_path"].values, train_df["label_enc"].values)
    )
    .map(lambda x, y: load_image(x, y), num_parallel_calls=AUTOTUNE)
    .map(lambda x, y: (train_augmentation(x, training=True),
                       tf.one_hot(y, NUM_CLASSES)))
    .shuffle(512)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

val_ds = (
    tf.data.Dataset.from_tensor_slices(
        (val_df["image_path"].values, val_df["label_enc"].values)
    )
    .map(lambda x, y: load_image(x, y), num_parallel_calls=AUTOTUNE)
    .map(lambda x, y: (val_preprocessing(x, training=False),
                       tf.one_hot(y, NUM_CLASSES)))
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)


In [10]:
AUTOTUNE = tf.data.AUTOTUNE

val_ds = (
    tf.data.Dataset.from_tensor_slices(
        (val_df["image_path"].values, val_df["label_enc"].values)
    )
    .map(lambda x, y: load_image(x, y), num_parallel_calls=AUTOTUNE)
    .map(lambda x, y: (val_preprocessing(x, training=False),
                       tf.one_hot(y, NUM_CLASSES)))
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)


In [11]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Input,
    Conv2D, MaxPooling2D,
    BatchNormalization,
    Dense, Dropout, Flatten
)

model = Sequential([
    Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    Conv2D(32, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])


In [13]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [14]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

callbacks = [
    EarlyStopping(
        monitor="val_loss",
        patience=4,
        restore_best_weights=True
    ),
    ReduceLROnPlateau(
        monitor="val_loss",
        patience=2,
        factor=0.3,
        min_lr=1e-5
    )
]


In [15]:
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    callbacks=callbacks
)


Epoch 1/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 2s/step - accuracy: 0.5519 - loss: 28.5408 - val_accuracy: 0.4167 - val_loss: 14.5484 - learning_rate: 0.0010
Epoch 2/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 2s/step - accuracy: 0.6411 - loss: 22.9489 - val_accuracy: 0.4533 - val_loss: 12.1239 - learning_rate: 0.0010
Epoch 3/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 2s/step - accuracy: 0.6295 - loss: 19.9541 - val_accuracy: 0.3633 - val_loss: 35.5113 - learning_rate: 0.0010
Epoch 4/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 2s/step - accuracy: 0.6241 - loss: 13.7324 - val_accuracy: 0.3933 - val_loss: 12.5212 - learning_rate: 0.0010
Epoch 5/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 2s/step - accuracy: 0.6324 - loss: 12.0347 - val_accuracy: 0.6000 - val_loss: 3.6702 - learning_rate: 3.0000e-04
Epoch 6/25
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [16]:
model.save("weather_cnn_final.h5")
print("✅ Model saved")




✅ Model saved


In [17]:
import numpy as np
import os
import tensorflow as tf

TEST_DIR = "/kaggle/input/aws-recruitment1/weather classification/dataset/alien_test"

test_images = []
image_names = []

for img in os.listdir(TEST_DIR):
    img_path = os.path.join(TEST_DIR, img)
    image = tf.io.read_file(img_path)
    image = tf.io.decode_image(image, channels=3, expand_animations=False)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0
    
    test_images.append(image)
    image_names.append(img)

test_images = tf.stack(test_images)


In [18]:
pred = model.predict(test_images)
pred_labels = np.argmax(pred, axis=1)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 679ms/step


In [19]:
label_map = dict(enumerate(label_encoder.classes_))
pred_class_names = [label_map[i] for i in pred_labels]


In [20]:
# Convert numeric predictions back to class names
label_map = dict(enumerate(label_encoder.classes_))
pred_class_names = [label_map[i] for i in pred_labels]

print(pred_class_names[:10])  # quick sanity check


['foggy', 'foggy', 'rainy', 'shine', 'sunrise', 'rainy', 'shine', 'foggy', 'sunrise', 'foggy']


In [21]:
import pandas as pd
import numpy as np

submission = pd.DataFrame({
    "id": np.arange(1, len(pred_labels) + 1),
    "labels": pred_labels
})

# VERY IMPORTANT: index=False
submission.to_csv("submission.csv", index=False)


In [22]:
submission.head()


Unnamed: 0,id,labels
0,1,1
1,2,1
2,3,2
3,4,3
4,5,4


In [23]:
import pandas as pd

test_df = pd.read_csv(
    "/kaggle/input/aws-recruitment1/weather classification/dataset/test.csv"
)

print(test_df.columns)
print(test_df.head())

Index(['id', 'Image_id', 'labels'], dtype='object')
   id      Image_id  labels
0   1   Cloud_1.png       0
1   2   Cloud_2.jpg       0
2   3  Cloud_3.jpeg       0
3   4   Cloud_4.jpg       0
4   5   foggy_1.jpg       1


In [24]:
import pandas as pd

test_df = pd.read_csv(
    "/kaggle/input/aws-recruitment1/weather classification/dataset/test.csv"
)

In [25]:
import tensorflow as tf
import numpy as np
import os

BASE_PATH = "/kaggle/input/aws-recruitment1/weather classification/dataset"
TEST_DIR = os.path.join(BASE_PATH, "alien_test")

test_images = []

for img_name in test_df["Image_id"]:
    img_path = os.path.join(TEST_DIR, img_name)
    
    image = tf.io.read_file(img_path)
    image = tf.io.decode_image(image, channels=3, expand_animations=False)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0
    
    test_images.append(image)

test_images = tf.stack(test_images)

In [26]:
pred = model.predict(test_images)
pred_labels = np.argmax(pred, axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 587ms/step


In [27]:
submission = pd.DataFrame({
    "id": test_df["id"],
    "labels": pred_labels
})

submission.to_csv("submission.csv", index=False)
print("✅ submission.csv correctly aligned and created")

✅ submission.csv correctly aligned and created


In [28]:
submission.head()

Unnamed: 0,id,labels
0,1,1
1,2,0
2,3,3
3,4,3
4,5,1
