In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras import layers, optimizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.applications import VGG19
import cv2
from torchvision import transforms
from PIL import Image

import zipfile
import os

In [2]:
zip_file = '/kaggle/input/platesv2/plates.zip'
destination_directory = '/kaggle/working/'
with zipfile.ZipFile(zip_file, 'r') as zip_obj:
    zip_obj.extractall(destination_directory)

In [3]:
def draw_samples(image_files):
    fig, axes = plt.subplots(4, 5, figsize=(15, 15))
    axes = axes.flatten()
    for i, image_file in enumerate(image_files[:4 * 5]):
        img = mpimg.imread(image_file)
        ax = axes[i]
        ax.imshow(img)
        ax.set_title("Cleaned" if i < 10 else "Dirty")
        ax.axis('off')
    plt.tight_layout()
    plt.show()

## Image Processing

In [4]:
cleaned_path = '/kaggle/working/plates/train/cleaned/'
dirty_path = '/kaggle/working/plates/train/dirty/'
test_path = '/kaggle/working/plates/test/'

image_files_cleaned = [os.path.join(cleaned_path, f) for f in os.listdir(cleaned_path) if f.endswith(('jpg', 'png', 'jpeg'))]
image_files_dirty = [os.path.join(dirty_path, f) for f in os.listdir(dirty_path) if f.endswith(('jpg', 'png', 'jpeg'))]
image_test_files = [os.path.join(test_path, f) for f in os.listdir(test_path) if f.endswith(('jpg', 'png', 'jpeg'))]

In [5]:
processed_cleaned_path = '/kaggle/input/cleaned-vs-dirty-plates-images/plates/train_processed/cleaned'
processed_dirty_path = '/kaggle/input/cleaned-vs-dirty-plates-images/plates/train_processed/dirty'
processed_test_path = '/kaggle/input/cleaned-vs-dirty-plates-images/plates/test_processed'

image_files_processed_cleaned = [os.path.join(processed_cleaned_path, f) for f in os.listdir(processed_cleaned_path) if f.endswith(('jpg', 'png', 'jpeg'))]
image_files_processed_dirty = [os.path.join(processed_dirty_path, f) for f in os.listdir(processed_dirty_path) if f.endswith(('jpg', 'png', 'jpeg'))]
image_files_processed_test = [os.path.join(processed_test_path, f) for f in os.listdir(processed_test_path) if f.endswith(('jpg', 'png', 'jpeg'))]

In [6]:
X_train = []
y_train = []

def load_images(image_files, label):
    for filename in image_files:
        img = cv2.imread(filename)
        img = cv2.resize(img, (224, 224))
        X_train.append(img)
        y_train.append(label)
        
load_images(image_files_processed_cleaned, 0)
load_images(image_files_processed_dirty, 1)

X_train = np.array(X_train)
y_train = np.array(y_train)

X_train, y_train = shuffle(X_train, y_train)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(y_train)

X_train shape: (40, 224, 224, 3)
y_train shape: (40,)
[0 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0
 1 1 0]


In [7]:
X_test = []
for filename in image_files_processed_test:
    img = cv2.imread(filename)
    img = cv2.resize(img, (224, 224))
    X_test.append(img)
X_test = np.array(X_test)

print(f"X_test shape: {X_test.shape}")

X_test shape: (744, 224, 224, 3)


## Train

In [8]:
IMG_SIZE = 224
BATCH_SIZE = 5
EPOCHS = 30

In [9]:
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )
train_gen = datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)

test_datagen = ImageDataGenerator()
test_gen = test_datagen.flow(X_test, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
input_tensor = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

pretrained = VGG19(weights='imagenet', include_top=False)
pretrained.trainable = False

x = pretrained(input_tensor, training=False)

x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_tensor, outputs=output)

optimizer = optimizers.Adam(learning_rate=0.001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m80134624/80134624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
history = model.fit(train_gen, epochs=EPOCHS)

Epoch 1/30


I0000 00:00:1742842163.044076      68 service.cc:145] XLA service 0x7b01ec006260 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742842163.044141      68 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1742842163.044145      68 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m5/8[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 35ms/step - accuracy: 0.5080 - loss: 2.8835

I0000 00:00:1742842167.862807      68 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 37ms/step - accuracy: 0.5185 - loss: 3.4434
Epoch 2/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.6493 - loss: 2.8910
Epoch 3/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.5971 - loss: 2.3891
Epoch 4/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.9129 - loss: 0.5100
Epoch 5/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 1.0000 - loss: 0.0270
Epoch 6/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.8734 - loss: 0.3408
Epoch 7/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 1.0000 - loss: 0.0136
Epoch 8/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 1.0000 - loss: 0.0634
Epoch 9/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36

In [12]:
predictions = model.predict(test_gen)

[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 55ms/step


In [13]:
submission = pd.DataFrame({'id': [n for n in range(len(predictions))], 'label': ['dirty' if x > 0.5 else 'cleaned' for x in predictions]})
print(submission['label'].value_counts())
submission.to_csv('/kaggle/working/submission.csv', index=False)

label
dirty      618
cleaned    126
Name: count, dtype: int64
