In [1]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf


In [2]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [4]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras


train = pd.read_csv("data/train.csv") 
test  = pd.read_csv("data/test.csv")  

train['file_name'] = train['file_name'].apply(lambda x: os.path.join('data', x))
if 'file_name' in test.columns:
    test['file_name'] = test['file_name'].apply(lambda x: os.path.join('data', x))
else:
    # test CSV has column 'id' instead
    test['file_name'] = test['id'].apply(lambda x: os.path.join('data', 'test_data_v2', x))


train_df, val_df = train_test_split(
    train, 
    test_size=0.2, 
    random_state=42, 
    stratify=train['label']
)

def create_generators(train_df, val_df, test_df):
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    test_val_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='file_name',
        y_col='label',
        target_size=(224, 224),
        class_mode='raw',
        batch_size=32,
        shuffle=True
    )
    val_generator = test_val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='file_name',
        y_col='label',
        target_size=(224, 224),
        class_mode='raw',
        batch_size=32,
        shuffle=False
    )
    test_generator = test_val_datagen.flow_from_dataframe(
        dataframe=test_df,
        x_col='file_name',
        y_col=None,
        target_size=(224, 224),
        class_mode=None,
        batch_size=32,
        shuffle=False
    )

    return train_generator, val_generator, test_generator

train_gen, val_gen, test_gen = create_generators(train_df, val_df, test)

Found 63960 validated image filenames.
Found 15990 validated image filenames.
Found 0 validated image filenames.




In [5]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# trying a mobilenet for transfer learning, because the dataset is so large

def create_mobilenet_model():
    base_model = MobileNetV2(
        input_shape=(224, 224, 3),
        include_top=False,
        weights="imagenet"
    )
    
    base_model.trainable = False    

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x)               
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.2)(x)
    output = Dense(1, activation="sigmoid")(x)

    model = Model(inputs=base_model.input, outputs=output)

    model.compile(
        optimizer=Adam(learning_rate=0.0005),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    
    return model

model = create_mobilenet_model()
model.summary()

EPOCHS = 8  
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    verbose=1
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m362s[0m 180ms/step - accuracy: 0.8408 - loss: 0.3800 - val_accuracy: 0.8862 - val_loss: 0.2737
Epoch 2/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 177ms/step - accuracy: 0.8648 - loss: 0.3179 - val_accuracy: 0.8918 - val_loss: 0.2630
Epoch 3/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 175ms/step - accuracy: 0.8714 - loss: 0.3051 - val_accuracy: 0.8909 - val_loss: 0.2673
Epoch 4/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m355s[0m 178ms/step - accuracy: 0.8697 - loss: 0.3079 - val_accuracy: 0.8894 - val_loss: 0.2688
Epoch 5/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 177ms/step - accuracy: 0.8714 - loss: 0.3043 - val_accuracy: 0.8891 - val_loss: 0.2684
Epoch 6/8
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 177ms/step - accuracy: 0.8695 - loss: 0.3051 - val_accuracy: 0.8915 - val_loss: 0.261

In [6]:
val_loss, val_acc = model.evaluate(val_gen, verbose=0)
print('Initial validation loss', val_loss)
print('Initial validation accuracy', val_acc)



Initial validation loss 0.2668425142765045
Initial validation accuracy 0.8909943699836731
