In [23]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix, classification_report

In [24]:
positive_dir = Path('/eduTech-Hackathon/archive/train/Positive')
negative_dir = Path('/eduTech-Hackathon/archive/train/Negative')

In [25]:
positive_dir_val = Path('/eduTech-Hackathon/archive/valid/Positive')
negative_dir_val = Path('/eduTech-Hackathon/archive/valid/Negative')
positive_dir_test = Path('/eduTech-Hackathon/archive/test/Positive')
negative_dir_test= Path('/eduTech-Hackathon/archive/test/Negative')


In [26]:
dir_pred = Path('/eduTech-Hackathon/archive/predict')
filepaths = pd.Series(list(dir_pred.glob(r'*.jpg')), name='Filepath').astype(str)
df_predict=pd.DataFrame(filepaths)
df_predict

Unnamed: 0,Filepath
0,\eduTech-Hackathon\archive\predict\IMG_1129.JPG
1,\eduTech-Hackathon\archive\predict\IMG_1130.JPG
2,\eduTech-Hackathon\archive\predict\IMG_1131.JPG
3,\eduTech-Hackathon\archive\predict\IMG_1132.JPG
4,\eduTech-Hackathon\archive\predict\IMG_1133.JPG
5,\eduTech-Hackathon\archive\predict\IMG_1134.JPG


In [27]:
def generate_df(image_dir, label):
    filepaths = pd.Series(list(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)
    labels = pd.Series(label, name='Label', index=filepaths.index)
    df = pd.concat([filepaths, labels], axis=1)
    return df

In [28]:
positive_df = generate_df(positive_dir, label="POSITIVE")
negative_df = generate_df(negative_dir, label="NEGATIVE")

train_df = pd.concat([positive_df, negative_df], axis=0).sample(frac=1.0, random_state=101).reset_index(drop=True)
train_df

Unnamed: 0,Filepath,Label
0,\eduTech-Hackathon\archive\train\Positive\IMG_...,POSITIVE
1,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE
2,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE
3,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE
4,\eduTech-Hackathon\archive\train\Positive\IMG_...,POSITIVE
...,...,...
595,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE
596,\eduTech-Hackathon\archive\train\Positive\IMG_...,POSITIVE
597,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE
598,\eduTech-Hackathon\archive\train\Negative\IMG_...,NEGATIVE


In [29]:
positive_df = generate_df(positive_dir_val, label="POSITIVE")
negative_df = generate_df(negative_dir_val, label="NEGATIVE")

val_df = pd.concat([positive_df, negative_df], axis=0).sample(frac=1.0, random_state=101).reset_index(drop=True)
val_df

Unnamed: 0,Filepath,Label
0,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
1,\eduTech-Hackathon\archive\valid\Negative\IMG_...,NEGATIVE
2,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
3,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
4,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
...,...,...
195,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
196,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
197,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE
198,\eduTech-Hackathon\archive\valid\Positive\IMG_...,POSITIVE


In [30]:
positive_df = generate_df(positive_dir_test, label="POSITIVE")
negative_df = generate_df(negative_dir_test, label="NEGATIVE")

test_df = pd.concat([positive_df, negative_df], axis=0).sample(frac=1.0, random_state=101).reset_index(drop=True)
test_df

Unnamed: 0,Filepath,Label
0,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
1,\eduTech-Hackathon\archive\test\Negative\DJI_0...,NEGATIVE
2,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
3,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
4,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
...,...,...
195,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
196,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
197,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE
198,\eduTech-Hackathon\archive\test\Positive\DJI_0...,POSITIVE


In [31]:
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

val_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

test_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

pred_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

In [32]:
train_data = train_gen.flow_from_dataframe(
    train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle=True,
    seed=42
)

val_data = val_gen.flow_from_dataframe(
    val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle=True,
    seed=42
)

test_data = test_gen.flow_from_dataframe(
    test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=32,
    shuffle=False,
    seed=42
)

Found 600 validated image filenames belonging to 2 classes.
Found 200 validated image filenames belonging to 2 classes.
Found 200 validated image filenames belonging to 2 classes.


In [33]:
inputs = tf.keras.Input(shape=(120, 120, 3))
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

print(model.summary())

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 120, 120, 3)]     0         
                                                                 
 conv2d_2 (Conv2D)           (None, 118, 118, 16)      448       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 59, 59, 16)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 57, 57, 32)        4640      
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 28, 28, 32)       0         
 2D)                                                             
                                                                 
 global_average_pooling2d_1   (None, 32)               0   

In [34]:
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )
    ]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50


In [36]:
y_pred=model.predict(test_data)



In [37]:
y_pred = np.squeeze((model.predict(test_data) >= 0.5).astype(np.int))



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_pred = np.squeeze((model.predict(test_data) >= 0.5).astype(np.int))


In [43]:
test_df['Label']

0      POSITIVE
1      NEGATIVE
2      POSITIVE
3      POSITIVE
4      POSITIVE
         ...   
195    POSITIVE
196    POSITIVE
197    POSITIVE
198    POSITIVE
199    POSITIVE
Name: Label, Length: 200, dtype: object

In [55]:
y_test = []
for label in test_df['Label']:
    if label == 'POSITIVE':
        y_test.append(1)
    else:
        y_test.append(0)

In [56]:
len(y_pred), len(y_test)

(200, 200)

In [57]:
from sklearn.metrics import accuracy_score
accuracy_score(y_pred, y_test)

0.74

In [58]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.66      1.00      0.79       100
           1       1.00      0.48      0.65       100

    accuracy                           0.74       200
   macro avg       0.83      0.74      0.72       200
weighted avg       0.83      0.74      0.72       200

