In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import classification_report,confusion_matrix
import io
from keras.models import Model
from sklearn.manifold import TSNE
from tensorflow.keras.applications.vgg19 import VGG19
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score, accuracy_score
import random
from sklearn.model_selection import train_test_split

In [None]:
csv = pd.read_csv('/content/drive/Shareddrives/dataton/train_labels.csv', header=None)

In [None]:
numpy_csv = csv.to_numpy()

In [None]:
files = numpy_csv[:,0]
labels = numpy_csv[:,1]

#read new dataset and save as numpy


In [None]:
X_train = []
y_train = []

image_size = 224
folderPath = os.path.join('/content/drive/Shareddrives/dataton/train_imgs') #define the filepath to the directory with images

for i in range(len(files)):
  print(os.path.join(folderPath,files[i]))
  img = plt.imread(os.path.join(folderPath,files[i]))
  img = cv2.resize(img,(image_size, image_size))
  X_train.append(img)
  y_train.append(labels[i])

In [None]:
X_train = np.array(X_train)
y_train = np.array(y_train)

In [None]:
BASE_DIR = '/content/drive/Shareddrives/dataton/' #Choose directory to save dataset
run_save = True
if run_save:
    with open(BASE_DIR + "train_set.npy", 'wb') as f:
      np.save(f, X_train)
      np.save(f, y_train)

# download preloaded dataset

In [46]:
BASE_DIR = '/content/drive/Shareddrives/dataton/' #filepath to preloaded dataset
run_save = True

with open(BASE_DIR + "train_set_dontchange.npy", 'rb') as f:
    X_train = np.load(f, allow_pickle=True)
    y_train = np.load(f, allow_pickle=True)

y_train = y_train.reshape((-1,1))

#split_data

In [47]:
train_X, val_X, train_y, val_y = train_test_split(X_train, y_train, train_size=0.88, shuffle=True, random_state=7)

print(train_X.shape)
print(val_X.shape)

(2304, 224, 224, 3)
(315, 224, 224, 3)


# Augment

In [48]:
datagen = ImageDataGenerator(rotation_range = 30, 
                             zoom_range = 0.3, 
                             #width_shift_range=0.15, 
                             #height_shift_range=0.15,
                             shear_range = 0.2,
                             horizontal_flip=True,
                             vertical_flip=False,
                             brightness_range = [0.5,1.5],
                             fill_mode = 'wrap')

In [49]:
idx = np.where(train_y == 0)[0]
choices = random.choices(idx, k=500)


aug_iter = datagen.flow(train_X[choices], train_y[choices], batch_size=50)

for i in range(10):
  aug_img = next(aug_iter)
  train_X = np.append(train_X, aug_img[0], axis=0)
  train_y = np.append(train_y, aug_img[1], axis=0)

print(train_X.shape)

(2804, 224, 224, 3)


#Model

In [52]:
def get_model():
  effnet = EfficientNetB0(weights='imagenet',include_top=False,input_shape=(224 ,224 ,3))
  model = effnet.output
  model = tf.keras.layers.GlobalMaxPooling2D()(model)
  model = tf.keras.layers.Dropout(rate=0.5)(model)
  model = tf.keras.layers.Dense(1,activation='sigmoid')(model)
  model = tf.keras.models.Model(inputs=effnet.input, outputs = model)
  model.compile(loss='binary_crossentropy',optimizer = 'Adam', metrics= ['accuracy'])
  return model

In [53]:
tensorboard = TensorBoard(log_dir = 'logs')
#choose directory to save best model
checkpoint = ModelCheckpoint("/content/drive/MyDrive/hackathon/effnet.h5",monitor="val_accuracy",save_best_only=True,mode="auto",verbose=1) 
reduce_lr = ReduceLROnPlateau(monitor = 'val_accuracy', factor = 0.3, patience = 2, min_delta = 0.001,
                              mode='auto',verbose=1)

In [54]:
model = get_model()

train_X, train_y = shuffle(train_X, train_y, random_state=101)

history = model.fit(train_X, train_y, validation_data = (val_X, val_y), epochs = 12, verbose=1, batch_size=32,
                   callbacks=[tensorboard,checkpoint,reduce_lr])

Epoch 1/12
Epoch 1: val_accuracy improved from -inf to 0.98095, saving model to /content/drive/MyDrive/hackathon/effnet.h5
Epoch 2/12
Epoch 2: val_accuracy improved from 0.98095 to 0.99048, saving model to /content/drive/MyDrive/hackathon/effnet.h5
Epoch 3/12
Epoch 3: val_accuracy improved from 0.99048 to 0.99365, saving model to /content/drive/MyDrive/hackathon/effnet.h5
Epoch 4/12
Epoch 4: val_accuracy did not improve from 0.99365
Epoch 5/12
Epoch 5: val_accuracy did not improve from 0.99365

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 6/12
Epoch 6: val_accuracy did not improve from 0.99365
Epoch 7/12
Epoch 7: val_accuracy did not improve from 0.99365

Epoch 7: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 8/12
Epoch 8: val_accuracy did not improve from 0.99365
Epoch 9/12
Epoch 9: val_accuracy did not improve from 0.99365

Epoch 9: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 10/12
Epoch 10: 

In [56]:
model=tf.keras.models.load_model("/content/drive/Shareddrives/dataton/final_effnet.h5")

In [62]:
model.evaluate(val_X, val_y)



[0.04100771248340607, 0.9936507940292358]

In [63]:
pre=model.predict(val_X)



In [64]:
pre=[int(i>0.5) for i in pre]

In [66]:
f1_score(val_y, pre, average=None)

array([0.99280576, 0.99431818])

In [67]:
confusion_matrix(val_y,pre)

array([[138,   1],
       [  1, 175]])

#write predictions to csv

In [69]:
%cd /content/drive/Shareddrives/dataton/test_imgs/ 

/content/drive/Shareddrives/dataton/test_imgs


In [70]:
photos = !ls

In [72]:
names=[]
for i in photos:
  n=i.split()
  names+=n

In [74]:
image_size = 224
folderPath = os.path.join('/content/drive/Shareddrives/dataton/test_imgs') #folder with test images

with open('/content/drive/Shareddrives/dataton/test_labels.csv', 'w') as f:

  for i in range(len(names)):
    img = plt.imread(os.path.join(folderPath,names[i]))
    img = cv2.resize(img,(image_size, image_size))
    labell=model.predict(np.expand_dims(img,0), verbose=0)
    print("processing image",i+1)
    labell=int(labell>0.5)
    f.write(names[i]+','+str(labell)+'\n')

processing image 1
processing image 2
processing image 3
processing image 4
processing image 5
processing image 6
processing image 7
processing image 8
processing image 9
processing image 10
processing image 11
processing image 12
processing image 13
processing image 14
processing image 15
processing image 16
processing image 17
processing image 18
processing image 19
processing image 20
processing image 21
processing image 22
processing image 23
processing image 24
processing image 25
processing image 26
processing image 27
processing image 28
processing image 29
processing image 30
processing image 31
processing image 32
processing image 33
processing image 34
processing image 35
processing image 36
processing image 37
processing image 38
processing image 39
processing image 40
processing image 41
processing image 42
processing image 43
processing image 44
processing image 45
processing image 46
processing image 47
processing image 48
processing image 49
processing image 50
processin