# Mosquito Detection

## Imports

In [40]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pathlib

from sklearn.model_selection import train_test_split


from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from keras.preprocessing.image import ImageDataGenerator

#import cv2  # OpenCV's Python module is named cv2 (there is no `opencv2` module)

## Data import and preprocessing

In [41]:
# Mosquito dataset: load every image in the folder as a normalized array.
mosquito_arrays = []

mosquito_path = './data/mosquitoes/'

# Convert each image to a (256, 256, 3) float array scaled to [0, 1] and store it.
# Files are visited in sorted order because os.listdir() returns entries in an
# arbitrary, platform-dependent order, which would make the dataset order
# (and therefore the later train/test split) non-reproducible.
for file in sorted(os.listdir(mosquito_path)):
    try:
        mos = load_img(os.path.join(mosquito_path, file), target_size=(256, 256))
        mosquito_array = img_to_array(mos) / 255
        mosquito_arrays.append(mosquito_array)
    except Exception:
        # Best-effort loading: skip anything that is not a readable image
        # (e.g. .DS_Store). `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still stop the cell.
        print(f'Error for file: {file}')

print(f'{len(mosquito_arrays)} pictures converted.')

245 pictures converted.


In [42]:
# Dataset of other bugs: load every image in the folder as a normalized array.
other_arrays = []
other_path = './data/other/'

# Convert each image to a (256, 256, 3) float array scaled to [0, 1] and store it.
# Files are visited in sorted order because os.listdir() returns entries in an
# arbitrary, platform-dependent order, which would make the dataset order
# (and therefore the later train/test split) non-reproducible.
for file in sorted(os.listdir(other_path)):
    try:
        oth = load_img(os.path.join(other_path, file), target_size=(256, 256))
        other_array = img_to_array(oth) / 255
        other_arrays.append(other_array)
    except Exception:
        # Best-effort loading: skip anything that is not a readable image
        # (e.g. .DS_Store). `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still stop the cell.
        print(f'Error for file: {file}')

print(f'{len(other_arrays)} pictures converted.')

Error for file: .DS_Store
274 pictures converted.


In [43]:
# X contains the mosquito images followed by the images of other bugs
X = mosquito_arrays + other_arrays

X_arr = np.array(X)
print(f'X shape: {X_arr.shape}')

# 1 for mosquito, 0 for other.
# The label counts are derived from the lists themselves instead of being
# hard-coded, so the labels stay aligned with the images even when a file
# fails to load or the dataset folders change.
y = [1] * len(mosquito_arrays) + [0] * len(other_arrays)
# convert to array and check shape
y = np.array(y)
print(f'y shape: {y.shape}')

# Every image must have exactly one label.
assert len(X) == len(y), 'number of images and labels differ'

X shape: (519, 256, 256, 3)
y shape: (519,)


## Modelling

### Base model

In [44]:
# Stratified train/test split (default test size) with a fixed seed,
# so both subsets keep the same mosquito/other ratio as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [45]:
# The split was done on a Python list of images; turn both parts into numpy arrays.
X_train, X_test = np.array(X_train), np.array(X_test)

In [46]:
# CNN base model: two conv/max-pool stages followed by a small dense head
# with a single sigmoid output for the binary mosquito / other decision.
model1 = Sequential([
    # first convolution stage, fed with 256x256 RGB images
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    # second convolution stage
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # classifier head
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

model1.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)

In [47]:
# Train the base model for 5 epochs; the held-out split is used as validation data.
h1 = model1.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [48]:
# Print the layer-by-layer architecture of the base model with output shapes and parameter counts.
model1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 254, 254, 64)      1792      
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 127, 127, 64)      0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 125, 125, 64)      36928     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 246016)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 64)                15745088  
_________________________________________________________________
dense_9 (Dense)              (None, 1)                

### Model with data augmentation

In [59]:
#Data augmentation
# Random flips (both axes) and random rotations of up to +/- 0.2 of a full turn.
# The layers are referenced through `tf.keras.layers` because no bare `layers`
# name is imported in this notebook.
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
  tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
])

#source https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [63]:
batch_size = 16

# Share of the images held out for evaluation (same proportion as the default
# test size of train_test_split used for the base model).
validation_split = 0.25

# Generators read raw 0-255 images from disk, so they are rescaled to [0, 1]
# to match the arrays the models are trained on. Only the training generator
# augments the images (flips and rotations of up to 72 degrees = 0.2 of a turn).
train_datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        rotation_range=72,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split)

test_datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        validation_split=validation_split)

# flow_from_directory expects a parent directory with one sub-directory per
# class, so both generators point at './data' and see BOTH classes.
# `classes` pins the label mapping: other -> 0, mosquitoes -> 1.
# NOTE: this split is made per class from the file order on disk, so it is not
# the same split as the one produced by train_test_split.
train_generator = train_datagen.flow_from_directory(
        './data',  # parent directory containing one folder per class
        classes=['other', 'mosquitoes'],
        target_size=(256, 256),
        batch_size=batch_size,
        class_mode='binary',  # since we use binary_crossentropy loss, we need binary labels
        subset='training',
        seed=42)

test_generator = test_datagen.flow_from_directory(
        './data',
        classes=['other', 'mosquitoes'],
        target_size=(256, 256),
        batch_size=batch_size,
        class_mode='binary',
        subset='validation',
        shuffle=False)  # keep a fixed order so predictions can be matched to files


In [60]:
# Model with data augmentation: same CNN as the base model, preceded by random
# flip / rotation layers so that the network actually sees augmented images.
# Keras applies these preprocessing layers only while training; they are
# pass-through during evaluation and prediction.
model2 = Sequential()

# augmentation layers (the first layer carries the input shape)
model2.add(tf.keras.layers.experimental.preprocessing.RandomFlip(
    "horizontal_and_vertical", input_shape = (256, 256, 3)))
model2.add(tf.keras.layers.experimental.preprocessing.RandomRotation(0.2))

model2.add(Conv2D(64, (3,3), activation = 'relu'))
model2.add(MaxPooling2D(pool_size = (2,2)))

model2.add(Conv2D(64, (3,3), activation = 'relu'))
model2.add(MaxPooling2D(pool_size = (2,2)))

model2.add(Flatten())
model2.add(Dense(64, activation='relu'))

model2.add(Dense(1, activation='sigmoid'))

model2.compile(optimizer="adam",
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [61]:
# Train the second model for 5 epochs; the held-out split is used as validation data.
h2 = model2.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# TODO
# - fix OpenCV
# - convert video frames to images and process them