In [None]:
#import required packages
import argparse
import os

import cv2
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Training hyperparameters -- tune them here rather than inline further down.
EPOCHS = 50             # passed to model.fit as the number of epochs
BATCH_S = 30            # batch size handed to the augmentation generator
STEPS_PER_EPOCH = 20    # passed to model.fit as steps_per_epoch
VALIDATION_STEPS = 10   # passed to model.fit as validation_steps

#construct the dataset and the labels array
IMAGE_DIR = "../banana_images"
IMAGE_SIZE = (200, 200)  # size passed to cv2.resize; matches the model's input_shape
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


def _list_image_paths(root):
    """Return the sorted paths of all image files found (recursively) under `root`."""
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.lower().endswith(IMAGE_EXTENSIONS):
                found.append(os.path.join(dirpath, name))
    # sorted so that the dataset order (and therefore the split) is reproducible
    return sorted(found)


imagePaths = _list_image_paths(IMAGE_DIR)
data = []
labels = []

# loop over the image paths
for imagePath in imagePaths:
    filename = os.path.splitext(os.path.basename(imagePath))[0]
    # label 0 when the file name contains the character "0", otherwise label 1
    # NOTE(review): this also matches names such as "banana10" -- confirm the
    # naming scheme of the dataset.
    label = 0 if "0" in filename else 1

    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        print("skipping unreadable image: " + imagePath)
        continue
    image = cv2.resize(image, IMAGE_SIZE, interpolation=cv2.INTER_AREA)

    #add these findings to the respective arrays
    data.append(image)
    labels.append(label)

if not data:
    raise FileNotFoundError("no readable images found under " + IMAGE_DIR)

#normalising pixel values
data = np.array(data, dtype="float") / 255.0

#one-hot encoding the labels
labels = to_categorical(labels, 2)

# use an argument parser for the data pre-processing options.
# Every option has a default and unknown arguments are ignored, so the cell
# also runs inside a notebook kernel (whose argv is "-f <kernel.json>") instead
# of aborting with SystemExit; explicit -i/-o/-t values are still honoured.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", default="../banana_images",
    help="input path")
ap.add_argument("-o", "--output", default="augmented_images",
    help="directory to store the outputted augmented images")
ap.add_argument("-t", "--total", type=int, default=600,
    help="how many samples?")
known_args, _unknown_args = ap.parse_known_args()
args = vars(known_args)

#image pre-processing
def processing(data):
    """Write randomly augmented copies of every image in `data` to disk.

    For each image, `args["total"]` batches are drawn from a Keras
    ImageDataGenerator flow; the flow is configured with `save_to_dir`, so
    drawing a batch is what writes the augmented JPEG(s) into
    `args["output"]` (file names are prefixed with "image").

    Parameters:
        data: iterable of images accepted by `img_to_array`.

    Returns:
        None. The function is used for its side effect on disk only.
    """
    samples_per_image = args["total"]
    if samples_per_image <= 0:
        # the flow is an endless iterator: without this guard a non-positive
        # request would never satisfy the stop condition
        return

    # save_to_dir must exist before the generator tries to write into it
    os.makedirs(args["output"], exist_ok=True)

    # the augmentation settings do not depend on the image, so build them once
    aug = ImageDataGenerator(
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest")

    for image in data:
        # flow() expects a batch, so add a leading axis of size 1
        batch = np.expand_dims(img_to_array(image), axis=0)
        image_gen = aug.flow(batch, batch_size=BATCH_S, save_to_dir=args["output"],
            save_prefix="image", save_format="jpg")
        # each draw produces (and saves) one augmented batch
        for _ in range(samples_per_image):
            next(image_gen)
#partition of data into training and testing (used for validation)
# `processing` only writes augmented images to disk and returns nothing, so the
# split is made on the normalised in-memory array `data`, which is aligned
# element-for-element with `labels`.
(X_train, X_test, y_train, y_test) = train_test_split(data, labels,
    test_size=0.33, random_state=42)


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout


# number of output classes; must match the width of the one-hot encoded labels
num_classes = 2

# Small CNN: three convolutional layers followed by a dense classifier head.
model = Sequential()
model.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(200, 200, 3)))
# input_shape is only meaningful on the first layer; later layers infer it
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(Conv2D(32, kernel_size=3, activation='relu'))
model.add(Flatten())
# hidden layer uses relu: softmax belongs on the output layer only, a softmax
# here would squash the features into a probability vector before dropout
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# The network ends in a softmax layer and is trained on one-hot encoded labels,
# so categorical cross-entropy is the matching loss (it also stays correct if
# the number of classes grows beyond two).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:

# X_train / X_test are in-memory arrays, so the work per epoch is set with
# batch_size; steps_per_epoch / validation_steps are meant for generator or
# dataset inputs and are not supported together with array inputs.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=EPOCHS, batch_size=BATCH_S)

In [None]:
import matplotlib.pyplot as plt
# Train (this call continues from whatever state `model` is already in) and
# keep the returned History object so the per-epoch metrics can be plotted.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=EPOCHS)

# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for metric_name in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric_name])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
#transfer learning for improved accuracy
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten
from keras.models import Model, Sequential

#freezing all the pre-trained weights, removing the top layer to replace my model layers
restnet = ResNet50(include_top=False, weights='imagenet', input_shape=(200,200,3))
output = restnet.layers[-1].output
output = Flatten()(output)
# wrap the headless ResNet plus the Flatten layer as a single feature extractor
restnet = Model(inputs=restnet.input, outputs=output)
for layer in restnet.layers:
    layer.trainable = False
#understand the structure
restnet.summary()
#building a new model
# NOTE: at this point `model` only contains the frozen feature extractor; a
# classifier head still has to be added before it can be trained on the labels.
model = Sequential()
model.add(restnet)

In [None]:
#gaussian blur
# cv2.GaussianBlur only accepts odd, positive kernel dimensions, so the
# original (10, 10) request is rounded up to (11, 11).
BLUR_KERNEL = (11, 11)
# NOTE(review): the notebook never defines `dataset` / `transImgs`; the blur is
# applied to the normalised images in `data` -- confirm this is the intended input.
reduce_noise = [cv2.GaussianBlur(img, BLUR_KERNEL, 0) for img in data]


In [None]:
#image segmentation (k-means clustering)
from PIL import Image
from sklearn.cluster import KMeans
# Segment every blurred image by clustering its pixel colours with k-means and
# repainting each pixel with the centre of the cluster it was assigned to.
Segpics = []
for blurred in reduce_noise:
    # back to 0-255 uint8 pixels
    # (assumes the blurred images hold values in [0, 1], as the *255 rescale implies)
    pixels_uint8 = (np.clip(blurred, 0.0, 1.0) * 255.0).astype(np.uint8)
    height, width, channels = pixels_uint8.shape
    # one row per pixel, one column per colour channel
    flat_pixels = pixels_uint8.reshape(height * width, channels)
    kmeans = KMeans(n_clusters=3, random_state=0).fit(flat_pixels)
    pic2show = kmeans.cluster_centers_[kmeans.labels_]
    cluster_pic = pic2show.reshape(height, width, channels)
    Segpics.append(cluster_pic)



In [None]:
#sample image segmentation
import os, sys
from os import listdir

from PIL import Image
from google.colab import drive
import matplotlib.pyplot as plt
drive.mount('/content/gdrive')

# need to enter password to access your google drive

from google.colab import files
from sklearn.cluster import KMeans

main_dir = "/content/gdrive/My Drive/banana_imgs/"
# kept under its own name so the `files` module imported above is not shadowed
file_names = listdir(main_dir)
# you can change file extension below to read other image types
images_list = sorted(name for name in file_names if name.lower().endswith('.jpg')) ## output file names only

for image_name in images_list:
  pic = plt.imread(os.path.join(main_dir, image_name))/255
  # one row per pixel; the -1 lets grayscale (2-D) images through as one column
  pic_n = pic.reshape(pic.shape[0]*pic.shape[1], -1)
  kmeans = KMeans(n_clusters=5, random_state=0).fit(pic_n)
  pic2show = kmeans.cluster_centers_[kmeans.labels_]
  cluster_pic = pic2show.reshape(pic.shape)

  # original and segmented image side by side, one figure per file
  fig, (ax_original, ax_segmented) = plt.subplots(1, 2, figsize=(10, 5))
  ax_original.imshow(pic)
  ax_original.set_title(image_name)
  ax_segmented.imshow(cluster_pic)
  ax_segmented.set_title('k-means (5 clusters)')
  plt.show()
  plt.close(fig)