# Kita Pisahkan datanya menjadi 3

Url Source : https://www.kaggle.com/alxmamaev/flowers-recognition

- train (data latih)
- validation (validasi untuk data latih)
- test (data testing untuk menguji model)

In [None]:
# extract melalui notebook jika diperlukan
# !unzip flowers-recognition.zip

In [None]:
import os
import pandas as pd

In [None]:
# Root folder that is walked recursively to collect the image files.
# NOTE(review): later cells read from "flowers/" instead — confirm this root is intended.
mypath= './Jasmine/'

In [None]:
# Walk the dataset folder and record, for every file found:
#   full_path - path to the file, always written with forward slashes
#   tag       - name of the folder that directly contains the file (the label)
#   file_name - bare file name
file_name = []
tag = []
full_path = []
for path, subdirs, files in os.walk(mypath):
    # Normalise the separators once per folder so the tag is derived from the
    # same forward-slash form as full_path (os.walk yields backslashes for
    # nested folders on Windows, which would otherwise end up inside the tag).
    folder = path.replace("\\", "/")
    for name in files:
        full_path.append(os.path.join(path, name).replace("\\", "/"))
        tag.append(folder.split('/')[-1])
        file_name.append(name)

In [None]:
# Collect the lists gathered by the loop above into one tidy DataFrame,
# then count how many files each tag has.
df = pd.DataFrame({"path":full_path,'file_name':file_name,"tag":tag})
df.groupby(['tag']).size()

In [None]:
# Peek at the first rows of the collected data
df.head()

# Train test split

In [None]:
#load library untuk train test split
from sklearn.model_selection import train_test_split

In [None]:
# Columns used for the split: the image path is the sample, the folder tag its label
X= df['path']
y= df['tag']

In [None]:
# Split the full dataset into a training part and a held-out part (20% of the samples)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=300)

In [None]:
# Then split the held-out part in half, which yields the final test set and
# the validation set.
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, test_size=0.5, random_state=100)

In [None]:
# Build one DataFrame per split; every row carries the image path, its tag
# and the name of the set it belongs to.

df_tr = pd.DataFrame({'path': X_train, 'tag': y_train}).assign(set='train')

df_te = pd.DataFrame({'path': X_test, 'tag': y_test}).assign(set='test')

df_val = pd.DataFrame({'path': X_val, 'tag': y_val}).assign(set='validation')

In [None]:
# Report how many samples ended up in each split.
# df_val is the validation frame and df_te the test frame, so each label is
# paired with its own frame.
print('train size', len(df_tr))
print('val size', len(df_val))
print('test size', len(df_te))

In [None]:
# Stack the three splits into one frame to check whether the class
# proportions of each set look fine or still need adjusting.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_all = pd.concat([df_tr, df_te, df_val]).reset_index(drop=True)

print('===================================================== \n')
print(df_all.groupby(['set','tag']).size(),'\n')

print('===================================================== \n')

# Show a few random rows
df_all.sample(3)

# Merapihkan ke folder set masing-masing

In [None]:
# menghapus folder dataset jika diperlukan
#!rm -rf dataset/

In [None]:
import shutil
from tqdm.notebook import tqdm as tq

In [None]:
# Fallback folder the original images are looked up in when a recorded path is missing
datasource_path = "flowers/"
# Root folder the per-set copies are written to (dataset/<set>/<tag>/<file>)
dataset_path = "dataset/"

In [None]:
# Copy every image into dataset/<set>/<tag>/ so each split can later be read
# from its own folder.
for index, row in tq(df_all.iterrows(), total=len(df_all)):

    # Locate the source file. When the recorded path is not reachable, fall
    # back to <datasource_path>/<tag>/<file name>. The file name is taken from
    # the recorded path because df_all only has 'path', 'tag' and 'set'
    # columns (there is no 'image' column), and the extension must be kept.
    file_path = row['path']
    if not os.path.exists(file_path):
        file_path = os.path.join(datasource_path, row['tag'],
                                 os.path.basename(row['path']))

    # Make sure the destination folder for this set/tag exists
    dest_dir = os.path.join(dataset_path, row['set'], row['tag'])
    os.makedirs(dest_dir, exist_ok=True)

    # Destination keeps the original file name
    destination_file_name = os.path.basename(file_path)
    file_dest = os.path.join(dest_dir, destination_file_name)

    # Copy only once, so re-running the cell does not redo finished work
    if not os.path.exists(file_dest):
        shutil.copy2(file_path, file_dest)

# Membuat Classifier nya

In [None]:
# !pip install tensorflow
# !pip install pillow

In [None]:
import tensorflow as tf

In [None]:
# Define Input Parameters
# Two-dimensional size every image is resized to by the generators
dim = (160, 160)
# dim = (456, 456)
# Channel count, kept as a 1-tuple so it can be concatenated onto dim
channel = (3, )
input_shape = dim + channel  # (160, 160, 3)

# Batch size used by the directory generators
batch_size = 16

# Epoch count.
# NOTE(review): the training cell passes the separate name EPOCH (= 2), so this
# value looks unused — confirm.
epoch = 10

# Membuat dataset generator

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Mendefinisikan Data Generatornya

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented
# (shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Validation and test images are only rescaled. Random augmentation belongs to
# training; applying it here would make the reported metrics noisy and
# different on every run.
val_datagen = ImageDataGenerator(rescale=1. / 255)

test_datagen = ImageDataGenerator(rescale=1. / 255)

## Mendefinisikan asal folder sumber file berasal

In [None]:
# class_mode='categorical' yields one-hot label vectors, e.g. with 5 classes:
#   [1,0,0,0,0] [0,1,0,0,0] [0,0,1,0,0] [0,0,0,1,0] [0,0,0,0,1]
# (class_mode='sparse' would yield the integer class index instead)

train_generator = train_datagen.flow_from_directory('dataset/train/',
                                                    target_size=dim,
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)

val_generator = val_datagen.flow_from_directory('dataset/validation/',
                                                target_size=dim,
                                                batch_size=batch_size,
                                                class_mode='categorical',
                                                shuffle=True)

test_generator = test_datagen.flow_from_directory('dataset/test/',
                                                  target_size=dim,
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=True)

# The class count must come from the training set, which is what the model
# learns from; the small test split may be missing a class folder.
num_class = train_generator.num_classes
# A plain list, so a label can be looked up by predicted index
labels = list(train_generator.class_indices.keys())

In [None]:
print(labels)

## Membuat tf.data untuk kompatibilitas yang lebih baik dengan tensorflow 2.1 (tf.keras)

In [None]:
def tf_data_generator(generator, input_shape):
    """Wrap a Keras batch generator in a ``tf.data.Dataset``.

    Args:
        generator: iterator exposing ``num_classes`` and yielding
            ``(images, labels)`` batches.
        input_shape: sequence whose first three entries give the shape of a
            single image.

    Returns:
        A dataset of float32 ``(images, labels)`` pairs whose batch dimension
        is left unspecified.
    """
    n_labels = generator.num_classes
    image_batch_shape = [None, input_shape[0], input_shape[1], input_shape[2]]
    label_batch_shape = [None, n_labels]

    def _batches():
        # from_generator needs a callable that returns the iterable
        return generator

    return tf.data.Dataset.from_generator(
        _batches,
        output_types=(tf.float32, tf.float32),
        output_shapes=(image_batch_shape, label_batch_shape),
    )

In [None]:
# Wrap each directory generator in a tf.data dataset with the same input shape
train_data = tf_data_generator(train_generator, input_shape)
test_data = tf_data_generator(test_generator, input_shape)
val_data = tf_data_generator(val_generator, input_shape)

# Membuat Struktur CNN

## Manually define network

In [None]:
from tensorflow.keras import layers, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Dropout, Flatten, Dense

In [None]:
# Hand-built CNN: two convolution stages
# (conv -> relu -> conv -> relu -> max-pool -> dropout) followed by a dense
# classifier head that ends in a softmax over num_class outputs.
model = Sequential([
    # Stage 1
    Conv2D(128, (3, 3), padding='same', input_shape=input_shape),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_class),
    Activation('softmax'),
])

# Compile the model
print('Compiling Model.......')
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model.summary()

## Using Pre-trained model / Transfer Learning

### Build Base Model (MobileNetV2)

In [None]:
from tensorflow.keras.applications import MobileNetV2

# Pretrained MobileNetV2 backbone without its ImageNet classification head.
# `classes` is not passed: it only configures the built-in head, which
# include_top=False removes, so it has no effect here. The real class count is
# applied by the custom Dense layer added on top.
# NOTE(review): nothing sets base_model.trainable, so the whole backbone is
# presumably fine-tuned — confirm that is intended.
base_model = MobileNetV2(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet',
)

### Add top layer network

In [None]:
from tensorflow.keras import layers,Sequential
from tensorflow.keras.models import Model

In [None]:
# Add a custom classification head on top of the backbone output:
# global average pooling -> dropout (0.2) -> dense 1024 relu
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1024, activation="relu")(x)

# One softmax unit per class; the final model maps backbone input -> class probabilities
predictions = layers.Dense(num_class, activation="softmax")(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
model.summary()

In [None]:
# Compile the model
print('Compiling Model.......')
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## EfficientNet

In [None]:
# !pip install -U --pre efficientnet

In [None]:
from efficientnet.tfkeras import EfficientNetB1

### Build Base model

In [None]:
# EfficientNetB1 backbone with 'noisy-student' weights and no classification
# head. As with the MobileNetV2 backbone, `classes` is not passed because it
# only configures the built-in head that include_top=False removes.
base_model = EfficientNetB1(
    input_shape=input_shape,
    include_top=False,
    weights='noisy-student',
)

### Add top network layer to models

In [None]:
from tensorflow.keras import layers,Sequential
from tensorflow.keras.models import Model

In [None]:
# Add a custom classification head on top of the backbone output:
# global average pooling -> dropout (0.5) -> dense 1024 relu
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(1024, activation="relu")(x)

# One softmax unit per class; this rebinds `model`, replacing the one built earlier
predictions = layers.Dense(num_class, activation="softmax")(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
model.summary()

In [None]:
# Compile the model
print('Compiling Model.......')
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## Visualize The final model

In [None]:
import tensorflow as tf

In [None]:
# Render the current `model` as a top-to-bottom graph and write it to model.png,
# including tensor shapes and layer names; nested models are expanded.
model_viz = tf.keras.utils.plot_model(model,
                          to_file='model.png',
                          show_shapes=True,
                          show_layer_names=True,
                          rankdir='TB',
                          expand_nested=True,
                          dpi=55)
# Last expression of the cell, so the notebook displays the result
model_viz

# Train Model

In [None]:
EPOCH = 2

In [None]:
# Train on the tf.data wrapper of the training generator. The step counts are
# given explicitly as the number of batches in each directory generator.
# NOTE(review): Keras documents `shuffle` as ignored for dataset inputs;
# shuffling is presumably already done by the directory generator — confirm.
history = model.fit(x=train_data,
        steps_per_epoch=len(train_generator),
        epochs=EPOCH,
        validation_data=val_data,
        validation_steps=len(val_generator), 
        shuffle=True,
        verbose = 1)

In [None]:
history.history['loss']

In [None]:
history.history['accuracy']

# Plot the training

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Plot history: training vs. validation loss per epoch
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.ylabel('value')
plt.xlabel('No. epoch')
plt.legend(loc="upper left")
plt.show()

In [None]:
# Plot history: training vs. validation accuracy per epoch
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.ylabel('value')
plt.xlabel('No. epoch')
plt.legend(loc="upper left")
plt.show()

# Save Model

In [None]:
import os

In [None]:
# The trained model is written to <MODEL_BASE_PATH>/<PROJECT_NAME>/<SAVE_MODEL_NAME>
MODEL_BASE_PATH = "model"
PROJECT_NAME = "medium_project"
SAVE_MODEL_NAME = "model.h5"
save_model_path = os.path.join(MODEL_BASE_PATH, PROJECT_NAME, SAVE_MODEL_NAME)

# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, replacing the separate exists() check.
os.makedirs(os.path.join(MODEL_BASE_PATH, PROJECT_NAME), exist_ok=True)

print('Saving Model At {}...'.format(save_model_path))
# The optimizer state is deliberately left out of the saved file
model.save(save_model_path,include_optimizer=False)
    

# Evaluate Models

In [None]:
# Evaluate on the training data, running one step per batch of the generator.
# train_data wraps train_generator, so these images are augmented as well.
loss, acc = model.evaluate(train_data, steps=len(train_generator), verbose=0)
print('Accuracy on training data: {:.4f} \nLoss on training data: {:.4f}'.format(acc,loss),'\n')
 
# Same measurement on the test data
loss, acc = model.evaluate(test_data, steps=len(test_generator), verbose=0)
print('Accuracy on test data: {:.4f} \nLoss on test data: {:.4f}'.format(acc,loss),'\n')    

# Define params and lib

In [None]:
import requests
from io import BytesIO

from PIL import Image
import numpy as np

In [None]:
# Parameters
# Size every image is resized to before prediction (same value as `dim` used for training)
input_size = (160,160)

# Define the input shape: size plus a channel count of 3
channel = (3,)
input_shape = input_size + channel

# Class names, looked up by the index of the highest model output.
# NOTE(review): presumably this must be in the same order as
# train_generator.class_indices — confirm.
labels = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

# Define preprocess function

In [None]:
def preprocess(img, input_size):
    """Convert an image to a scaled RGB pixel array.

    Args:
        img: image object offering ``convert`` and ``resize``.
        input_size: target size passed to ``resize``.

    Returns:
        Array of the resized RGB pixels divided by 255.
    """
    # resample=0 selects the resize filter with code 0
    rgb_resized = img.convert('RGB').resize(input_size, resample=0)
    return np.array(rgb_resized) / 255

def reshape(imgs_arr):
    """Stack a sequence of equally-shaped arrays into one batch array.

    The new batch axis is the first axis of the result.
    """
    batch = np.stack(imgs_arr)  # axis defaults to 0
    return batch

# Load models

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# There are two ways to load the model; if this first one succeeds you can
# continue straight to the prediction step.

MODEL_PATH = 'model/medium_project/model.h5'
# compile=False: the model is loaded without being compiled
model = load_model(MODEL_PATH,compile=False)

# Predict the image

In [None]:
# Read one image, turn it into a single-item batch and classify it
im = Image.open('contoh_prediksi.jpg')
X = reshape([preprocess(im, input_size)])
y = model.predict(X)

# Show the label with the highest score together with that score
print( labels[np.argmax(y)], np.max(y) )

In [None]:
y

In [None]:
print( labels[np.argmax(y)], np.max(y) )

In [None]:
# Same prediction routine, this time on an image from the training folder
im = Image.open('dataset/train/dandelion/2522454811_f87af57d8b.jpg')
X = reshape([preprocess(im, input_size)])
y = model.predict(X)

# Show the label with the highest score together with that score
print( labels[np.argmax(y)], np.max(y) )

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.applications import EfficientNetB2, EfficientNetB0
from tensorflow.keras import Model
from tensorflow.keras import layers

In [None]:
# EfficientNetB2 with its ImageNet top included; the wrapper model re-exposes
# the same input and output as base_model.
input_shape = (260, 260, 3)
base_model = EfficientNetB2(include_top=True, weights='imagenet', input_shape=input_shape)
model = Model(inputs=base_model.input, outputs=base_model.output)

# Inspect the layers to find the last one that can be used as the output
# base_model.summary()

In [None]:
# base_model.summary()

# Feature extractor for the similarity search. The output is taken from the
# global-average-pooling layer ('avg_pool'), which gives one flat vector per
# image. 'top_bn' outputs a 3-D feature map, which cannot be used as an item
# vector in the Annoy index built further down.
base_model = EfficientNetB0(include_top=True, weights='imagenet', input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)


In [None]:
# base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# x = base_model.output
# x = layers.Flatten()(x)

# model = Model(inputs=base_model.input, outputs=x)


In [None]:
# Compile the model
print('Compiling Model.......')
model.compile(optimizer='adam')

## Fungsi untuk membuka gambar

In [None]:
from PIL import Image
import numpy as np
# Take entries 1 and 2 of the model's input shape as the resize target
input_size = model.input_shape[1:3]
input_size

In [None]:
def open_image(path):
    """Load an image file as a one-item batch sized for ``model``.

    Args:
        path: path of the image file to read.

    Returns:
        The output of ``reshape`` for the single preprocessed image, resized
        to ``model.input_shape[1:3]``.
    """
    # The context manager closes the file as soon as the pixels have been
    # converted. This function runs once per dataset image, so handles left
    # open would pile up.
    with Image.open(path) as im:
        X = preprocess(im, model.input_shape[1:3])
    return reshape([X])

def preprocess(img, input_size):
    """Convert an image to a scaled RGB pixel array.

    Args:
        img: image object offering ``convert`` and ``resize``.
        input_size: target size passed to ``resize``.

    Returns:
        Array of the resized RGB pixels divided by 255.
    """
    # NOTE(review): tf.keras.applications EfficientNet models are documented to
    # take raw 0-255 pixels; dividing by 255 may scale the input twice — confirm.
    resized = img.convert('RGB').resize(input_size, resample=0)
    pixels = np.array(resized)
    return pixels / 255

def reshape(imgs_arr):
    """Stack a sequence of equally-shaped arrays along a new first axis."""
    stacked = np.stack(imgs_arr)  # axis defaults to 0
    return stacked

In [None]:
# Run the extractor on one sample image and keep the output for that single item
X = open_image("contoh_prediksi.jpg")
result = model.predict(X)[0]
# Show the first 10 entries of the output
result[:10]

In [None]:
# Scan directory
import os
import pandas as pd
from tqdm.notebook import tqdm

# Root folder that is walked recursively to collect the image files
mypath = 'flowers/'

# For every file found, record:
#   full_path - path to the file, always written with forward slashes
#   tag       - name of the folder that directly contains the file
#   file_name - bare file name
file_name = []
tag = []
full_path = []
for path, subdirs, files in os.walk(mypath):
    # Normalise the separators once per folder so the tag is derived from the
    # same forward-slash form as full_path (os.walk yields backslashes for
    # nested folders on Windows, which would otherwise end up inside the tag).
    folder = path.replace("\\", "/")
    for name in files:
        full_path.append(os.path.join(path, name).replace("\\", "/"))
        tag.append(folder.split('/')[-1])
        file_name.append(name)

In [None]:
# One row per scanned file; list_img_path keeps the paths in the same order as the rows
df = pd.DataFrame({"path":full_path,'file_name':file_name,"tag":tag})
list_img_path = df['path'].tolist()

# Show the first three rows
df[:3]

In [None]:
# Run the extractor on every image, one at a time, and collect the outputs in
# the same order as list_img_path.
img_array_resut = []
for path in tqdm(list_img_path):
    X = open_image(path)
    # predict returns a batch; [0] takes the output for the single image
    res = model.predict(X)[0]
    img_array_resut.append(res)

In [None]:
# Attach each extracted output to its row (same order as list_img_path)
df['features'] = img_array_resut
# Show five random rows
df.sample(5)

In [None]:
from annoy import AnnoyIndex
from matplotlib import pyplot as plt 

In [None]:
from annoy import AnnoyIndex

# Vector length is taken from the first extracted output
f = len(img_array_resut[0])
# Index configured with the 'euclidean' metric
t = AnnoyIndex(f, 'euclidean')

print(f"Number of features {f}")
print("Append the item vectors")
# Item ids are the positions in img_array_resut, which match list_img_path
i = 0 
for v in tqdm(img_array_resut):
    t.add_item(i, v)
    i += 1

print("Building the tree")
# The argument 10 is passed to build() — presumably the number of trees; confirm
t.build(10)


In [None]:
# Item id (position in list_img_path) of the query image.
# NOTE(review): hard-coded; presumably fails when fewer images were indexed — confirm.
image_idx = 1231
# Ask for 10 neighbours together with their distances; the plotting cell reads
# the neighbour ids from similar_result[0].
similar_result = t.get_nns_by_item(image_idx, n=10, include_distances=True)
similar_result

In [None]:
# Display the query image itself
path = list_img_path[image_idx]
im = Image.open(path)
plt.imshow(im)

In [None]:
# Draw the neighbours into a 4x4 grid of subplots; i is the 1-based subplot position
i = 1
columns = 4
rows = 4
fig = plt.figure(figsize=(20, 20))

# The first returned id is skipped — presumably it is the query image itself (TODO confirm)
for idx in similar_result[0][1:]:
    path = list_img_path[idx]
    im = Image.open(path)

    fig.add_subplot(rows, columns, i)
    plt.imshow(im)
    i += 1

plt.show()