# Import libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2, matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
print('TF',tf.__version__)
#Text Color
from termcolor import colored
from wordcloud import WordCloud, STOPWORDS

In [None]:
# Cap TensorFlow's share of GPU RAM at LIMIT GB (currently 1 GB) on the first GPU
# so that the remaining GPU memory stays available for RAPIDS cuML KNN.
LIMIT = 1
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Only the first physical GPU is configured. The limit is passed as
        # LIMIT * 1024, i.e. LIMIT GB expressed in MB.
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [
                tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=LIMIT * 1024
                )
            ],
        )
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Best effort: report the problem and continue without the cap.
        print(e)
print('Restrict TensorFlow to max %iGB GPU RAM' % LIMIT)
# NOTE(review): the 16 below presumably assumes a 16 GB GPU -- confirm for the target hardware.
print('so RAPIDS can use %iGB GPU RAM' % (16 - LIMIT))

In [None]:
# Load the training metadata table; later cells read its `image` and
# `label_group` columns.
train= pd.read_csv('../input/shopee-product-matching/train.csv')


### Use Image Embeddings

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of resized images listed in a DataFrame.

    Each batch is a float32 array of shape ``(n, img_size, img_size, 3)`` that
    holds the pixel values exactly as OpenCV returns them: no scaling is
    applied and the channel order is whatever ``cv2.imread`` produces.
    Only inputs are returned (no labels).

    Args:
        df: DataFrame with an ``image`` column of file names.
        img_size: Side length, in pixels, every image is resized to.
        batch_size: Maximum number of images per batch; the final batch may
            be smaller.
        path: Prefix joined to each file name by plain string concatenation,
            so it should end with a path separator.
    """

    def __init__(self, df, img_size=256, batch_size=32, path=''):
        # Let the Keras base class initialise its own state.
        super().__init__()
        self.df = df
        self.img_size = img_size
        self.batch_size = batch_size
        self.path = path
        # Positional row indices into ``df``; never shuffled.
        self.indexes = np.arange(len(self.df))

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        full_batches, remainder = divmod(len(self.df), self.batch_size)
        return full_batches + (1 if remainder else 0)

    def __getitem__(self, index):
        """Return the image batch at position ``index``."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, indexes):
        """Load and resize the images at the given positional row indices.

        Raises:
            FileNotFoundError: If an image file cannot be read by OpenCV.
        """
        size = (self.img_size, self.img_size)
        X = np.zeros((len(indexes), *size, 3), dtype='float32')
        file_names = self.df['image'].iloc[indexes]
        for i, file_name in enumerate(file_names):
            full_path = self.path + file_name
            img = cv2.imread(full_path)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with a clear message rather than inside cv2.resize.
            if img is None:
                raise FileNotFoundError(f'Could not read image: {full_path}')
            X[i] = cv2.resize(img, size)
        return X

In [None]:
# Directory prefix for the training images; passed to DataGenerator as `path`.
BASE='../input/shopee-product-matching/train_images/'

In [None]:
from tensorflow.keras.models import load_model

# Load the saved Keras model (.h5 file) that the embedding extractor is built from.
base_model = load_model('../input/model50e/MobileNet_TransferLearning.h5')

In [None]:
# Import Model from tensorflow.keras rather than the standalone `keras` package,
# so it comes from the same framework that loaded `base_model`.
from tensorflow.keras.models import Model

# Embedding extractor: same input as base_model, with the output taken at the
# layer named 'global_average_pooling2d'.
model2 = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('global_average_pooling2d').output,
)


In [None]:
# Compute image embeddings for every row of `train`, CHUNK rows at a time,
# so each predict() call only covers a bounded slice of the table.
embeds = []
CHUNK = 1024 * 4

print('Computing image embeddings...')
# Number of chunks, rounded up so a trailing partial chunk is included.
CTS = -(-len(train) // CHUNK)
for j in range(CTS):
    a = j * CHUNK
    b = min(a + CHUNK, len(train))
    print('chunk', a, 'to', b)

    train_gen = DataGenerator(train.iloc[a:b], img_size=512, batch_size=6, path=BASE)
    image_embeddings = model2.predict(train_gen, verbose=1, use_multiprocessing=True, workers=4)
    embeds.append(image_embeddings)

# Stack the per-chunk outputs into one array, in the row order of `train`.
image_embeddings = np.concatenate(embeds)

# NOTE: the embeddings are recomputed on every run; np.savetxt / np.loadtxt
# could be used to cache them as CSV if that becomes too slow.

In [None]:
# Build the full embedding array from the per-chunk results in `embeds`
# (the same concatenation is also performed at the end of the embedding cell).
image_embeddings = np.concatenate(embeds)

In [None]:
# Classification targets: the `label_group` value of every row in `train`.
dataset_labels = np.array(train['label_group'])

In [None]:
import numpy as np
import joblib
from sklearn.neural_network import MLPClassifier

# Targets are the label_group values collected in `dataset_labels`.
train_dataset_array_labels = dataset_labels

# MLP with two hidden layers (500 and 150 units), fitted with the Adam solver
# on the image embeddings; max_iter caps the number of training iterations.
classifier = MLPClassifier(
    hidden_layer_sizes=(500, 150),
    solver='adam',
    max_iter=20000,
)
classifier.fit(image_embeddings, train_dataset_array_labels)

# Persist the fitted classifier to disk.
joblib.dump(classifier, 'MobileNet_TransferLearning.joblib')



In [None]:
# Score the classifier on the same embeddings it was fitted on
# (this is training accuracy, not a held-out estimate).
train_predictions = classifier.predict(image_embeddings)

# Indices of the samples whose predicted label matches the true label;
# np.where returns a 1-tuple here, so the array has shape (1, n_correct).
train_correct_predictions = np.array(np.where(train_predictions == train_dataset_array_labels))
# Percentage of correctly classified training samples, rounded to 2 decimals.
train_accuracy = np.round((train_correct_predictions.shape[1] / image_embeddings.shape[0]) * 100, 2)
print("Train Accuracy : ", train_accuracy)