In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2, matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
print('TF',tf.__version__)
#Text Color
from termcolor import colored
from wordcloud import WordCloud, STOPWORDS

In [None]:
# Cap TensorFlow's GPU memory so other GPU libraries (RAPIDS) keep the rest.
LIMIT = 1  # GB; multiplied by 1024 below because memory_limit is passed in MB
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # Expose the first physical GPU as a single logical device with a
        # fixed memory ceiling.
        capped_device = tf.config.experimental.VirtualDeviceConfiguration(
            memory_limit=1024*LIMIT)
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0], [capped_device])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Reported rather than raised so the notebook keeps running.
        print(err)
# NOTE(review): these lines are printed even when no GPU was found or the
# configuration failed, and the 16 assumes a 16GB card — confirm.
print('Restrict TensorFlow to max %iGB GPU RAM'%LIMIT)
print('so RAPIDS can use %iGB GPU RAM'%(16-LIMIT))

In [None]:
# Load the test-split metadata into `test`; the train and sample-submission
# reads below are left disabled.
#train = pd.read_csv('../input/shopee-product-matching/train.csv')
test = pd.read_csv('../input/shopee-product-matching/test.csv')
#sample = pd.read_csv('../input/shopee-product-matching/sample_submission.csv')

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of resized images (no labels).

    Every batch is a ``float32`` array of shape
    ``(n, img_size, img_size, 3)`` containing the pixel values returned by
    ``cv2.imread`` after resizing; no rescaling or normalisation is applied.

    Args:
        df: DataFrame with an ``image`` column holding image file names.
        img_size: Side length, in pixels, every image is resized to.
        batch_size: Maximum number of images per batch; the final batch may
            be smaller.
        path: Prefix prepended verbatim to each file name, so it must
            include any trailing path separator.
    """

    def __init__(self, df, img_size=256, batch_size=32, path=''):
        # Newer Keras releases expect Sequence subclasses to run the base
        # initialiser; on older releases this is a harmless no-op.
        super().__init__()
        self.df = df
        self.img_size = img_size
        self.batch_size = batch_size
        self.path = path
        # Positional row indexes into `df`, sliced per batch in __getitem__.
        self.indexes = np.arange(len(self.df))

    def __len__(self):
        """Return the number of batches per epoch (ceiling of rows / batch_size)."""
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an image array."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, indexes):
        """Read and resize the images at the given positional row indexes.

        Raises:
            OSError: If an image file is missing or cannot be decoded.
        """
        X = np.zeros((len(indexes), self.img_size, self.img_size, 3), dtype='float32')
        file_names = self.df['image'].iloc[indexes]
        for i, file_name in enumerate(file_names):
            img_path = self.path + file_name
            # NOTE(review): cv2.imread yields BGR channel order — confirm this
            # matches what the model was trained on.
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path rather than
                # with an opaque error inside cv2.resize.
                raise OSError('Could not read image (missing or unreadable): %s' % img_path)
            X[i,] = cv2.resize(img, (self.img_size, self.img_size))
        return X

In [None]:
# Directory prefix for the test images; the trailing slash matters because
# file names are appended to it by plain string concatenation.
BASE = '../input/shopee-product-matching/test_images/'

In [None]:
from tensorflow.keras.models import load_model

# Load the saved Keras model from the attached dataset; the next cell taps one
# of its intermediate layers to produce image embeddings.
# NOTE(review): the file name says MobileNet while EfficientNetB0 is imported
# at the top of the notebook — confirm which backbone is intended.
base_model = load_model('../input/model50e/MobileNet_TransferLearning.h5')

In [None]:
# Use the tf.keras Model class, i.e. the same Keras implementation that
# loaded `base_model`; mixing standalone `keras` objects with `tensorflow.keras`
# ones can fail when the two packages are not the same implementation.
from tensorflow.keras.models import Model

# Embedding extractor: same input as the loaded model, but the output is taken
# from the layer named 'global_average_pooling2d' instead of the final layer.
model2 = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('global_average_pooling2d').output,
)

In [None]:
# Embed the test images chunk by chunk so only CHUNK rows are processed per
# predict() call, then stack the per-chunk results.
embeds = []
CHUNK = 1024 * 4

print('Computing image embeddings...')
# Ceiling division: number of CHUNK-sized slices needed to cover `test`.
CTS = -(-len(test) // CHUNK)
for j in range(CTS):
    a = j * CHUNK
    b = min((j + 1) * CHUNK, len(test))  # the last chunk may be shorter
    print('chunk', a, 'to', b)

    test_gen = DataGenerator(test.iloc[a:b], batch_size=32, path=BASE)
    image_embeddings = model2.predict(test_gen, verbose=1, use_multiprocessing=True, workers=4)
    embeds.append(image_embeddings)

# One row per test image, in the same order as `test`.
image_embeddings = np.concatenate(embeds)


In [None]:
# Stack the per-chunk embedding arrays into a single array.
# NOTE(review): the previous cell already ends with this exact statement, so
# this cell is redundant.
image_embeddings = np.concatenate(embeds)

In [None]:
import numpy as np
from sklearn.neural_network import MLPClassifier
import joblib
from joblib import load
# Load the pre-fitted classifier whose predictions on the image embeddings are
# used as label groups further down.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
# presumably an MLPClassifier, given the import above — confirm against the
# notebook that produced the file.
classifier=joblib.load('../input/pretrainimg/MobileNet_TransferLearning.joblib')

In [None]:
# Predict one label per row of `image_embeddings` with the loaded classifier.
test_predictions = classifier.predict(image_embeddings)

In [None]:
#test_predictions

In [None]:
# Store the predictions as a new `label_group` column; this relies on
# `test_predictions` being in the same row order as `test`.
test['label_group']=test_predictions

In [None]:
#test

In [None]:
def clean(x):
    """Join the strings in ``x`` into a single space-separated string."""
    separator = " "
    joined = separator.join(x)
    return joined

In [None]:
# Lookup table: predicted label group -> array of the unique posting ids that
# were assigned to that group.
tmp = test.groupby('label_group')['posting_id'].unique().to_dict()

In [None]:
def get_match(imag):
    """Return the posting ids in label group ``imag`` as one space-separated string.

    ``imag`` is looked up in the module-level ``tmp`` mapping (label group ->
    array of posting ids); a label that is not a key of ``tmp`` raises
    ``KeyError``.
    """
    group_members = tmp[imag].tolist()
    return clean(group_members)

In [None]:
# For every row, list all posting ids that share its predicted label group
# (the row's own posting id is part of that group, so it is included).
test['matches']=test['label_group'].map(get_match)

In [None]:
#test[['posting_id', 'matches']]

In [None]:
# Write only the two submission columns, without the DataFrame index.
submission = test[['posting_id', 'matches']]
submission.to_csv('submission.csv', index=False)