# Importing library and data

In [None]:
pip install tensorflow_text

In [5]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [6]:
import pandas as pd

dt = pd.read_csv('./datasets/ecommerceDataset.csv')
dt.head(10)

Unnamed: 0,Labels,Text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
5,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
6,Household,Paper Plane Design Starry Night Vangoh Wall Ar...
7,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
8,Household,SAF 'Ganesh Modern Art Print' Painting (Synthe...
9,Household,Paintings Villa UV Textured Modern Art Print F...


## **Data preprocessing**
### *Getting all categories*

In [7]:
categories = dt['Labels'].unique()
print(categories)

['Household' 'Books' 'Clothing & Accessories' 'Electronics']


In [8]:
dt.groupby('Labels').describe()

Unnamed: 0_level_0,Text,Text,Text,Text
Unnamed: 0_level_1,count,unique,top,freq
Labels,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Books,11820,6256,Think & Grow Rich About the Author NAPOLEON HI...,30
Clothing & Accessories,8670,5674,Diverse Men's Formal Shirt Diverse is a wester...,23
Electronics,10621,5308,HP 680 Original Ink Advantage Cartridge (Black...,26
Household,19313,10564,Nilkamal Series-24 Chest of Drawers (Cream Tra...,13


### *Handling dataframes*

In [9]:
df_accessories = dt[dt['Labels'] == 'Clothing & Accessories']
df_accessories.head(10)
df_accessories.shape


(8671, 2)

In [10]:
df_books = dt[dt['Labels'] == 'Books']
df_books.shape

(11820, 2)

In [11]:
df_household = dt[dt['Labels'] == 'Household']
df_household.shape

(19313, 2)

In [12]:
df_elec = dt[dt['Labels'] == 'Electronics']
df_elec.shape

(10621, 2)

### ***Balancing dataframes***

In [13]:
df_books_downsample = df_books.head(df_accessories.shape[0])
df_books_downsample.shape

(8671, 2)

In [14]:
df_household_downsample = df_household.head(df_accessories.shape[0])
df_household_downsample.shape

(8671, 2)

In [15]:
df_elec_downsample = df_elec.head(df_accessories.shape[0])
df_elec_downsample.shape
df_elec_downsample.head()

Unnamed: 0,Labels,Text
39804,Electronics,Dell 19.5V-3.34AMP 65W Laptop Adapter (Without...
39805,Electronics,Bluetooth Dongle USB CSR 4.0 Adapter Receiver ...
39806,Electronics,"Wi-Fi Receiver 300Mbps, 2.4GHz, 802.11b/g/n US..."
39807,Electronics,SanDisk 64GB Class 10 microSDXC Memory Card wi...
39808,Electronics,Gizga Essentials Laptop Power Cable Cord- 3 Pi...


### ***Merging dataframes***

In [17]:
dfs_array = [df_accessories, df_books_downsample, df_household_downsample, df_elec_downsample]
df_merged = pd.concat(dfs_array)
df_merged[df_merged['Labels'] == 'Household']

Unnamed: 0,Labels,Text
31133,Clothing & Accessories,Woopower 36M Pink for 024M Baby Trouser Top Se...
31134,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31135,Clothing & Accessories,Vaenait Baby 024M Baby Girls Rashguard Swimwea...
31136,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31137,Clothing & Accessories,Kuchipoo Girl's Cotton Regular Fit T-Shirt - P...
...,...,...
48470,Electronics,LG GH24NSD1 Internal SATA DVD Writer The M-DIS...
48471,Electronics,LG GP65NB60 External DVD Writer (Black) LG GP6...
48472,Electronics,PIONEER DVD PLAYER DV-3052V Pioneer DV-3052 Mu...
48473,Electronics,LG DP546 DVD Player (Black) DivX-This is a for...


### ***Adding 'type' column***

In [18]:
df_merged

Unnamed: 0,Labels,Text
31133,Clothing & Accessories,Woopower 36M Pink for 024M Baby Trouser Top Se...
31134,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31135,Clothing & Accessories,Vaenait Baby 024M Baby Girls Rashguard Swimwea...
31136,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31137,Clothing & Accessories,Kuchipoo Girl's Cotton Regular Fit T-Shirt - P...
...,...,...
48470,Electronics,LG GH24NSD1 Internal SATA DVD Writer The M-DIS...
48471,Electronics,LG GP65NB60 External DVD Writer (Black) LG GP6...
48472,Electronics,PIONEER DVD PLAYER DV-3052V Pioneer DV-3052 Mu...
48473,Electronics,LG DP546 DVD Player (Black) DivX-This is a for...


In [80]:
#df_merged.drop(['Household', 'Books', 'Electronics', 'Clothing & Accessories'], axis = 1)

Unnamed: 0,Labels,Text,Type
31133,Clothing & Accessories,Woopower 36M Pink for 024M Baby Trouser Top Se...,"[0, 0, 0, 1]"
31134,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...,"[0, 0, 0, 1]"
31135,Clothing & Accessories,Vaenait Baby 024M Baby Girls Rashguard Swimwea...,"[0, 0, 0, 1]"
31136,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...,"[0, 0, 0, 1]"
31137,Clothing & Accessories,Kuchipoo Girl's Cotton Regular Fit T-Shirt - P...,"[0, 0, 0, 1]"
...,...,...,...
48470,Electronics,LG GH24NSD1 Internal SATA DVD Writer The M-DIS...,"[0, 1, 0, 0]"
48471,Electronics,LG GP65NB60 External DVD Writer (Black) LG GP6...,"[0, 1, 0, 0]"
48472,Electronics,PIONEER DVD PLAYER DV-3052V Pioneer DV-3052 Mu...,"[0, 1, 0, 0]"
48473,Electronics,LG DP546 DVD Player (Black) DivX-This is a for...,"[0, 1, 0, 0]"


In [19]:
# Household = [1, 0, 0, 0]
# Electronics = [0, 1, 0, 0]
# Books = [0, 0, 1, 0]
# Clo_Acce = [0, 0, 0, 1]

df_merged['Type'] = df_merged['Labels'].apply(
    lambda x: 0 if x == 'Household'
    else 1 if x == 'Electronics'
    else 2 if x == 'Books'
    else 3
)

df_merged.sample(10)

Unnamed: 0,Labels,Text,Type
41405,Electronics,Cable Hunter™USB 3.0 (5Gbps) External Memory C...,1
47178,Electronics,"Abro ABS39 Colour Spray Paint (400ml, Glossy B...",1
42569,Electronics,Portronics POR-895 Adjustable Laptop Table (Br...,1
6233,Household,Nechams Decorative Multi-Colored Push Thumb Pi...,0
5156,Household,Mamta Decoration Wall Mounted Shelf Set of 3 F...,0
46029,Electronics,Hoya Solas IRND 1.5 72mm Infrared Neutral Dens...,1
4848,Household,AJANTA ROYAL Individual Synthetic Polymer Wood...,0
32871,Clothing & Accessories,Arrow Fashion Baby Girl Net Lehenga Choli (Ora...,3
34997,Clothing & Accessories,Hanes Men's Calf Socks (Pack of 3) A pack of 3...,3
25567,Books,I am an Athlete – Read It Yourself with Ladybi...,2


In [20]:
df_merged['Type'].head(1).dtype

dtype('int64')

In [21]:
df_merged['Type'][1]


0

In [None]:
df_merged['Household'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Household' else 0
)
df_merged['Electronics'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Electronics' else 0
)
df_merged['Books'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Books' else 0
)

df_merged['Clothing & Accessories'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Clothing & Accessories' else 0
)

df_merged.sample(50)

### ***Splitting dataframe***

In [22]:
cols = df_merged.select_dtypes(include=['object'])
for col in cols.columns.values:
    df_merged[col] = df_merged[col].fillna('')

In [68]:
import sklearn
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_merged['Text'], df_merged['Type'], train_size=0.2)
#y_train = y_train.astype('string')
X_train[:2]

35474    Fastrack UV Protected Wayfarer Men's Sunglasse...
198      Charms Anklet for Women (Silver)(CR0251-285ef)...
Name: Text, dtype: object

In [77]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, Bidirectional, LSTM
from keras.optimizers import SGD, Adam, Adadelta, RMSprop

model = Sequential()

model.add(Embedding(5000, 64))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(4, activation='softmax'))
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

model.summary()

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, None, 64)          320000    
                                                                 
 dropout_17 (Dropout)        (None, None, 64)          0         
                                                                 
 bidirectional_6 (Bidirectio  (None, 128)              66048     
 nal)                                                            
                                                                 
 dense_41 (Dense)            (None, 4)                 516       
                                                                 
Total params: 386,564
Trainable params: 386,564
Non-trainable params: 0
_________________________________________________________________


In [78]:
model.fit(X_train, y_train, verbose = 1, epochs = 3)

Epoch 1/3


UnimplementedError: Graph execution error:

Detected at node 'sequential_21/Cast' defined at (most recent call last):
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 1043, in launch_instance
      app.start()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 728, in start
      self.io_loop.start()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell
      await result
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Admin\AppData\Local\Temp\ipykernel_27396\3439237064.py", line 1, in <module>
      model.fit(X_train, y_train, verbose = 1, epochs = 3)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 649, in _run_internal_graph
      y = self._conform_to_reference_input(y, ref_input=x)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 761, in _conform_to_reference_input
      tensor = tf.cast(tensor, dtype=ref_input.dtype)
Node: 'sequential_21/Cast'
Cast string to float is not supported
	 [[{{node sequential_21/Cast}}]] [Op:__inference_train_function_15062]

### ***Importing BERT and getting embeding vectors for data***

In [None]:
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

**Example getting embeding of sentence**

In [None]:
def get_sentence_embeding(sentences):
    preprocessed_text = bert_preprocess(sentences)
    return bert_encoder(preprocessed_text)['pooled_output']

get_sentence_embeding([
    "500$ discount. hurry up", 
    "Bhavin, are you up for a volleybal game tomorrow?"]
)

**Building model**

In [None]:
#BERT Layer
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name = "text")
preprocessed_inputs = bert_preprocess(text_input)
encoded_outputs = bert_encoder(preprocessed_inputs)

#Neural network
layer = tf.keras.layers.Dropout(0.1, name='dropout') (encoded_outputs['pooled_output'])
layer= tf.keras.layers.Dense(4, activation='sigmoid', name="output")(layer)

#Construct the final model
model = tf.keras.Model(inputs=[text_input], outputs=[layer])

In [150]:
encoded_outputs['pooled_output'][4]

<KerasTensor: shape=(768,) dtype=float32 (created by layer 'tf.__operators__.getitem_3')>

In [145]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_word_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128)}                                                

In [None]:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), optimizer='adam',
              metrics=[tf.keras.metrics.BinaryAccuracy(name = 'Accuracy'),
                       tf.keras.metrics.Precision(name = 'Precision'), 
                       tf.keras.metrics.Recall(name = 'Recall')])

In [135]:
y_train = y_train.astype(str)

In [136]:
y_train.head(10)

36681    [0, 0, 0, 1]
35662    [0, 0, 0, 1]
26569    [0, 0, 1, 0]
7349     [1, 0, 0, 0]
40428    [0, 1, 0, 0]
44173    [0, 1, 0, 0]
46171    [0, 1, 0, 0]
4267     [1, 0, 0, 0]
19940    [0, 0, 1, 0]
21308    [0, 0, 1, 0]
Name: Type, dtype: object

In [137]:
from keras.models import Sequential
from keras.layers import Dense

MODEL = Sequential()
MODEL.add(Dense(20, input_dim = 768, kernel_initializer='he_uniform', activation='relu'))
MODEL.add(Dense(4, activation = 'sigmoid'))
MODEL.compile(loss='binary_crossentropy', optimizer='adam')

In [138]:
MODEL.fit(X_train, y_train, verbose=0, epochs=3)





ValueError: in user code:

    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_12" "                 f"(type Sequential).
    
    Input 0 of layer "dense_24" is incompatible with the layer: expected axis -1 of input shape to have value 768, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_12" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=string)
      • training=True
      • mask=None


In [None]:
import numpy as np
y_train = np.asarray(y_train).astype(np.int64)
model.fit(X_train, y_train, epochs = 10)