# Importing library and data

In [None]:
pip install tensorflow_text

In [23]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [25]:
import pandas as pd

dt = pd.read_csv('./datasets/ecommerceDataset.csv')
df = dt.values
print(df)

[['Household'
  'Paper Plane Design Framed Wall Hanging Motivational Office Decor Art Prints (8.7 X 8.7 inch) - Set of 4 Painting made up in synthetic frame with uv textured print which gives multi effects and attracts towards it. This is an special series of paintings which makes your wall very beautiful and gives a royal touch. This painting is ready to hang, you would be proud to possess this unique painting that is a niche apart. We use only the most modern and efficient printing technology on our prints, with only the and inks and precision epson, roland and hp printers. This innovative hd printing technique results in durable and spectacular looking prints of the highest that last a lifetime. We print solely with top-notch 100% inks, to achieve brilliant and true colours. Due to their high level of uv resistance, our prints retain their beautiful colours for many years. Add colour and style to your living space with this digitally printed painting. Some are for pleasure and some 

## **Data preprocessing**
### *Getting all categories*

In [26]:
categories = dt['Labels'].unique()
print(categories)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
dt.groupby('Labels').describe()

Unnamed: 0_level_0,Text,Text,Text,Text
Unnamed: 0_level_1,count,unique,top,freq
Labels,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Books,11820,6256,Think & Grow Rich About the Author NAPOLEON HI...,30
Clothing & Accessories,8670,5674,Diverse Men's Formal Shirt Diverse is a wester...,23
Electronics,10621,5308,HP 680 Original Ink Advantage Cartridge (Black...,26
Household,19313,10564,Nilkamal Series-24 Chest of Drawers (Cream Tra...,13


### *Handling dataframes*

In [None]:
df_accessories = dt[dt['Labels'] == 'Clothing & Accessories']
df_accessories.head(10)
df_accessories.shape


(8671, 2)

In [None]:
df_books = dt[dt['Labels'] == 'Books']
df_books.shape

(11820, 2)

In [None]:
df_household = dt[dt['Labels'] == 'Household']
df_household.shape

(19313, 2)

In [None]:
df_elec = dt[dt['Labels'] == 'Electronics']
df_elec.shape

(10621, 2)

### ***Balancing dataframes***

In [None]:
df_books_downsample = df_books.head(df_accessories.shape[0])
df_books_downsample.shape

(8671, 2)

In [None]:
df_household_downsample = df_household.head(df_accessories.shape[0])
df_household_downsample.shape

(8671, 2)

In [None]:
df_elec_downsample = df_elec.head(df_accessories.shape[0])
df_elec_downsample.shape
df_elec_downsample.head()

Unnamed: 0,Labels,Text
39804,Electronics,Dell 19.5V-3.34AMP 65W Laptop Adapter (Without...
39805,Electronics,Bluetooth Dongle USB CSR 4.0 Adapter Receiver ...
39806,Electronics,"Wi-Fi Receiver 300Mbps, 2.4GHz, 802.11b/g/n US..."
39807,Electronics,SanDisk 64GB Class 10 microSDXC Memory Card wi...
39808,Electronics,Gizga Essentials Laptop Power Cable Cord- 3 Pi...


### ***Merging dataframes***

In [None]:
dfs_array = [df_accessories, df_books_downsample, df_household_downsample, df_elec_downsample]
df_merged = pd.concat(dfs_array)
df_merged[df_merged['Labels'] == 'Household']

Unnamed: 0,Labels,Text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
...,...,...
8666,Household,Glync 5W LED Recessed Spot Light Ceiling Downl...
8667,Household,Philips AstraPrime 10-Watt Recessed LED Panel ...
8668,Household,D'MakTM LED Ceiling COB Spot Light - 3 Watt - ...
8669,Household,Philips AstraPrime 5-Watt Recessed LED Panel C...


### ***Adding 'type' column***

In [None]:
df_merged

Unnamed: 0,Labels,Text
31133,Clothing & Accessories,Woopower 36M Pink for 024M Baby Trouser Top Se...
31134,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31135,Clothing & Accessories,Vaenait Baby 024M Baby Girls Rashguard Swimwea...
31136,Clothing & Accessories,Amour Butterfly Design Sunglasses For Girls 6+...
31137,Clothing & Accessories,Kuchipoo Girl's Cotton Regular Fit T-Shirt - P...
...,...,...
48470,Electronics,LG GH24NSD1 Internal SATA DVD Writer The M-DIS...
48471,Electronics,LG GP65NB60 External DVD Writer (Black) LG GP6...
48472,Electronics,PIONEER DVD PLAYER DV-3052V Pioneer DV-3052 Mu...
48473,Electronics,LG DP546 DVD Player (Black) DivX-This is a for...


In [None]:
#df_merged.drop(['Household', 'Books', 'Electronics', 'Clothing & Accessories'], axis = 1)

In [None]:
# Household = [1, 0, 0, 0]
# Electronics = [0, 1, 0, 0]
# Books = [0, 0, 1, 0]
# Clo_Acce = [0, 0, 0, 1]

df_merged['Type'] = df_merged['Labels'].apply(
    lambda x: 0 if x == 'Household'
    else 1 if x == 'Electronics'
    else 2 if x == 'Books'
    else 3
)

df_merged.sample(10)

Unnamed: 0,Labels,Text,Type
41757,Electronics,TP-Link TD-W8901N 150Mbps Wireless N ADSL2+ Mo...,1
43456,Electronics,"iBall Slide Brisk 4G2 Tablet (7 inch, 16GB, Wi...",1
24441,Books,What Happens When We Die?: A Psychic's Explora...,2
38881,Clothing & Accessories,Handcuffs Fashion Warm Cycling Gloves (Standar...,3
40542,Electronics,Whitleys 3D Printer Mini External Memory Card ...,1
47313,Electronics,Hikvision DS-2CE5AD0T-IRP 3.6mm 1080P HD Indoo...,1
4700,Household,Artysta Handcrafted Wall Mirror - Decorative M...,0
39276,Clothing & Accessories,Jockey Women's Shorts Style Number : Rx15Colle...,3
31625,Clothing & Accessories,RC. ROYAL CLASS Boys & Girls Calf Length Black...,3
23887,Books,Nationalism About the Author A Bengali polymat...,2


In [None]:
df_merged['Type'].head(1).dtype

dtype('int64')

In [None]:
df_merged['Type'][1]


0

In [None]:
df_merged['Household'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Household' else 0
)
df_merged['Electronics'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Electronics' else 0
)
df_merged['Books'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Books' else 0
)

df_merged['Clothing & Accessories'] = df_merged['Labels'].apply(
    lambda x: 1 if x == 'Clothing & Accessories' else 0
)

df_merged.sample(50)

Unnamed: 0,Labels,Text,Type,Household,Electronics,Books,Clothing & Accessories
43610,Electronics,Acer 18.5 inch (46.99 cm) LED Monitor - EB192Q...,1,0,1,0,0
19960,Books,The Art of War (Pocket Classics),2,0,0,1,0
34312,Clothing & Accessories,"Sojanya (Since 1958), Men's Silk Blend Harem P...",3,0,0,0,1
35385,Clothing & Accessories,Mag Men's silk Kurta Churidhar With Waistcoat,3,0,0,0,1
41562,Electronics,Sony Microvault 32GB USB Drive (Black) Backing...,1,0,1,0,0
90,Household,Theme My Party Latex Metallic HD Balloons (Gol...,0,1,0,0,0
31371,Clothing & Accessories,nauti nati Boys' Blazer,3,0,0,0,1
46676,Electronics,Night Owl Optics Xgen Xgenpro 3X Digital Night...,1,0,1,0,0
40086,Electronics,Photron EVA Shockproof Portable External Hard ...,1,0,1,0,0
27379,Books,The Silent Widow Review Praise for Tilly Bagsh...,2,0,0,1,0


### ***Splitting dataframe***

In [None]:
cols = df_merged.select_dtypes(include=['object'])
for col in cols.columns.values:
    df_merged[col] = df_merged[col].fillna('')

In [None]:
import sklearn
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_merged['Text'], df_merged['Type'], train_size=0.2)
#y_train = y_train.astype('string')
X_train[:2]

3880    Urban Ladder Terry Study Desk with Storage (Go...
4841    Double Panel Clear Acrylic Floating Frame with...
Name: Text, dtype: object

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, Bidirectional, LSTM
from keras.optimizers import SGD, Adam, Adadelta, RMSprop

model = Sequential()

model.add(Embedding(5000, 64))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(4, activation='softmax'))
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 64)          320000    
                                                                 
 dropout (Dropout)           (None, None, 64)          0         
                                                                 
 bidirectional (Bidirectiona  (None, 128)              66048     
 l)                                                              
                                                                 
 dense (Dense)               (None, 4)                 516       
                                                                 
Total params: 386,564
Trainable params: 386,564
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(X_train, y_train, verbose = 1, epochs = 3)

Epoch 1/3


UnimplementedError: Graph execution error:

Detected at node 'sequential/Cast' defined at (most recent call last):
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 1043, in launch_instance
      app.start()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 728, in start
      self.io_loop.start()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell
      await result
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Admin\AppData\Local\Temp\ipykernel_19168\3439237064.py", line 1, in <module>
      model.fit(X_train, y_train, verbose = 1, epochs = 3)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 649, in _run_internal_graph
      y = self._conform_to_reference_input(y, ref_input=x)
    File "c:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 761, in _conform_to_reference_input
      tensor = tf.cast(tensor, dtype=ref_input.dtype)
Node: 'sequential/Cast'
Cast string to float is not supported
	 [[{{node sequential/Cast}}]] [Op:__inference_train_function_5623]

### ***Importing BERT and getting embeding vectors for data***

In [None]:
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

**Example getting embeding of sentence**

In [None]:
def get_sentence_embeding(sentences):
    preprocessed_text = bert_preprocess(sentences)
    return bert_encoder(preprocessed_text)['pooled_output']

get_sentence_embeding([
    "500$ discount. hurry up", 
    "Bhavin, are you up for a volleybal game tomorrow?"]
)

**Building model**

In [None]:
#BERT Layer
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name = "text")
preprocessed_inputs = bert_preprocess(text_input)
encoded_outputs = bert_encoder(preprocessed_inputs)

#Neural network
layer = tf.keras.layers.Dropout(0.1, name='dropout') (encoded_outputs['pooled_output'])
layer= tf.keras.layers.Dense(4, activation='sigmoid', name="output")(layer)

#Construct the final model
model = tf.keras.Model(inputs=[text_input], outputs=[layer])

In [None]:
encoded_outputs['pooled_output'][4]

<KerasTensor: shape=(768,) dtype=float32 (created by layer 'tf.__operators__.getitem_3')>

In [None]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_word_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128)}                                                

In [None]:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), optimizer='adam',
              metrics=[tf.keras.metrics.BinaryAccuracy(name = 'Accuracy'),
                       tf.keras.metrics.Precision(name = 'Precision'), 
                       tf.keras.metrics.Recall(name = 'Recall')])

In [None]:
y_train = y_train.astype(str)

In [None]:
y_train.head(10)

36681    [0, 0, 0, 1]
35662    [0, 0, 0, 1]
26569    [0, 0, 1, 0]
7349     [1, 0, 0, 0]
40428    [0, 1, 0, 0]
44173    [0, 1, 0, 0]
46171    [0, 1, 0, 0]
4267     [1, 0, 0, 0]
19940    [0, 0, 1, 0]
21308    [0, 0, 1, 0]
Name: Type, dtype: object

In [None]:
from keras.models import Sequential
from keras.layers import Dense

MODEL = Sequential()
MODEL.add(Dense(20, input_dim = 768, kernel_initializer='he_uniform', activation='relu'))
MODEL.add(Dense(4, activation = 'sigmoid'))
MODEL.compile(loss='binary_crossentropy', optimizer='adam')

In [None]:
MODEL.fit(X_train, y_train, verbose=0, epochs=3)





ValueError: in user code:

    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Admin\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_12" "                 f"(type Sequential).
    
    Input 0 of layer "dense_24" is incompatible with the layer: expected axis -1 of input shape to have value 768, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_12" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=string)
      • training=True
      • mask=None


In [None]:
import numpy as np
y_train = np.asarray(y_train).astype(np.int64)
model.fit(X_train, y_train, epochs = 10)