# Importing libraries and data

In [None]:
pip install tensorflow_text

In [1]:
import tensorflow as tf
# Show which TensorFlow version the kernel is running.
tf.__version__

'2.13.0'

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [109]:
import pandas as pd

# Read the e-commerce product dataset and preview its first ten rows.
csv_path = './datasets/ecommerceDataset.csv'
dt = pd.read_csv(csv_path)
dt.head(n=10)

Unnamed: 0,Labels,Text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
5,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
6,Household,Paper Plane Design Starry Night Vangoh Wall Ar...
7,Household,Pitaara Box Romantic Venice Canvas Painting 6m...
8,Household,SAF 'Ganesh Modern Art Print' Painting (Synthe...
9,Household,Paintings Villa UV Textured Modern Art Print F...


## **Data preprocessing**
### *Getting all categories*

In [110]:
# Collect the distinct class names that occur in the label column.
label_column = dt['Labels']
categories = label_column.unique()
print(categories)

['Household' 'Books' 'Clothing & Accessories' 'Electronics']


In [111]:
# Per-label summary of the remaining columns (count, unique, top, freq),
# used to see how many rows each class contributes.
dt.groupby('Labels').describe()

Unnamed: 0_level_0,Text,Text,Text,Text
Unnamed: 0_level_1,count,unique,top,freq
Labels,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Books,11820,6256,Think & Grow Rich About the Author NAPOLEON HI...,30
Clothing & Accessories,8670,5674,Diverse Men's Formal Shirt Diverse is a wester...,23
Electronics,10621,5308,HP 680 Original Ink Advantage Cartridge (Black...,26
Household,19313,10564,Nilkamal Series-24 Chest of Drawers (Cream Tra...,13


### *Handling dataframes*

In [112]:
# Rows labelled 'Clothing & Accessories'. The row count of this subset is what
# the other class subsets are later trimmed to.
df_accessories = dt[dt['Labels'] == 'Clothing & Accessories']
# Only the last expression of a cell is displayed, so the former mid-cell
# `df_accessories.head(10)` had no effect and was dropped.
df_accessories.shape


(8671, 2)

In [6]:
# Keep only the rows whose label is 'Books' and report the subset's dimensions.
df_books = dt.loc[dt['Labels'].eq('Books')]
df_books.shape

(11820, 2)

In [7]:
# Keep only the rows whose label is 'Household' and report the subset's dimensions.
df_household = dt.loc[dt['Labels'].eq('Household')]
df_household.shape

(19313, 2)

In [8]:
# Keep only the rows whose label is 'Electronics' and report the subset's dimensions.
df_elec = dt.loc[dt['Labels'].eq('Electronics')]
df_elec.shape

(10621, 2)

### ***Balancing dataframes***

In [9]:
# Trim the books subset to the accessories row count so both classes end up
# the same size. The leading rows are kept (no shuffling).
n_accessories = len(df_accessories)
df_books_downsample = df_books.iloc[:n_accessories]
df_books_downsample.shape

(8671, 2)

In [10]:
# Trim the household subset to the accessories row count so both classes end up
# the same size. The leading rows are kept (no shuffling).
n_accessories = len(df_accessories)
df_household_downsample = df_household.iloc[:n_accessories]
df_household_downsample.shape

(8671, 2)

In [11]:
# Trim the electronics subset to the accessories row count so both classes end
# up the same size. The leading rows are kept (no shuffling).
df_elec_downsample = df_elec.head(df_accessories.shape[0])
# Only the last expression of a cell is displayed, so the former mid-cell
# `df_elec_downsample.shape` had no effect and was dropped.
df_elec_downsample.head()

Unnamed: 0,Labels,Text
39804,Electronics,Dell 19.5V-3.34AMP 65W Laptop Adapter (Without...
39805,Electronics,Bluetooth Dongle USB CSR 4.0 Adapter Receiver ...
39806,Electronics,"Wi-Fi Receiver 300Mbps, 2.4GHz, 802.11b/g/n US..."
39807,Electronics,SanDisk 64GB Class 10 microSDXC Memory Card wi...
39808,Electronics,Gizga Essentials Laptop Power Cable Cord- 3 Pi...


### ***Merging dataframes***

In [12]:
# Stack the four per-class frames (accessories plus the three trimmed subsets)
# into one frame; the original row index values are kept.
dfs_array = [
    df_accessories,
    df_books_downsample,
    df_household_downsample,
    df_elec_downsample,
]
df_merged = pd.concat(dfs_array)
# Spot-check the household rows of the merged frame.
df_merged.loc[df_merged['Labels'].eq('Household')]

Unnamed: 0,Labels,Text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
...,...,...
8666,Household,Glync 5W LED Recessed Spot Light Ceiling Downl...
8667,Household,Philips AstraPrime 10-Watt Recessed LED Panel ...
8668,Household,D'MakTM LED Ceiling COB Spot Light - 3 Watt - ...
8669,Household,Philips AstraPrime 5-Watt Recessed LED Panel C...


### ***Adding 'Type' column***

In [13]:
# Integer class id for each label. Any label not listed here (in practice
# 'Clothing & Accessories') falls through to id 3.
type_by_label = {'Household': 0, 'Books': 1, 'Electronics': 2}
df_merged['Type'] = df_merged['Labels'].map(
    lambda label: type_by_label.get(label, 3)
)
# Random preview of the encoded frame.
df_merged.sample(50)

Unnamed: 0,Labels,Text,Type
37985,Clothing & Accessories,GOLDSTROMS Women's Maternity Pant/Jegging Gold...,3
1782,Household,@home by Nilkamal Fern 6 Seater Dining Table ...,0
24777,Books,The Will to Climb: Obsession and Commitment an...,1
25279,Books,Zen in the Martial Arts Review ''If one of you...,1
2071,Household,Aashi Enterprise Cane Art Contemporary Rattan ...,0
34622,Clothing & Accessories,Jockey Men's Cotton Thermal Long Pant Feel at ...,3
832,Household,Quechua Arpenaz 50 Hammock,0
45302,Electronics,Zaptin 12X Universal Optical Zoom Lens Telesco...,2
34647,Clothing & Accessories,Hanes Men's Plain Thermal Bottom Hanes Men Gre...,3
2636,Household,DecorNation Carissa Solid Wood Set of 3 Nestin...,0


### ***Splitting dataframe***

In [51]:
# Replace missing values in every object-typed column with an empty string.
cols = df_merged.select_dtypes(include=['object'])
for col in list(cols.columns):
    filled_column = df_merged[col].fillna('')
    df_merged[col] = filled_column

In [58]:
from sklearn.model_selection import train_test_split

# Stratified split of the text column against the integer class ids.
# - The text column key is ' Text' (with a leading space), as used by this notebook.
# - random_state is fixed so the split (and everything trained on it) is
#   reproducible across kernel restarts.
# NOTE(review): train_size=0.09 keeps only 9% of the rows for training —
# confirm this is intentional (e.g. to shorten training) and not a typo for 0.9.
X_train, X_test, y_train, y_test = train_test_split(
    df_merged[' Text'],
    df_merged['Type'],
    train_size=0.09,
    stratify=df_merged['Type'],
    random_state=42,
)
X_train.head()

44303    ZAAP ULTRA SLIM Bluetooth Universal Keyboard f...
46505    Mobias Retails, Flexible Octopus Foldable Trip...
31554    VIMAL Winter King Blended Thermal Top & Pyjama...
7248     Ontime Modern Stainless Steel Bathroom Kit Hol...
34038    CROSS LUGO Men's Genuine Leather Cut-To-Fit St...
Name:  Text, dtype: object

### ***Importing BERT and getting embedding vectors for data***

In [15]:
# TF-Hub handles for the uncased English BERT text preprocessor and its
# matching encoder, each wrapped as a Keras layer.
preprocess_handle = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
encoder_handle = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

bert_preprocess = hub.KerasLayer(preprocess_handle)
bert_encoder = hub.KerasLayer(encoder_handle)

**Example: getting the embedding of a sentence**

In [16]:
def get_sentence_embeding(sentences):
    """Embed a batch of sentences with BERT.

    Runs `sentences` through `bert_preprocess`, feeds the result to
    `bert_encoder`, and returns the encoder's 'pooled_output' entry
    (one vector per input sentence).
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

# Two sample sentences to show the shape of the returned embeddings.
sample_sentences = [
    "500$ discount. hurry up",
    "Bhavin, are you up for a volleybal game tomorrow?",
]
get_sentence_embeding(sample_sentences)

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.84351695, -0.5132727 , -0.88845736, ..., -0.74748874,
        -0.75314736,  0.91964495],
       [-0.87208354, -0.50543964, -0.94446677, ..., -0.8584749 ,
        -0.7174534 ,  0.88082975]], dtype=float32)>

**Building model**

In [None]:
#BERT Layer
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name = "text")
preprocessed_inputs = bert_preprocess(text_input)
encoded_outputs = bert_encoder(preprocessed_inputs)

#Neural network
layer = tf.keras.layers.Dropout(0.1, name='dropout') (encoded_outputs['pooled_output'])
layer= tf.keras.layers.Dense(1, activation='sigmoid', name="output")(layer)

#Construct the final model
model = tf.keras.Model(inputs=[text_input], outputs=[layer])

In [103]:
model.summary()

Model: "model_14"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_type_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128)}                                                   

In [101]:
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=[tf.keras.metrics.BinaryAccuracy(name = 'Accuracy'),
                       tf.keras.metrics.Precision(name = 'Precision'), 
                       tf.keras.metrics.Recall(name = 'Recall')])

In [102]:
import numpy as np

# Convert the label Series from the train/test split to a plain int64 array.
# The original cell read an undefined name `Y_train` (NameError on a fresh
# kernel) and never passed the converted labels to `fit`.
y_train = np.asarray(y_train).astype(np.int64)
model.fit(X_train, y_train, epochs=3)

Epoch 1/3

KeyboardInterrupt: 