In [None]:
!pip install -q -U "tensorflow-text==2.8.*"
!pip install -q tf-models-official==2.7.0
!pip install transformers

In [None]:
import tensorflow
import tensorflow_hub as hub
import tensorflow_text as text
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Raise TensorFlow's logger threshold to ERROR to keep cell output readable.
tensorflow.get_logger().setLevel('ERROR')

In [None]:
class model_fine_tuner():
  """Fine-tunes one text classifier per TF-Hub encoder on a shared data split.

  Each classifier is: string input -> optional TF-Hub preprocessing layer ->
  trainable TF-Hub encoder -> dropout/dense classification head. The data is
  split once (80/20, stratified on the labels) and every model is trained and
  validated on that same split.
  """

  def __init__(self, data, label, n_classes, models, preprocessors, batch = 8, epochs = 5, optimizer = None):
    """Stores the training configuration and splits the data.

    Args:
      data: Text samples; fed to the string input of every model.
      label: Targets aligned with `data`; also used to stratify the split.
      n_classes: Number of classes. Above 2 a categorical loss/metric and an
        `n_classes`-unit softmax head are used; otherwise a binary loss/metric
        and a single sigmoid unit.
      models: TF-Hub handles of the encoders to fine-tune.
      preprocessors: Preprocessing handles, position-matched with `models`;
        `None` for an encoder that needs no preprocessing layer.
      batch: Batch size passed to `fit`.
      epochs: Number of epochs each model is trained for.
      optimizer: Optimizer (instance or Keras identifier) used for every
        model. Defaults to Adam with a learning rate of 1e-5.

    Raises:
      ValueError: If fewer preprocessors than models are given.
    """
    if len(preprocessors) < len(models):
      raise ValueError(
          f'Expected one preprocessor entry per model, got {len(preprocessors)} '
          f'preprocessors for {len(models)} models')
    self.trained_models = []
    self.models = models
    self.preprocessors = preprocessors
    # An optimizer instance used as a signature default is created once, when
    # the class is defined, and shared by every tuner; build it per instance.
    self.optimizer = optimizer if optimizer is not None else keras.optimizers.Adam(learning_rate=1e-5)
    self.batch = batch
    self.epochs = epochs
    self.n_classes = n_classes if n_classes>2 else 1
    self.loss = keras.losses.CategoricalCrossentropy() if n_classes>2 else keras.losses.BinaryCrossentropy()
    self.metrics = tensorflow.metrics.CategoricalAccuracy() if n_classes>2 else tensorflow.metrics.BinaryAccuracy()
    self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(data, label, test_size=0.2, random_state=40, shuffle=True, stratify=label)

  @staticmethod
  def _head_activation(n_units):
    """Returns the output activation: sigmoid for one unit, softmax otherwise.

    Softmax over a single unit always yields 1.0, so the binary case must use
    a sigmoid to produce a usable probability.
    """
    return 'sigmoid' if n_units == 1 else 'softmax'

  def _restart_optimizer_schedule(self):
    """Rewinds the shared optimizer's step counter to zero.

    The same optimizer is used for every model, so without this the step
    counter (and any learning-rate schedule driven by it) would continue from
    where the previous model stopped. Identifier strings have no counter and
    are left alone.
    """
    iterations = getattr(self.optimizer, 'iterations', None)
    if iterations is not None:
      iterations.assign(0)

  def build_model(self, preprocess_model, model):
    """Builds an uncompiled classifier around one TF-Hub encoder.

    Args:
      preprocess_model: TF-Hub handle of the preprocessing model, or `None` to
        feed the raw strings straight into the encoder.
      model: TF-Hub handle of the encoder; loaded as a trainable layer named
        'encoder'.

    Returns:
      A `keras.Model` mapping the string input named 'text' to class scores.
    """
    text_input = keras.layers.Input(shape=(), dtype=tensorflow.string, name='text')
    text_processed = text_input
    if preprocess_model is not None:
      preprocessing_layer = hub.KerasLayer(preprocess_model, name='preprocessing')
      text_processed = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(model, trainable=True, name='encoder')
    output = encoder(text_processed)
    # Encoders that return a dict of outputs expose the sentence-level
    # embedding under 'pooled_output'; others return the embedding directly.
    if isinstance(output, dict):
      output = output['pooled_output']
    output = keras.layers.Dropout(0.2)(output)
    output = keras.layers.Dense(512, activation='relu', name='classifier')(output)
    output = keras.layers.Dropout(0.2)(output)
    output = keras.layers.Dense(self.n_classes, activation=self._head_activation(self.n_classes))(output)
    return keras.Model(text_input, output)

  def fine_tune(self):
    """Builds, trains and saves one classifier per configured encoder.

    Weights of the i-th model are written to `model<i>.h5` in the current
    working directory.

    Returns:
      The list of trained models (also available as `self.trained_models`).
    """
    # Start from an empty list so re-running this method (e.g. re-executing
    # the notebook cell) does not leave duplicates behind.
    self.trained_models = []
    for i, encoder_model in enumerate(self.models):
      preprocess_model = self.preprocessors[i]
      print(f'Fine Tune: {preprocess_model} {encoder_model}')
      model = self.build_model(preprocess_model, encoder_model)
      self._restart_optimizer_schedule()
      model.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)
      model.fit(self.X_train, self.Y_train, batch_size=self.batch, epochs=self.epochs, validation_data=(self.X_test, self.Y_test))
      model.save_weights(f'model{i}.h5')
      self.trained_models.append(model)
    return self.trained_models

In [None]:
class classifier_model():
  """Trains a dense classifier on the concatenated outputs of frozen encoders.

  The encoders are taken from already built Keras models (each must contain a
  layer named 'encoder'); only the dense head created here is trained.
  """

  def __init__(self, data, label, n_classes, models, batch = 8, epochs = 5, optimizer = None):
    """Stores the training configuration and splits the data.

    Args:
      data: Text samples; fed to the string input of the combined model.
      label: Targets aligned with `data`; also used to stratify the split.
      n_classes: Number of classes. Above 2 a categorical loss/metric and an
        `n_classes`-unit softmax head are used; otherwise a binary loss/metric
        and a single sigmoid unit.
      models: Keras models whose 'encoder' layer output is reused as features.
      batch: Batch size passed to `fit`.
      epochs: Number of training epochs.
      optimizer: Optimizer (instance or Keras identifier). Defaults to Adam
        with a learning rate of 1e-5.
    """
    self.trained_model = None
    self.models = models
    # An optimizer instance used as a signature default is created once, when
    # the class is defined, and shared by every instance; build it per instance.
    self.optimizer = optimizer if optimizer is not None else keras.optimizers.Adam(learning_rate=1e-5)
    self.batch = batch
    self.epochs = epochs
    self.n_classes = n_classes if n_classes>2 else 1
    self.loss = keras.losses.CategoricalCrossentropy() if n_classes>2 else keras.losses.BinaryCrossentropy()
    self.metrics = tensorflow.metrics.CategoricalAccuracy() if n_classes>2 else tensorflow.metrics.BinaryAccuracy()
    self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(data, label, test_size=0.2, random_state=40, shuffle=True, stratify=label)

  @staticmethod
  def _head_activation(n_units):
    """Returns the output activation: sigmoid for one unit, softmax otherwise.

    Softmax over a single unit always yields 1.0, so the binary case must use
    a sigmoid to produce a usable probability.
    """
    return 'sigmoid' if n_units == 1 else 'softmax'

  def _restart_optimizer_schedule(self):
    """Rewinds the optimizer's step counter to zero before a new training run.

    `train` builds a new model on every call but keeps the same optimizer, so
    the counter would otherwise carry over between runs. Identifier strings
    have no counter and are left alone.
    """
    iterations = getattr(self.optimizer, 'iterations', None)
    if iterations is not None:
      iterations.assign(0)

  def build_model(self):
    """Builds the uncompiled ensemble model.

    Returns:
      A `keras.Model` mapping a string input named 'text' to class scores.

    Raises:
      ValueError: If no source models were supplied.
    """
    if len(self.models) == 0:
      raise ValueError('classifier_model needs at least one trained model to build on')
    text_input = keras.layers.Input(shape=(), dtype=tensorflow.string, name='text')
    encoder_outputs = []
    for enc in self.models:
      # Cut each source model at its 'encoder' layer and freeze the result so
      # that only the dense head below receives gradient updates.
      encoder_layer = keras.Model(enc.input, enc.get_layer('encoder').output)
      encoder_layer.trainable = False
      output = encoder_layer(text_input)
      if isinstance(output, dict):
        output = output['pooled_output']
      encoder_outputs.append(output)
    # With a single encoder there is nothing to concatenate.
    if len(encoder_outputs) == 1:
      output = encoder_outputs[0]
    else:
      output = keras.layers.Concatenate()(encoder_outputs)
    model_dense = keras.Sequential([
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(self.n_classes, activation=self._head_activation(self.n_classes))
    ])
    output = model_dense(output)
    return keras.Model(text_input, output)

  def train(self):
    """Builds, compiles and fits the ensemble model.

    The fitted model is kept in `self.trained_model`.

    Returns:
      The history object returned by `fit`.
    """
    self.trained_model = self.build_model()
    self._restart_optimizer_schedule()
    self.trained_model.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)
    print('Training Classifier Model')
    history = self.trained_model.fit(self.X_train, self.Y_train, batch_size=self.batch, epochs=self.epochs, validation_data=(self.X_test, self.Y_test))
    return history

In [None]:
import os

# Relative paths in later cells are resolved against the working directory.
# The Drive folder only exists on Colab with Google Drive mounted, so switch
# to it when present instead of failing the whole run elsewhere.
DATASET_DIR = '/content/drive/MyDrive/Data/dataset'
if os.path.isdir(DATASET_DIR):
  os.chdir(DATASET_DIR)
else:
  print(f'{DATASET_DIR} not found; keeping working directory {os.getcwd()}')

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from official.nlp import optimization

In [None]:
# Load the dataset. The path is relative, so it is resolved against the
# current working directory (changed by the os.chdir cell above when it runs).
data_raw = pd.read_csv('../input/ecommerce467/data_467.csv')

In [None]:
def processText(df):
  """Normalises a text Series to lower-case a-z words separated by single spaces.

  Args:
    df: pandas Series of strings. Missing values are passed through unchanged.

  Returns:
    A new Series of the same length in which every value is lower-cased, every
    character outside a-z is treated as a separator, and runs of separators
    are collapsed (leading/trailing ones removed).
  """
  # Lower-case first: the character class below only keeps a-z, so upper-case
  # letters would otherwise be deleted instead of kept.
  df = df.str.lower()
  # regex=True must be explicit: pandas 2.0 changed the default to a literal
  # match, which would leave the text untouched.
  df = df.str.replace(r'[^a-z]', " ", regex=True)
  # Split on whitespace and re-join to collapse repeated spaces.
  df = df.str.split().str.join(' ')
  return df

In [None]:
# Replace the 'Text' column with its cleaned version (the raw text is overwritten).
data_raw['Text'] = processText(data_raw['Text'])

In [None]:
# Pull the model inputs (cleaned text) and the targets (browse-node ids) out
# of the frame as plain arrays.
data = data_raw['Text'].values
label = data_raw['BROWSE_NODE_ID'].values

In [None]:
# OneHotEncoder expects a 2-D input: one column holding the label of each row.
label_t = label.reshape(-1,1)
# The dense-output switch was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4; support both.
try:
  onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
  onehot_encoder = OneHotEncoder(sparse=False)
label_encoded = onehot_encoder.fit_transform(label_t)

In [None]:
# TF-Hub encoder handles, position-matched with the preprocessing handles
# defined in `processor_list`.
model_list = [
    'https://tfhub.dev/google/electra_base/2',
    'https://tfhub.dev/google/universal-sentence-encoder/4',
    'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',
]

In [None]:
# TF-Hub preprocessing handles, one slot per entry of `model_list`.
# A None slot means that encoder gets no preprocessing layer.
processor_list = [
    'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
    None,
    'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
]

In [None]:
epochs = 8
batch_size = 32            # keep in sync with the batch size given to model_fine_tuner
validation_fraction = 0.2  # share of rows model_fine_tuner holds out for validation

# The optimizer takes one step per training batch, so the schedule length is
# derived from the training-set size instead of a hard-coded step count;
# otherwise warm-up and decay do not line up with the actual training run.
num_train_samples = int(len(data) * (1 - validation_fraction))
steps_per_epoch = -(-num_train_samples // batch_size)  # ceiling division
num_train_steps = steps_per_epoch * epochs
num_warmup_steps = int(0.1*num_train_steps)

# Peak learning rate. Fine-tuning pretrained transformer encoders is normally
# done in the 2e-5..5e-5 range; 1e-2 is large enough to wreck the pretrained
# weights once the schedule actually reaches it.
init_lr = 3e-5
optimizer = optimization.create_optimizer(init_lr=init_lr,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')

In [None]:
# Configure fine-tuning of every encoder in `model_list` on the 467-class task.
fine_tuner = model_fine_tuner(
    data,
    label_encoded,
    n_classes=467,
    models=model_list,
    preprocessors=processor_list,
    batch=32,
    epochs=8,
    optimizer=optimizer,
)

In [None]:
# Train one classifier per encoder; the returned list is the same one stored
# on fine_tuner.trained_models.
tuned_models = fine_tuner.fine_tune()

In [None]:
# Ensemble head on top of the fine-tuned encoders; no optimizer is passed, so
# the class default is used.
class_model = classifier_model(
    data,
    label_encoded,
    n_classes=467,
    models=fine_tuner.trained_models,
    batch=16,
    epochs=1,
)

In [None]:
# Build and fit the ensemble classifier; the cell evaluates to the history
# object returned by train().
class_model.train()