# **Load the Required Libraries**

In [None]:
import pandas as pd
import numpy as np
import os

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from transformers import TFBertModel
import transformers

## **Load MetaData**

In [None]:
# Read the competition's training metadata and preview its first rows.
TRAIN_CSV = "../input/feedback-prize-effectiveness/train.csv"
df = pd.read_csv(TRAIN_CSV)
df.head()

In [None]:
# List the distinct values found in the `discourse_type` column.
df['discourse_type'].unique()

## **BERT Encoder**

In [None]:
def bert_encode(texts, tokenizer, max_len=256):
    """Tokenize each text to a fixed length and stack the results.

    Args:
        texts: Iterable of strings to encode.
        tokenizer: Callable invoked once per text as
            ``tokenizer(text, max_length=max_len, truncation=True,
            padding='max_length', add_special_tokens=True)``. It must return
            a mapping with ``'input_ids'``, ``'token_type_ids'`` and
            ``'attention_mask'`` entries.
        max_len: Value forwarded to the tokenizer as ``max_length``; together
            with the truncation and padding flags it requests sequences of
            exactly this length.

    Returns:
        A tuple ``(input_ids, token_type_ids, attention_mask)`` of NumPy
        arrays, each built from the per-text lists in input order.
    """
    input_ids = []
    token_type_ids = []
    attention_mask = []

    for text in texts:
        # Forward `max_len` rather than a hard-coded 256, so callers that
        # ask for another sequence length actually get it.
        token = tokenizer(
            text,
            max_length=max_len,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
        )
        input_ids.append(token['input_ids'])
        token_type_ids.append(token['token_type_ids'])
        attention_mask.append(token['attention_mask'])

    return np.array(input_ids), np.array(token_type_ids), np.array(attention_mask)

### **BERT Tokenizer**

In [None]:
# Load the BERT tokenizer from the local `bert-base-cased` directory and
# write a copy of its files into the current working directory.
BERT_DIR = "../input/huggingface-bert-variants/bert-base-cased/bert-base-cased"
tokenizer = transformers.BertTokenizer.from_pretrained(BERT_DIR)
tokenizer.save_pretrained('.')

In [None]:
# Model input: the discourse type and the discourse text joined by a
# literal '[SEP]' marker.
df['inputs'] = df['discourse_type'] + '[SEP]' + df['discourse_text']

In [None]:
# Encode the three effectiveness classes as integer ids, then expose the
# encoded column under the name `label`.
label_ids = {"Ineffective": 0, "Adequate": 1, "Effective": 2}
new_label = {"discourse_effectiveness": label_ids}
df = df.replace(new_label).rename(columns={"discourse_effectiveness": "label"})

In [None]:
# Preview the first rows of `df`, which now carries the `inputs` and
# `label` columns.
df.head()

## **Connecting With TPU**

In [None]:
# Resolve and connect to the TPU cluster, then build the distribution
# strategy under which the model is later created and trained.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
# `tf.distribute.experimental.TPUStrategy` is a deprecated alias; the stable
# endpoint takes the same resolver argument.
tpu_strategy = tf.distribute.TPUStrategy(tpu)

### **Train Test Split the Data**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 12% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    df['inputs'],
    df['label'],
    test_size=0.12,
    random_state=42,
)

#### **BERT Encoding the Input Data**

In [None]:
# Tokenize both splits; each one becomes the
# (input_ids, token_type_ids, attention_mask) tuple returned by bert_encode.
X_train, X_valid = (
    bert_encode(split.astype(str), tokenizer) for split in (X_train, X_valid)
)

# Labels as plain NumPy arrays.
y_train, y_valid = y_train.values, y_valid.values

#### **Generate Train and Validation Dataset**

In [None]:
AUTO = tf.data.experimental.AUTOTUNE

# Training pipeline: repeated indefinitely, shuffled through a 2048-element
# buffer, grouped into batches of 16 and prefetched.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.repeat().shuffle(2048)
train_dataset = train_dataset.batch(16).prefetch(AUTO)

# Validation pipeline: a single pass in batches of 16, cached and prefetched.
valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
valid_dataset = valid_dataset.batch(16).cache().prefetch(AUTO)

In [None]:
# Display the training dataset object as the cell output.
train_dataset

### **Main Model**

In [None]:
def build_model(bert_model, max_len=256):
    """Build and compile a 3-class classifier on top of `bert_model`.

    Args:
        bert_model: Callable model invoked as
            ``bert_model(input_ids, token_type_ids=..., attention_mask=...)``.
            The first element of its output is indexed as
            ``[:, 0, :]``, so it must have at least three axes.
        max_len: Length of each of the three integer input sequences.

    Returns:
        A compiled Keras ``Model`` taking
        ``[input_ids, token_type_ids, attention_mask]`` and producing a
        3-way softmax output.
    """
    input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    token_type_ids = Input(shape=(max_len,), dtype=tf.int32, name="token_type_ids")
    attention_mask = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")

    sequence_output = bert_model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)[0]
    # Keep only the vector at the first sequence position
    # (presumably the [CLS] token representation -- confirm against the encoder).
    clf_output = sequence_output[:, 0, :]
    clf_output = Dropout(.1)(clf_output)
    out = Dense(3, activation='softmax')(clf_output)

    model = Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=out)
    # `learning_rate` replaces the deprecated `lr` alias, which newer Keras
    # optimizers no longer accept.
    model.compile(Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

## **Model Training**

In [None]:
# Create the encoder and classifier inside the TPU strategy scope, print the
# architecture, then train for 5 epochs of 200 steps each.
with tpu_strategy.scope():
    transformer_layer = TFBertModel.from_pretrained('bert-base-cased')
    model = build_model(transformer_layer, max_len=256)

    model.summary()

    model.fit(
        train_dataset,
        steps_per_epoch=200,
        validation_data=valid_dataset,
        epochs=5,
    )

# **Test Submission**

In [None]:
# Read the test metadata and build its model input the same way as for
# training: discourse type, a literal '[SEP]', then the discourse text.
TEST_CSV = "../input/feedback-prize-effectiveness/test.csv"
test = pd.read_csv(TEST_CSV)
test['text'] = test['discourse_type'] + '[SEP]' + test['discourse_text']
test.head()

### **BERT Encoding the Test Data**

In [None]:
# Tokenize the test inputs with the same encoder used for the training data.
test_text = bert_encode(test['text'].astype(str), tokenizer)

In [None]:
# Load the sample submission; its probability columns are overwritten with
# the model's predictions further down.
SAMPLE_SUBMISSION_CSV = "../input/feedback-prize-effectiveness/sample_submission.csv"
sub = pd.read_csv(SAMPLE_SUBMISSION_CSV)
sub.head()

### **Model Prediction**

In [None]:
# Run the trained model on the encoded test inputs. The columns of `preds`
# are indexed 0..2 below, matching the 3-unit softmax output of the model.
preds = model.predict(test_text, verbose=1)
preds

In [None]:
# Copy each prediction column into the matching submission column; the order
# follows the integer label encoding (0=Ineffective, 1=Adequate, 2=Effective).
for class_idx, class_name in enumerate(('Ineffective', 'Adequate', 'Effective')):
    sub[class_name] = preds[:, class_idx]
sub

#### **Model Submission File**

In [None]:
# Write the submission table to `submission.csv` without the index column.
sub.to_csv("submission.csv", index=False)