# In [None]:
#import import_ipynb
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import transformers
import concurrent.futures

# Default number of sentence pairs per batch.
batch_size = 32
# Fixed token length that every encoded pair is padded to.
max_length = 512

# Class names, positioned to match the index of the model's output units.
labels = [
    "contradiction",
    "entailment",
    "neutral",
]


class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that tokenizes sentence pairs into BERT inputs.

    Args:
        sentence_pairs: Array of input sentence pairs.
        labels: Array of labels. Only read when ``include_targets`` is True.
        batch_size: Integer batch size.
        shuffle: Boolean, whether to shuffle the sample order.
        include_targets: Boolean, whether to include the labels.

    Returns:
        Tuples ``([input_ids, attention_mask, token_type_ids], labels)``
        (or just ``[input_ids, attention_mask, token_type_ids]``
        if ``include_targets=False``).
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=batch_size,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.include_targets = include_targets
        # Tokenizer of the pretrained "bert-base-uncased" model, used to
        # encode the text of every batch.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        # Sample order; permuted by on_epoch_end() when shuffling is enabled.
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Number of batches per epoch. Floor division: a trailing partial
        # batch is not counted.
        full_batches, _ = divmod(len(self.sentence_pairs), self.batch_size)
        return full_batches

    def __getitem__(self, idx):
        # Select the samples that belong to batch number `idx`.
        start = idx * self.batch_size
        batch_indexes = self.indexes[start : start + self.batch_size]
        batch_pairs = self.sentence_pairs[batch_indexes]

        # batch_encode_plus encodes both sentences of each pair together,
        # separated by the [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            batch_pairs.tolist(),
            add_special_tokens=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
            return_tensors="tf",
        )

        # Convert each encoded feature to an int32 numpy array, in the order
        # [input_ids, attention_mask, token_type_ids].
        feature_keys = ("input_ids", "attention_mask", "token_type_ids")
        features = [np.array(encoded[key], dtype="int32") for key in feature_keys]

        # Inference mode: features only.
        if not self.include_targets:
            return features

        # Training/validation mode: features plus the matching labels.
        targets = np.array(self.labels[batch_indexes], dtype="int32")
        return features, targets

    def on_epoch_end(self):
        # Permute the sample order in place when shuffling is enabled.
        if not self.shuffle:
            return
        # A new RandomState(42) is built on every call, so the same
        # permutation is applied to the current order each time.
        rng = np.random.RandomState(42)
        rng.shuffle(self.indexes)
            
#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

#strategy = tf.distribute.MirroredStrategy()

def create_model():
    """Build and compile the BERT + BiLSTM sentence-pair classifier.

    The pretrained "bert-base-uncased" model is frozen; a bidirectional LSTM,
    average + max pooling, dropout and a 3-unit softmax layer are stacked on
    its sequence output.

    Returns:
        Tuple ``(model, bert_model)``: the compiled Keras model and the
        frozen ``TFBertModel`` instance it wraps.
    """
    # Encoded token ids from the BERT tokenizer.
    input_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="input_ids"
    )
    # Attention masks tell the model which tokens should be attended to.
    attention_masks = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="attention_masks"
    )
    # Token type ids are binary masks identifying the two sequences.
    token_type_ids = tf.keras.layers.Input(
        shape=(max_length,), dtype=tf.int32, name="token_type_ids"
    )

    # Load the pretrained BERT model and freeze it so its features are
    # reused without being modified.
    bert_model = transformers.TFBertModel.from_pretrained("bert-base-uncased")
    bert_model.trainable = False

    bert_outputs = bert_model(
        input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
    )
    # Index instead of tuple-unpacking: depending on the installed
    # transformers version the call returns a plain tuple or a dict-like
    # ModelOutput. Integer indexing works for both, whereas unpacking a
    # ModelOutput iterates over its keys. Element 0 is the per-token
    # sequence output; the pooled output is not used by this model.
    sequence_output = bert_outputs[0]

    # Trainable layers on top of the frozen encoder adapt the pretrained
    # features to the new data.
    bi_lstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(256, return_sequences=True)
    )(sequence_output)

    # Hybrid pooling over the BiLSTM sequence output.
    avg_pool = tf.keras.layers.GlobalAveragePooling1D()(bi_lstm)
    max_pool = tf.keras.layers.GlobalMaxPooling1D()(bi_lstm)
    concat = tf.keras.layers.concatenate([avg_pool, max_pool])
    dropout = tf.keras.layers.Dropout(0.3)(concat)
    output = tf.keras.layers.Dense(3, activation="softmax")(dropout)

    model = tf.keras.models.Model(
        inputs=[input_ids, attention_masks, token_type_ids], outputs=output
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss="categorical_crossentropy",
        metrics=["acc"],
    )
    return model, bert_model

#print(f"Strategy: {strategy}")
model, bert_model = create_model()

# Restore the trained weights from the most recent checkpoint in "model/".
# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint, so fail with a clear message instead of passing None on.
checkpoint_path = tf.train.latest_checkpoint("model/")
if checkpoint_path is None:
    raise FileNotFoundError(
        'No checkpoint found in "model/"; cannot load the trained weights.'
    )
model.load_weights(checkpoint_path)

def check_(sentence_pairs):
    """Classify sentence pairs and report the top class of the first one.

    Args:
        sentence_pairs: Array of sentence pairs; wrapped in a
            ``BertSemanticDataGenerator`` with batch size 1, no shuffling
            and no targets.

    Returns:
        Tuple ``(pred, proba)``: the name of the highest-scoring label for
        the first prediction row, and that score formatted with a leading
        space, two decimals and a trailing ``%`` (e.g. ``" 0.87%"``). The
        score is the raw model output; it is not multiplied by 100.
    """
    generator = BertSemanticDataGenerator(
        sentence_pairs,
        labels=None,
        batch_size=1,
        shuffle=False,
        include_targets=False,
    )

    # Only the first row of the prediction output is used.
    scores = model.predict(generator)[0]
    best = np.argmax(scores)
    top_score = scores[best]
    return labels[best], f"{top_score: .2f}%"

def _split_student_answer(text, limit=512, window=12):
    """Split *text* into consecutive chunks of at most *limit* characters."""
    chunks = []
    while len(text) > limit:
        # Prefer to cut right after the first '.', ',', ' ' or ':' found in
        # the last `window` characters before the limit. Fall back to a hard
        # cut at `limit` so the loop always makes progress.
        cut = limit
        for pos in range(limit - window, limit):
            if text[pos] in ".,: ":
                cut = pos + 1
                break
        chunks.append(text[:cut])
        text = text[cut:]
    chunks.append(text)
    return chunks


def _parse_confidence(confidence_text):
    """Convert a score string such as ``" 0.87%"`` to an integer percentage."""
    # Parse the whole number rather than slicing fixed columns, so that
    # " 1.00%" yields 100 instead of 0.
    return int(round(float(confidence_text.strip().rstrip("%")) * 100))


def _mark_for_prediction(label, confidence, threshold=60):
    """Map a label and confidence to a mark, or None when it is not counted."""
    if confidence < threshold:
        return None
    surplus = confidence - threshold
    if label == "contradiction":
        # Higher confidence in a contradiction lowers the mark (40 down to 0).
        return 40 - surplus
    if label == "neutral":
        return surplus / 2 + 60
    if label == "entailment":
        return surplus / 2 + 80
    return None


def CheckSim(Teacher, Student, classifier=None):
    """Grade a student answer against the teacher answer.

    The student answer is split into chunks of at most 512 characters, each
    chunk is paired with the teacher answer and classified, and the marks of
    all predictions with at least 60% confidence are averaged.

    Args:
        Teacher: Reference answer; converted with ``str``.
        Student: Answer to grade.
        classifier: Optional callable that takes one sentence-pair array of
            shape (1, 2) and returns ``(label, confidence_text)`` in the same
            format as ``check_``. Defaults to ``check_``.

    Returns:
        The mean mark as a float.

    Raises:
        ZeroDivisionError: If no prediction reaches the confidence
            threshold, so there is nothing to average.
    """
    if classifier is None:
        classifier = check_

    sentence_pairs = [
        np.array([[str(Teacher), str(chunk)]])
        for chunk in _split_student_answer(Student)
    ]

    # NOTE(review): chunks are classified on a thread pool; confirm that the
    # default classifier is safe to call concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        predictions = list(executor.map(classifier, sentence_pairs))

    marks = []
    for label, confidence_text in predictions:
        mark = _mark_for_prediction(label, _parse_confidence(confidence_text))
        if mark is not None:
            marks.append(mark)

    if not marks:
        raise ZeroDivisionError(
            "no prediction reached the 60% confidence threshold; "
            "cannot compute an average mark"
        )
    return sum(marks) / len(marks)
    
#sentence1 = "It should be re-engineered or replaced if suitable system is available, this is so because of its high business value it contributes a lot to the business."
#sentence2 = "High Quality with High Business value as the system is somehow important to the business and as the change cannot be avoided further in this system and is running for long time."

#CheckSim(sentence1, sentence2)