<a href="https://colab.research.google.com/github/kky-ai/tech-demo/blob/main/semantic_continuity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Semantic Continuity Model - Example usage


##### Download model

In [None]:
# Download the SC model archive from Google Drive (gdown fetches it by file ID)
# and unpack it. `unzip -u` only (re)extracts files that are missing on disk or
# older than the copy in the archive.
# NOTE(review): the archive presumably unpacks into `sc/`, since the model is
# later loaded from 'sc/model' -- confirm.

!gdown 1ok3CbvVeEv4J5Y0SkNoVk4ifdHnQd4qy && unzip -u sc.zip

##### Install libs

In [None]:
!pip install transformers==4.18
#!pip install tensorflow==2.6.2
!pip install numpy

##### Initialize the model

In [None]:
from transformers import AutoTokenizer
from transformers.optimization_tf import WarmUp
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

class SCModel:
    """Semantic Continuity Model.

    Bundles the `bert-base-cased` tokenizer with a saved Keras model and
    exposes `score`, which returns a distance between a prompt and a candidate
    reaction.
    """

    # Fixed sequence lengths (including [CLS] and [SEP]) that prompts and
    # reactions are padded/truncated to before being embedded.
    # NOTE(review): presumably these match the input sizes the saved model was
    # trained with -- confirm before changing them.
    PROMPT_MAX_LEN = 32
    REACTION_MAX_LEN = 64

    def __init__(self):
        """Load the tokenizer, cache its special-token ids, and load the models."""
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
        self.pad_token_id = self.tokenizer.pad_token_id
        self.cls_token_id = self.tokenizer.cls_token_id
        self.sep_token_id = self.tokenizer.sep_token_id
        self.mask_token_id = self.tokenizer.mask_token_id

        self.model, self.embedder, self.dist_model = None, None, None
        self.load_models()

    def load_models(self):
        """Load the saved Keras model from 'sc/model' and pick out its sub-models.

        The 'embedder' and 'dist_model' layers are looked up by name on the
        loaded model and stored on the instance.
        """
        # 'siamese_loss' is mapped to None rather than to a real function.
        # NOTE(review): presumably fine because the loss is only needed for
        # training, not for inference -- confirm.
        self.model = load_model('sc/model', custom_objects={'WarmUp': WarmUp, 'siamese_loss': None})
        self.embedder = self.model.get_layer('embedder')
        self.dist_model = self.model.get_layer('dist_model')
        print('Models loaded.')

    def vectorize_seq(self, seq, max_len):
        """Turn a text into a fixed-length batch of token ids.

        Args:
            seq: Text to tokenize.
            max_len: Length the id sequence is padded/truncated to.

        Returns:
            The int32 array produced by `pad_sequences` for a one-element
            batch: [CLS] + token ids + [SEP], post-padded with the pad id.
        """
        tokens = self.tokenizer.tokenize(seq)
        input_ids = [self.cls_token_id, *self.tokenizer.convert_tokens_to_ids(tokens), self.sep_token_id]
        # NOTE(review): `truncating` is left at the pad_sequences default
        # ('pre'), so an over-long input loses its *leading* ids, [CLS]
        # included. Kept as-is because the saved model may have been trained
        # with exactly this preprocessing -- confirm before changing.
        input_ids = pad_sequences([input_ids], maxlen=max_len, dtype=np.int32, padding='post', value=self.pad_token_id)
        return input_ids

    def score(self, prompt, reaction):
        """Return the model's distance between `prompt` and `reaction`.

        Args:
            prompt: The preceding text (e.g. a question).
            reaction: The candidate follow-up text.

        Returns:
            The first output of `dist_model` as a Python float. Its second
            output is ignored.
        """
        # The embedder yields two outputs per input; the prompt uses the second
        # one and the reaction the first.
        # NOTE(review): presumably separate prompt/reaction embedding heads --
        # confirm against the training code.
        _, pemb = self.embedder.predict(self.vectorize_seq(prompt, max_len=self.PROMPT_MAX_LEN))
        remb, _ = self.embedder.predict(self.vectorize_seq(reaction, max_len=self.REACTION_MAX_LEN))
        dist, _ = self.dist_model.predict([pemb, remb])
        # `predict` returns a batched array; calling float() directly on an
        # array with ndim > 0 is deprecated in NumPy >= 1.25, so extract the
        # single element explicitly.
        return float(np.asarray(dist).item())
    

# Build the scorer once: the constructor loads the tokenizer and the saved
# Keras model, so later cells can reuse `sc` without reloading anything.
sc = SCModel()

##### Example usage

In [7]:
# Prompt that every candidate below is scored against.
q = 'What did you have for lunch?'

# Candidate follow-ups to the prompt.
following = (
    'For lunch we had some fish and chips.',
    'We did not eat at all.',
    'We visited the city center. Then we had fish and chips.',
    'We visited the city center. Then we had some lunch.',
    "We've been to the North London Derby. Arsenal was fantastic.",
)

# Print the prompt followed by an empty line, then one "[distance] text" line
# per candidate. Judging by the recorded output, smaller distances go with
# more fitting follow-ups.
print(f'Q: {q}\n')
for reaction in following:
    distance = sc.score(q, reaction)
    print(f'[{round(distance, 4)}] {reaction}')

Q: What did you have for lunch?

[0.0305] For lunch we had some fish and chips.
[0.0432] We did not eat at all.
[0.4193] We visited the city center. Then we had fish and chips.
[1.5441] We visited the city center. Then we had some lunch.
[1.832] We've been to the North London Derby. Arsenal was fantastic.
