# Inference Using TeamSynth

In [1]:
import torch
import json
import pandas as pd

## Define the model and load config files

In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch
from transformers import DistilBertModel, BertTokenizer

import numpy as np

class BERTEncoder(nn.Module):
    """Turn tokenised text into one fixed-size embedding per sequence.

    A pretrained DistilBERT produces per-token hidden states, a learned linear
    scorer converts them into attention weights over the sequence, and the
    weighted sum of the hidden states is projected to
    ``config['embedding_size']``.
    """

    def __init__(self, config, is_training=True):
        """Create the encoder, the attention scorer and the output projection.

        Args:
            config: Mapping that provides ``'embedding_size'``.
            is_training: Accepted for interface compatibility; not read here.
        """
        super().__init__()
        # Attribute names and creation order are left untouched so previously
        # saved models and state dicts still resolve them.
        self.enc = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.att = nn.Linear(768, 1)
        self.fc = nn.Linear(768, config['embedding_size'])

    def forward(self, tokens):
        """Encode a batch of tokenised sequences.

        Args:
            tokens: Mapping with ``'input_ids'`` and ``'attention_mask'``.

        Returns:
            Tensor of shape [batch size, embed_size].
        """
        encoded = self.enc(
            input_ids=tokens['input_ids'],
            attention_mask=tokens['attention_mask'],
            return_dict=True,
        )
        hidden_states = encoded.last_hidden_state

        # One score per position, normalised along the sequence axis.
        # weights = [batch size, src len, 1]
        # NOTE(review): the softmax covers every position, padding included,
        # because attention_mask is only handed to the encoder - confirm this
        # matches how the model was trained.
        weights = F.softmax(self.att(hidden_states), dim=1)

        # Weighted sum of the hidden states.
        # pooled = [batch size, 1, 768]
        pooled = torch.bmm(weights.transpose(1, 2), hidden_states)

        # projected = [batch size, 1, embed_size]
        projected = self.fc(pooled)
        return projected.squeeze(1)


class APP(nn.Module):
    """Predict personality signals from tokenised text.

    Text is embedded by ``BERTEncoder``, passed through a stack of seven
    Linear+ReLU layers and projected to a personality embedding. Two heads
    read that embedding: a 16-way ``'mbti'`` classifier and a shared OCEAN
    layer feeding five independent 2-way classifiers
    (``'cOPN'``, ``'cCON'``, ``'cEXT'``, ``'cAGR'``, ``'cNEU'``).
    """

    def __init__(self, config, is_training=True):
        """Build the encoder, the shared trunk and the prediction heads.

        Args:
            config: Mapping providing ``'embedding_size'``, ``'hidden_size'``,
                ``'pers_embedding_size'`` and ``'ocean_size'``.
            is_training: Forwarded to ``BERTEncoder``.
        """
        super().__init__()
        self._text_encoder = BERTEncoder(config, is_training)

        self.config = config

        hidden_size = config['hidden_size']
        # Seven Linear+ReLU pairs: one input projection followed by six
        # hidden-to-hidden layers. Built in the original order so Sequential
        # indices (and therefore state-dict keys) are unchanged.
        layers = [nn.Linear(config['embedding_size'], hidden_size), nn.ReLU()]
        for _ in range(6):
            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
        self.fully_connected = nn.Sequential(*layers)

        self._fc2 = nn.Linear(hidden_size, config['pers_embedding_size'])

        self.mbti_classifier = nn.Linear(config['pers_embedding_size'], 16)

        self.OCEAN_layer = nn.Linear(config['pers_embedding_size'], config['ocean_size'])

        self.O_classifier = nn.Linear(config['ocean_size'], 2)
        self.C_classifier = nn.Linear(config['ocean_size'], 2)
        self.E_classifier = nn.Linear(config['ocean_size'], 2)
        self.A_classifier = nn.Linear(config['ocean_size'], 2)
        self.N_classifier = nn.Linear(config['ocean_size'], 2)

    def get_ocean_loss(self, predictions, labels):
        """Sum the cross-entropy losses of the five OCEAN classifiers.

        Args:
            predictions: Mapping from trait key to logits, as returned by
                ``forward``.
            labels: Mapping from the same trait keys to class-index tensors.

        Returns:
            Scalar tensor holding the summed loss.
        """
        criterion = nn.CrossEntropyLoss()

        OCEAN_loss = 0
        for cat in ['cOPN', 'cCON', 'cEXT', 'cAGR', 'cNEU']:
            logits = predictions[cat]
            # Place the targets on the logits' device instead of relying on a
            # module-level ``device`` global that may not exist or may differ.
            OCEAN_loss += criterion(logits, labels[cat].long().to(logits.device))

        return OCEAN_loss

    def get_mbti_loss(self, predictions, labels):
        """Cross-entropy loss of the 16-way ``'mbti'`` head.

        Args:
            predictions: Mapping containing ``'mbti'`` logits.
            labels: A single class index; it is wrapped into a batch of one.

        Returns:
            Scalar loss tensor.
        """
        criterion = nn.CrossEntropyLoss()

        logits = predictions['mbti']
        # Targets follow the logits' device rather than a global ``device``.
        target = torch.Tensor([labels]).long().to(logits.device)
        mbti_loss = criterion(logits, target)
        return mbti_loss

    def forward(self, tokens):
        """Run the full model on tokenised text.

        Args:
            tokens: Tokeniser output accepted by ``BERTEncoder.forward``.

        Returns:
            Dict with the logits under ``'mbti'``, ``'cOPN'``, ``'cCON'``,
            ``'cEXT'``, ``'cAGR'``, ``'cNEU'`` and the pre-activation
            ``'personality_embeddings'``.
        """
        # get text embeddings
        text_embeddings = self._text_encoder(tokens)

        # get personality embeddings
        # (the trunk already ends in ReLU; the extra F.relu is a no-op kept
        # for parity with the trained model's definition)
        personality_embeddings = self._fc2(F.relu(self.fully_connected(text_embeddings)))

        # get mbti
        mbti = self.mbti_classifier(F.relu(personality_embeddings))

        # get OCEAN
        OCEAN = F.relu(self.OCEAN_layer(F.relu(personality_embeddings)))
        O_pred = self.O_classifier(OCEAN)
        C_pred = self.C_classifier(OCEAN)
        E_pred = self.E_classifier(OCEAN)
        A_pred = self.A_classifier(OCEAN)
        N_pred = self.N_classifier(OCEAN)

        predictions = {
            'mbti': mbti,
            'cOPN': O_pred,
            'cCON': C_pred,
            'cEXT': E_pred,
            'cAGR': A_pred,
            'cNEU': N_pred,
            'personality_embeddings': personality_embeddings,
        }

        return predictions

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Read the model hyper-parameters. The context manager closes the file
# handle, which a bare open() would leave for the garbage collector.
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints that come from a trusted source.
# NOTE(review): the file holds a whole pickled module, so BERTEncoder and APP
# must already be defined in this session. On PyTorch releases where
# torch.load defaults to weights_only=True this call needs weights_only=False
# - confirm against the installed version. Also confirm the file name
# 'mode_trained.pt' is intended.
model = torch.load('mode_trained.pt', map_location=device)
# The notebook only runs inference: switch to eval mode so layers such as
# dropout behave deterministically regardless of the mode the model was
# saved in.
model.eval()
type(model)

__main__.APP

In [5]:
# Sample messages to embed. Real data could be loaded instead, e.g.
# texts = pd.read_csv('emails.csv')
# NOTE(review): iterating a DataFrame yields column names, so that variant
# would need to select the text column before the loop further down.
texts = [
    (
        "There are some Las Vegas Delight True Beef Steak tomatoes available "
        "by Jody's desk. I wouldn't recommend taking ANY of them, as I'm sure "
        "they don't taste very good. (Yeah, right!) Tomatoes are available in "
        "limited quantity, so come and get it!"
    ),
    'You have beef-cakes up there? Are they single?',
]

## Calculate Personality Vector

In [6]:
from transformers import DistilBertTokenizerFast
# Tokeniser for the same 'distilbert-base-uncased' checkpoint that BERTEncoder
# loads, so the produced input ids line up with the encoder's vocabulary.
tokeniser = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [9]:
# Average the personality embeddings of all texts into a single vector.
if len(texts) == 0:
    # Dividing the zero vector by zero would silently produce NaNs.
    raise ValueError('texts must contain at least one message')

personality_vector = torch.zeros(config['pers_embedding_size'])
# Inference only: no_grad avoids building (and keeping) an autograd graph for
# every text that is accumulated into the running sum.
with torch.no_grad():
    for text in texts:
        # truncation=True keeps long messages within the tokeniser's maximum
        # length; .to(device) puts the inputs on the device the model was
        # loaded onto.
        tokens = tokeniser(
            text,
            add_special_tokens=True,
            padding=True,
            truncation=True,
            return_tensors='pt',
        ).to(device)
        embedding = model(tokens)['personality_embeddings'].squeeze(0)
        # The accumulator lives on the CPU, so bring the result back first.
        personality_vector += embedding.cpu()
personality_vector /= len(texts)
print(personality_vector)

tensor([-7633800.5000,   551317.6250, -5410424.0000, -1598371.0000,
        -9110358.0000, -6080528.0000, -5336519.5000, -7313739.0000,
        -9181208.0000, -7408060.5000, -9300112.0000, -3537681.0000,
        -7463901.0000, -5261413.0000, -6973322.0000, -9148934.0000,
         -371617.8125, -6708578.0000, -1709133.7500, -4153639.0000,
        -3452405.2500, -2069381.7500,   553418.8750, -7075484.5000,
        -4908009.5000, -6897006.0000, -9208950.0000, -1622002.2500,
        -6397691.0000, -4773759.5000, -7699783.0000, -5170881.5000,
        -9174155.0000, -9237677.0000, -7619853.5000, -9248634.0000,
        -5503710.5000, -3300319.7500, -6861029.0000, -6652827.5000,
        -3481233.0000, -9163385.0000, -3329665.5000, -3279507.7500,
        -5357075.5000, -5037922.0000, -6683646.0000, -3436851.2500,
        -3422966.5000, -7676233.0000, -3217417.0000, -7986016.0000,
         2060098.7500, -4108397.5000, -3868438.0000, -5215857.0000,
        -7992092.0000, -3480552.7500, -5024118.5

## Insert Employee into Database

In [10]:
# Profile fields stored next to the employee's personality vector.
john_metadata = dict(
    name='John',
    designation='Software Engineer',
    email='john@doe.com',
)
# Skill flags are set by key because 'c++' is not a valid keyword name.
john_metadata['python'] = True
john_metadata['c++'] = True

In [16]:
from vector_db import *
# Obtain the 'employee_example' collection of the store under ./vector_db.
db = get_db('./vector_db', 'employee_example')
# Tensor.tolist() converts the vector to plain Python floats in one call
# instead of one .item() call per element.
# NOTE(review): the vectors are passed as a batch of one while the metadata is
# a bare dict - confirm this matches add_or_update_employees' signature.
add_or_update_employees([personality_vector.tolist()], john_metadata, db)