In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import torch
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split


In [2]:
# Load the labelled training utterances; later cells read its `text` and `intent` columns.
df = pd.read_csv('train.csv')
df

Unnamed: 0,text,intent
0,What expression would i use to say i love you ...,translate
1,Can you tell me how to say 'i do not speak muc...,translate
2,"What is the equivalent of, 'life is good' in f...",translate
3,"Tell me how to say, 'it is a beautiful morning...",translate
4,"If i were mongolian, how would i say that i am...",translate
...,...,...
14995,Can you explain why my card was declined?,card_declined
14996,How come starbucks declined my card when i tri...,card_declined
14997,How come my card was not accepted yesterday?,card_declined
14998,Find out what happened to make my card get dec...,card_declined


In [3]:
# Count the examples per intent label to check how balanced the classes are.
df.intent.value_counts()

translate          100
order_status       100
goodbye            100
account_blocked    100
what_song          100
                  ... 
reminder           100
change_speed       100
tire_pressure      100
no                 100
card_declined      100
Name: intent, Length: 150, dtype: int64

In [4]:
# Pretrained sentence encoder; every `model.encode(...)` call in this notebook goes through it.
model = SentenceTransformer('all-MiniLM-L12-v2')

In [5]:
# Embed every training utterance. encode() is given a plain list of strings
# (its documented input type) instead of a pandas Series, so the call does not
# depend on how the Series happens to be indexed.
text_embedding = model.encode(df['text'].tolist())
# Store each embedding as a Python list in its own row of a new column.
df['text_embedding'] = text_embedding.tolist()

In [6]:
# One row per distinct intent label, in order of first appearance in `df`.
intent_df = pd.DataFrame({'intent': df.intent.unique()})
# Embed the label strings themselves; a plain list is passed because that is
# the input type encode() documents (rather than a pandas Series).
intent_embedding = model.encode(intent_df['intent'].tolist())
intent_df['intent_embedding'] = intent_embedding.tolist()

In [7]:
# Attach each row's intent-label embedding with a left join on `intent`.
# NOTE(review): this rebinds `df`, so running the cell a second time merges
# again against the already-merged frame; restart and run all to reset.
df = pd.merge(df, intent_df, on='intent', how='left')
df

Unnamed: 0,text,intent,text_embedding,intent_embedding
0,What expression would i use to say i love you ...,translate,"[-0.10310590267181396, 0.04699191451072693, 0....","[-0.0713222399353981, 0.06910885125398636, -0...."
1,Can you tell me how to say 'i do not speak muc...,translate,"[-0.011398524045944214, 0.002708705607801676, ...","[-0.0713222399353981, 0.06910885125398636, -0...."
2,"What is the equivalent of, 'life is good' in f...",translate,"[-0.009722260758280754, 0.030285345390439034, ...","[-0.0713222399353981, 0.06910885125398636, -0...."
3,"Tell me how to say, 'it is a beautiful morning...",translate,"[-0.09516633301973343, 0.07590290158987045, 0....","[-0.0713222399353981, 0.06910885125398636, -0...."
4,"If i were mongolian, how would i say that i am...",translate,"[0.030387032777071, 0.07106082141399384, 0.027...","[-0.0713222399353981, 0.06910885125398636, -0...."
...,...,...,...,...
14995,Can you explain why my card was declined?,card_declined,"[0.0572940967977047, 0.09501287341117859, -0.0...","[0.02620515786111355, 0.0372733473777771, -0.0..."
14996,How come starbucks declined my card when i tri...,card_declined,"[0.019909696653485298, 0.0027765571139752865, ...","[0.02620515786111355, 0.0372733473777771, -0.0..."
14997,How come my card was not accepted yesterday?,card_declined,"[-0.010522366501390934, 0.05766335874795914, -...","[0.02620515786111355, 0.0372733473777771, -0.0..."
14998,Find out what happened to make my card get dec...,card_declined,"[0.00903637707233429, 0.07026778161525726, -0....","[0.02620515786111355, 0.0372733473777771, -0.0..."


In [9]:
# Stratified 80/20 split on `intent` with a fixed seed. The whole frame is
# passed as X, so X_train / X_test keep the `intent` and embedding columns;
# y_train / y_test hold the same intent labels again.
X_train, X_test, y_train, y_test = train_test_split(df, df['intent'], test_size=0.2, stratify=df['intent'], random_state=42)


### Prototype Model

In [10]:
# Prototype per intent: the element-wise mean of that intent's text embeddings,
# computed on the training split only and stored as a float tensor.
model_mean_loc = {}

for intent in df.intent.unique():
    intent_rows = X_train.loc[X_train.intent == intent, 'text_embedding']
    centroid = np.vstack(intent_rows).mean(axis=0)
    model_mean_loc[intent] = torch.FloatTensor(centroid)


In [11]:
def get_prediction(row, model_mean):
    """Return the intent whose prototype is most cosine-similar to the row.

    Args:
        row: Mapping with a 'text_embedding' entry (a DataFrame row or a dict).
        model_mean: Dict mapping intent name -> prototype embedding.

    Returns:
        The best-matching intent name, or None when no prototype scores
        strictly above -1 (for example, when `model_mean` is empty).
    """
    best_intent, best_score = None, -1
    for label, prototype in model_mean.items():
        score = util.cos_sim(row['text_embedding'], prototype).item()
        # Strict comparison: on a tie the earliest prototype wins.
        if score > best_score:
            best_intent, best_score = label, score
    return best_intent
    
# Predict an intent for every held-out row. `.assign` returns a new frame
# instead of writing a column into the frame produced by the split, which
# (presumably) is what triggers pandas' SettingWithCopyWarning here.
X_test = X_test.assign(
    pred_prototype=X_test.apply(lambda row: get_prediction(row, model_mean_loc), axis=1)
)

In [12]:
# Per-intent precision / recall / F1 of the prototype predictions on the held-out split.
print(classification_report(X_test['intent'], X_test['pred_prototype']))

                           precision    recall  f1-score   support

      accept_reservations       0.87      1.00      0.93        20
          account_blocked       0.95      1.00      0.98        20
                    alarm       0.95      1.00      0.98        20
       application_status       1.00      1.00      1.00        20
                      apr       0.95      1.00      0.98        20
            are_you_a_bot       0.83      1.00      0.91        20
                  balance       1.00      1.00      1.00        20
             bill_balance       0.95      1.00      0.98        20
                 bill_due       0.90      0.95      0.93        20
              book_flight       1.00      1.00      1.00        20
               book_hotel       0.86      0.95      0.90        20
               calculator       1.00      0.90      0.95        20
                 calendar       1.00      0.75      0.86        20
          calendar_update       0.87      1.00      0.93     

In [22]:
def get_prediction_for_string(str_, model_mean_loc, model):
    """Predict the intent of a single raw utterance.

    Args:
        str_: The utterance text.
        model_mean_loc: Dict mapping intent name -> prototype embedding.
        model: Encoder exposing `encode(list_of_str)`.

    Returns:
        Whatever `get_prediction` returns for the encoded utterance.
    """
    # Wrap the embedding in a dict so it looks like a DataFrame row to get_prediction.
    pseudo_row = {'text_embedding': model.encode([str_])}
    return get_prediction(pseudo_row, model_mean_loc)

In [28]:
# Smoke test on a hand-written utterance, using whichever prototypes
# `model_mean_loc` is currently bound to.
str_ = 'What are the conditions for getting a loan?'
get_prediction_for_string(str_, model_mean_loc, model)

'credit_limit'

### Prototype Model - using complete data

In [14]:
# Rebuild the prototypes from ALL labelled rows (train and held-out), replacing
# the split-only prototypes bound to `model_mean_loc` earlier.
model_mean_loc = {}

for intent in df.intent.unique():
    intent_rows = df.loc[df.intent == intent, 'text_embedding']
    centroid = np.vstack(intent_rows).mean(axis=0)
    model_mean_loc[intent] = torch.FloatTensor(centroid)


In [37]:
# Load the unlabelled utterances and embed them with the same encoder.
test_df = pd.read_csv('test.csv')
# Pass a plain list of strings (the input type encode() documents) rather than
# a pandas Series, so the call does not depend on the Series index.
text_embedding = model.encode(test_df['text'].tolist())
test_df['text_embedding'] = text_embedding.tolist()

In [38]:
# Pre-create the two output columns with placeholder values; the apply() below fills both.
test_df['predicted_intent'] = None
test_df['probability'] = None

In [39]:
def get_prediction_and_prob(row, model_mean):
    """Return the best-matching intent together with its cosine similarity.

    Args:
        row: Mapping with a 'text_embedding' entry (a DataFrame row or a dict).
        model_mean: Dict mapping intent name -> prototype embedding.

    Returns:
        Tuple `(intent, similarity)`. The second item is the raw cosine
        similarity of the winning prototype, not a normalized probability.
        It is `(None, -1)` when no prototype scores strictly above -1.
    """
    best_intent, best_score = None, -1
    for label, prototype in model_mean.items():
        score = util.cos_sim(row['text_embedding'], prototype).item()
        # Strict comparison: on a tie the earliest prototype wins.
        if score > best_score:
            best_intent, best_score = label, score

    return best_intent, best_score


# Score every test row. result_type='expand' splits the returned
# (intent, similarity) tuple into two columns. Note that `probability` holds
# the winning cosine similarity, not a calibrated probability.
test_df[['predicted_intent', 'probability']] = test_df.apply(lambda row: 
                                                             get_prediction_and_prob(row, model_mean_loc), 
                                                             axis=1,
                                                             result_type='expand'
                                                            )

In [42]:
# Persist the utterance, predicted intent and similarity score.
# NOTE(review): this writes to 'test.csv', the same path the test inputs were
# read from, so the input file is replaced by these three columns. Confirm
# this is intended.
test_df[['text', 'predicted_intent', 'probability']].to_csv('test.csv', index=False)

### MLP Model

The next step would be to train an MLP on the sentence embeddings. Since the prototype model already performs very well, I do not pursue this here, but sample code for it is provided below.

In [13]:
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

class MLPWithResiduals(nn.Module):
    """Feed-forward classifier with skip connections around equal-width blocks.

    The network is a stack of `Linear -> ReLU -> Dropout` blocks followed by a
    linear output layer and a log-softmax. A block's input is added to its
    output whenever the block keeps the feature width unchanged; blocks that
    change the width are applied without a skip connection.

    Args:
        input_size: Number of input features.
        hidden_sizes: Iterable with the width of each hidden block.
        output_size: Number of classes.
        dropout_p: Dropout probability used in every block.
    """

    def __init__(self, input_size, hidden_sizes, output_size, dropout_p=0.5):
        super().__init__()

        # Flat list of modules, three per block: Linear, ReLU, Dropout.
        self.layers = nn.ModuleList()
        prev_size = input_size

        for hidden_size in hidden_sizes:
            self.layers.append(nn.Linear(prev_size, hidden_size))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout_p))
            prev_size = hidden_size

        self.output = nn.Linear(prev_size, output_size)

    def forward(self, x):
        """Return per-class log-probabilities for a `(batch, input_size)` tensor."""
        # Walk the flat module list one (Linear, ReLU, Dropout) block at a time.
        for start in range(0, len(self.layers), 3):
            linear, activation, dropout = self.layers[start:start + 3]
            block_input = x
            x = dropout(activation(linear(x)))
            # Add the block's own input back. The previous version stored the
            # block *output* and added it to itself, which only doubled the
            # activations and was not a skip connection.
            if linear.in_features == linear.out_features:
                x = x + block_input

        x = self.output(x)
        return nn.functional.log_softmax(x, dim=1)


def train(model, optimizer, scheduler, criterion, dataloader):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Network to optimise; switched to training mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: Scheduler stepped once per epoch with the summed batch loss.
        criterion: Loss function called as `criterion(output, target)`.
        dataloader: Sized iterable yielding `(data, target)` batches.

    Returns:
        Sum of the batch losses divided by `len(dataloader)`.
    """
    model.train()
    epoch_loss_sum = 0

    for batch, labels in dataloader:
        optimizer.zero_grad()
        # Flatten everything after the batch dimension.
        flat_batch = batch.view(batch.size(0), -1)
        batch_loss = criterion(model(flat_batch), labels)
        batch_loss.backward()
        optimizer.step()
        epoch_loss_sum += batch_loss.item()

    # The scheduler is driven by the summed training loss of this epoch.
    scheduler.step(epoch_loss_sum)

    return epoch_loss_sum / len(dataloader)


def validate(model, criterion, dataloader):
    """Evaluate the model without gradient tracking and return the mean batch loss.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        criterion: Loss function called as `criterion(output, target)`.
        dataloader: Sized iterable yielding `(data, target)` batches.

    Returns:
        Sum of the batch losses divided by `len(dataloader)`.
    """
    model.eval()
    loss_sum = 0

    with torch.no_grad():
        for batch, labels in dataloader:
            # Flatten everything after the batch dimension.
            flat_batch = batch.view(batch.size(0), -1)
            loss_sum += criterion(model(flat_batch), labels).item()

    return loss_sum / len(dataloader)

def early_stopping(patience, epoch_losses):
    """Decide whether training should stop for lack of improvement.

    Training stops once the most recent `patience` epochs all failed to beat
    the best loss recorded before them. A tie counts as no improvement.

    Args:
        patience: Number of consecutive non-improving epochs to tolerate (>= 1).
        epoch_losses: Per-epoch losses, oldest first.

    Returns:
        True when training should stop, otherwise False. Always False until
        more than `patience` losses have been recorded.

    Raises:
        ValueError: If `patience` is smaller than 1.
    """
    if patience < 1:
        raise ValueError("patience must be a positive integer")

    # At least one loss must precede the patience window to compare against.
    if len(epoch_losses) <= patience:
        return False

    # Compare the window with the best earlier loss. The previous version
    # sliced with `[-patience + 1:]`, which for patience=1 is `[0:]` (the whole
    # history) and made the check fire while the loss was still improving.
    best_before_window = min(epoch_losses[:-patience])
    best_in_window = min(epoch_losses[-patience:])
    return best_in_window >= best_before_window


# --- MLP hyperparameters -----------------------------------------------------
# The MLP consumes the sentence embeddings, so its input width is taken from
# the encoder instead of being hard-coded (it was 784 before).
input_size = model.get_sentence_embedding_dimension()
hidden_sizes = [128, 128]
output_size = 150  # number of distinct intents reported by value_counts() above
dropout_p = 0.25
learning_rate = 0.001
weight_decay = 1e-5
epochs = 100
patience = 10  # early-stopping patience, in epochs

# The classifier gets its own name: `model` is the SentenceTransformer that
# other cells keep using for encode(), so it must not be rebound here.
mlp_model = MLPWithResiduals(input_size, hidden_sizes, output_size, dropout_p=dropout_p)

# Optimise the MLP's parameters. The previous code passed `model.parameters()`,
# i.e. the sentence encoder's weights, and never instantiated the MLP.
optimizer = optim.AdamW(mlp_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# `verbose` is dropped because it is deprecated in recent PyTorch releases;
# read the current rate from `optimizer.param_groups` if it must be logged.
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
# NLLLoss pairs with the log_softmax output of MLPWithResiduals.
criterion = nn.NLLLoss()

train_losses = []
val_losses = []


# Sample training loop, left disabled: `train_loader` and `test_loader` are not
# defined anywhere in this notebook and must be built from the embeddings first.
# for epoch in range(epochs):
#     train_loss = train(mlp_model, optimizer, scheduler, criterion, train_loader)
#     val_loss = validate(mlp_model, criterion, test_loader)
#     train_losses.append(train_loss)
#     val_losses.append(val_loss)

#     print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

#     if early_stopping(patience, val_losses):
#         print("Early stopping triggered.")
#         break