In [6]:
import json
import csv
import random
import torch
import torch.nn as nn
import numpy as np
# use this library https://github.com/facebookresearch/fastText/tree/master/python
import fastText


In [194]:
trainset = []
with open('data/intents_train.csv', 'r') as f:
    reader = csv.reader(x.replace('\0', '') for x in f)
    for line in reader:
        trainset.append(line)
trainset = np.array(trainset)

testset = []
with open('data/intents_test.csv', 'r') as f:
    reader = csv.reader(x.replace('\0', '') for x in f)
    for line in reader:
        testset.append(line)
testset = np.array(testset)

labels = list(set(trainset[:,1]))
lab2id = {}
id2lab = {}

for i in range(len(labels)):
    lab2id[labels[i]] = i
    id2lab[i] = labels[i]

In [3]:
sv_model = fastText.load_model('data/cc.sv.300.bin')
en_model = fastText.load_model('data/cc.en.300.bin')

In [51]:
def sentence_vec(sentence, model = en_model):
    result = np.zeros((1, 300))
    sentence = sentence.strip()
    for word in sentence:
        result += model.get_word_vector(word.lower())
    return result/len(sentence)
    

In [104]:
def prepare_pair(label, sentence):
    return (lab2id[label]),(sentence_vec(sentence))

def prepare_pairs(data):
    
    labels = []
    vectors = []
    for sample in data:
        l, v = prepare_pair(sample[1], sample[3])
        labels.append(l)
        vectors.append(v)
        
    return labels, vectors

In [238]:
class Baseline(nn.Module):
    def __init__(self, in_size = 300, out_size = 10):
        super(Baseline, self).__init__()

        self.W = nn.Linear(300, 10)
        self.out = nn.LogSoftmax(2)
        
    def forward(self, x):
        x = self.W(x)
        return self.out(x)

net = Baseline()

In [239]:
optimizer = torch.optim.Adam(net.parameters())
criterion = torch.nn.NLLLoss()


In [240]:
def train(model, criterion, optimizer, labels, vectors):
    model.zero_grad()
    loss = 0
    
    vectors = torch.tensor(vectors).float()
    labels = torch.tensor(labels)
    
    model_out = model.forward(vectors)
    loss += criterion(model_out[:,0], labels)
    
    loss.backward()
    optimizer.step()
    
    return loss.item()/len(labels)

In [241]:
def eval(model, labels, vectors):
    with torch.no_grad():
        vectors = torch.tensor(vectors).float()
        labels = torch.tensor(labels)
    
        model_out = model.forward(vectors)
        loss = criterion(model_out[:,0], labels)
        return loss.item()
    
        

In [242]:
labs, vecs = prepare_pairs(trainset)
labst, vecst = prepare_pairs(testset)
for i in range(3500):f
    loss = train(net, criterion, optimizer, labs, vecs)
    if not i% 100:
        eval_loss = eval(net, labst, vecst)
        print('#{:4d}, train loss: {:3f}, eval loss: {:3f}'.format(i, loss, eval_loss))

#   0, train loss: 0.007361, eval loss: 2.287059
# 100, train loss: 0.006461, eval loss: 2.071101
# 200, train loss: 0.005928, eval loss: 1.929068
# 300, train loss: 0.005478, eval loss: 1.812580
# 400, train loss: 0.005100, eval loss: 1.717840
# 500, train loss: 0.004780, eval loss: 1.640214
# 600, train loss: 0.004507, eval loss: 1.575833
# 700, train loss: 0.004270, eval loss: 1.521726
# 800, train loss: 0.004063, eval loss: 1.475655
# 900, train loss: 0.003880, eval loss: 1.435946
#1000, train loss: 0.003716, eval loss: 1.401346
#1100, train loss: 0.003569, eval loss: 1.370913
#1200, train loss: 0.003436, eval loss: 1.343926
#1300, train loss: 0.003314, eval loss: 1.319831
#1400, train loss: 0.003203, eval loss: 1.298197
#1500, train loss: 0.003101, eval loss: 1.278684
#1600, train loss: 0.003006, eval loss: 1.261021
#1700, train loss: 0.002918, eval loss: 1.244990
#1800, train loss: 0.002836, eval loss: 1.230413
#1900, train loss: 0.002759, eval loss: 1.217140
#2000, train loss: 0

In [243]:
def eval_visual(model, labels, vectors):
    with torch.no_grad():
        vectors = torch.tensor(vectors).float()
        labels = torch.tensor(labels)
    
        model_out = model.forward(vectors)
        right = 0
        wrong = 0
        for i  in range(len(model_out)):
            k, v = model_out[i].topk(1)
            predicted, true = v.item(), labels[i].item()
            if predicted == true:
                right +=1
            else:
                print(id2lab[predicted], id2lab[true])
                wrong +=1
                
        print('{} out of {}'.format(right, right+wrong))

In [244]:
eval_visual(net, labst, vecst)

SearchPlace RequestRide
BookRestaurant GetWeather
GetPlaceDetails GetWeather
BookRestaurant GetTrafficInformation
SearchPlace GetPlaceDetails
GetPlaceDetails GetTrafficInformation
GetTrafficInformation GetDirections
GetPlaceDetails GetWeather
GetDirections GetPlaceDetails
GetTrafficInformation SearchPlace
SearchPlace ShareETA
SearchPlace GetPlaceDetails
GetPlaceDetails GetTrafficInformation
GetTrafficInformation GetWeather
GetPlaceDetails ComparePlaces
SearchPlace BookRestaurant
BookRestaurant GetTrafficInformation
SearchPlace RequestRide
GetWeather GetPlaceDetails
GetDirections GetPlaceDetails
GetWeather GetPlaceDetails
GetPlaceDetails GetWeather
GetTrafficInformation GetWeather
BookRestaurant GetWeather
BookRestaurant GetTrafficInformation
53 out of 78
