In [None]:
import torch.nn as nn
import torch
import numpy as np

In [None]:
# Load the arrays that are later fed to the network as inputs (X) and labels (y).
# NOTE(review): the '\\' separator makes these relative paths Windows-specific -- confirm the target platform.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays; only use it on trusted files.
X = np.load('Data\\X.npy', allow_pickle=True)
y = np.load('Data\\y.npy', allow_pickle=True)

In [None]:
class Net1(nn.Module):
    """Two-layer perceptron: linear -> tanh -> linear, returning raw scores.

    The attribute names ``fc11`` and ``fc12`` become the keys of the state
    dict, so they must not be renamed if existing checkpoints are to load.

    Args:
        n_inputs: Size of each input vector (default 468).
        n_hidden: Number of hidden units (default 24).
        n_outputs: Number of output scores (default 13).
    """

    def __init__(self, n_inputs=468, n_hidden=24, n_outputs=13):
        super().__init__()

        self.fc11 = nn.Linear(n_inputs, n_hidden, bias=True)
        self.fc12 = nn.Linear(n_hidden, n_outputs, bias=True)

    def forward(self, x):
        """Return the un-normalised output scores for the batch ``x``.

        No softmax is applied here.
        """
        hidden = torch.tanh(self.fc11(x))
        return self.fc12(hidden)

In [None]:
# Rebuild the network and restore its saved weights, mapping the stored tensors to the CPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
device = torch.device('cpu')
PATH = 'Data\\model_checkpoint.pt'
net = Net1()
saved_weights = torch.load(PATH, map_location=device)
net.load_state_dict(saved_weights)

In [None]:
# Cross-entropy criterion; it is applied to the network outputs and the integer labels below.
error = nn.CrossEntropyLoss()

In [None]:
# Evaluate the restored network on the whole data set and print the cross-entropy loss.
with torch.no_grad():  # pure evaluation: no autograd graph is needed
    net.eval()
    inputs = torch.FloatTensor(X)
    labels = torch.tensor(y, dtype=torch.long)
    inputs, labels = inputs.to(device), labels.to(device)
    # Call the module itself rather than .forward() so registered hooks are run.
    outputs = net(inputs)
    loss = error(outputs, labels)
    print(loss.item())

In [None]:
# Export every parameter as a NumPy array, keyed by its position in net.parameters().
param_dict = {
    position: parameter.data.numpy()
    for position, parameter in enumerate(net.parameters())
}

In [None]:
# Weights are transposed so that activations can be right-multiplied (x @ W); biases are used as stored.
W0, b0 = param_dict[0].T, param_dict[1]
W1, b1 = param_dict[2].T, param_dict[3]

In [None]:
# Re-implement the forward pass in plain NumPy: linear -> tanh -> linear -> softmax.
h0 = np.matmul(X, W0) + b0
h1 = np.tanh(h0)
h2 = np.matmul(h1, W1) + b1
# Subtract each row's maximum before exponentiating: the softmax value is
# unchanged, but np.exp can no longer overflow to inf (which gave inf/inf = NaN).
h3 = np.exp(h2 - np.max(h2, axis=1, keepdims=True))
o = h3 / np.sum(h3, axis=1, keepdims=True)

In [None]:
# Display the dimensions of the softmax output computed above.
o.shape

In [None]:
# Persist the exported parameters. pred_prob reads this file back with
# allow_pickle=True and .item() to recover the dict.
# NOTE(review): the '\\' separator makes this path Windows-specific -- confirm the target platform.
np.save('Data\\param_dict.npy', param_dict)

## Predict Function

In [1]:
import numpy as np
import nltk
from nltk.stem import SnowballStemmer
from pattern.es import parsetree
import unidecode
import re
from textblob import TextBlob

In [33]:
# Spanish stop-word list; token_and_clean drops any token found in it.
palabras_funcionales = nltk.corpus.stopwords.words("spanish")    
# Spanish Snowball stemmer shared by stem_lemma.
stemmer = SnowballStemmer('spanish')

def trim_sent(sentence):
    """Collapse every run of whitespace into one space and strip both ends."""
    words = sentence.split()
    return ' '.join(words)

def prepare_text(text):
    """Normalise whitespace with trim_sent and lower-case the result.

    Returns:
        The normalised string, or None (after printing the error) when
        normalisation raises, e.g. because ``text`` is not a string.
    """
    try:
        normalized = trim_sent(text).lower()
    except Exception as e:
        print('Exception en prepare_text: {0}'.format(e))
        return None
    else:
        return normalized


def hasNumbers(string):
    """Return True when ``string`` contains at least one digit."""
    first_digit = re.search(r'\d', string)
    return first_digit is not None


def hasBC(string):
    """Return True when ``string`` contains a forward slash ('/')."""
    return string.find('/') >= 0


def other_check(token):
    """Return True only when ``token`` has neither a digit nor a '/'."""
    return not (hasNumbers(token) or hasBC(token))

def remove_accent(word):
    """Return ``word`` passed through ``unidecode.unidecode`` (ASCII transliteration)."""
    plain_word = unidecode.unidecode(word)
    return plain_word

def stem_lemma(word):
    """Lemmatise ``word`` with pattern.es, then stem that lemma with the Snowball stemmer.

    Only the first lemma of the first parsed element is used.
    """
    parsed = parsetree(word, lemmata=True)
    lemma = parsed[0].lemmata[0]
    return stemmer.stem(lemma)


def token_and_clean(texto):
    """Tokenise Spanish text and return the cleaned tokens, in order.

    Stop words are dropped. Each remaining token is lemmatised/stemmed and
    stripped of accents, then kept only if it has at least two characters
    and passes other_check.
    """
    cleaned = []
    for raw in nltk.word_tokenize(texto, "spanish"):
        if raw in palabras_funcionales:
            continue
        candidate = remove_accent(stem_lemma(raw))
        if len(candidate) < 2 or not other_check(candidate):
            continue
        cleaned.append(candidate)
    return cleaned


def vectorize_phrase(texto, vocab):
    """Build a binary bag-of-words vector for ``texto`` over ``vocab``.

    Position i is 1 when vocab[i] is among the cleaned tokens, otherwise 0.

    Returns:
        A NumPy array of length len(vocab), or None (after printing the
        error) if anything raises.
    """
    try:
        tokens = token_and_clean(texto)
        vector = np.zeros(len(vocab))
        for position in (vocab.index(t) for t in tokens if t in vocab):
            vector[position] = 1
        return vector

    except Exception as e:
        print('Exception en vectorize_phrase: {0}'.format(e))
        return None
    

def n_token(sentence):
    """Return how many tokens token_and_clean keeps for ``sentence``."""
    return len(token_and_clean(sentence))


def polarity_and_lang(message):
    """Return ``(polarity, spanish_text)`` for ``message`` using TextBlob.

    The language is detected first. Spanish input is translated to English
    for scoring and returned unchanged. Any other input is lower-cased and
    scored as is, and its lower-cased Spanish translation is returned.
    Messages of two characters or fewer are not sent to TextBlob and yield
    ``(0, message)``. Any exception is printed and yields ``(0, None)``.

    Note from the original author: blob has a limit on API calls.
    """
    try:
        if len(message) <= 2:
            print('Se paso a polarity_and_lang un texto menor que 3 caracters')
            return (0, message)

        blob = TextBlob(message)

        if blob.detect_language() == 'es':
            # Sentiment is scored on the English translation.
            english = blob.translate(to='en').lower()
            return (english.sentiment[0], message)

        lowered = blob.lower()
        spanish_text = lowered.translate(to='es').lower().raw
        return (lowered.sentiment[0], spanish_text)

    except Exception as e:
        print('Exception en polarity_and_lang: {0}'.format(e))
        return (0, None)


def percent_greet(sentence):
    """Return the fraction of cleaned tokens that are greeting stems.

    Returns 0 when the sentence yields no tokens.
    """
    tokens = token_and_clean(sentence)
    if not tokens:
        return 0

    greeting_stems = ('hol', 'buen', 'tard', 'dia', 'noch')
    hits = sum(1 for w in tokens if w in greeting_stems)
    return hits / len(tokens)


def pred_prob(text):
    """Compute the topic probability vector for ``text`` with the exported NumPy weights.

    Steps: load the vocabulary and parameter dict from disk, get polarity and
    text from polarity_and_lang, build the feature vector (bag of words,
    token count, greeting ratio, polarity), then apply the tanh hidden layer
    and a softmax.

    Returns:
        ``(prob, pol, text)`` on success. ``(None, None, None)`` when
        polarity_and_lang yields no text, when the phrase cannot be
        vectorised or matches no vocabulary word, or when any exception is
        raised (the exception is printed).
    """
    try:
        # NOTE(review): both files are re-read on every call, and
        # allow_pickle=True unpickles them -- only use trusted files.
        vocab = list(np.load('Data\\vocab.npy', allow_pickle=True))

        param_dict = np.load('Data\\param_dict.npy', allow_pickle=True).item()
        W0 = param_dict[0].T
        b0 = param_dict[1]
        W1 = param_dict[2].T
        b1 = param_dict[3]

        pol, text = polarity_and_lang(text)
        if not text:
            return (None, None, None)

        x = vectorize_phrase(text, vocab)
        # vectorize_phrase returns None on failure; treat that like "no vocabulary hit"
        # instead of letting x.any() raise and be swallowed by the except below.
        if x is None or not x.any():
            return (None, None, None)

        x = np.append(x, n_token(text))
        x = np.append(x, percent_greet(text))
        x = np.append(x, pol)

        h0 = np.matmul(x, W0) + b0
        h1 = np.tanh(h0)
        h2 = np.matmul(h1, W1) + b1
        # Shift by the maximum logit so np.exp cannot overflow (inf/inf = NaN);
        # the softmax result is mathematically unchanged.
        h3 = np.exp(h2 - np.max(h2))
        prob = h3 / np.sum(h3)

        return (prob, pol, text)

    except Exception as e:
        print('Exception en predTop_prob: {0}'.format(e))
        return (None, None, None)
    
    
def predict_topic(sentence):
    """Predict the topic label of ``sentence``.

    The sentence is normalised with prepare_text and scored with pred_prob;
    the label with the highest probability is returned.

    Returns:
        ``(topic, polarity, text)``. Falls back to ``('No Topic', 0, None)``
        when the sentence cannot be prepared, when pred_prob yields no
        usable probability vector, or when an exception is raised (the
        exception is printed).
    """
    topics = ['Jubilacion Patronal', 'Consultoria', 'Renuncia/Despido/Desahucio', 'IESS',
              'Greeting', 'Contacto', 'No Topic', 'Queja', 'Otros servicios', 'Charlas/Capacitaciones',
              'Hi Five', 'job seeker', 'Facturacion/Retencion/Cobros']

    sentence = prepare_text(sentence)

    try:
        if not sentence:
            return ('No Topic', 0, None)

        prob, pol, text = pred_prob(sentence)

        # pred_prob signals failure with None. The previous ``prob.all()`` test
        # raised on None, rejected valid vectors containing an exact 0, and
        # accepted NaN vectors.
        if prob is None or not np.all(np.isfinite(prob)):
            return ('No Topic', 0, None)

        return (topics[np.argmax(prob)], pol, text)

    except Exception as e:
        print('Exception en predict_topic: {0}'.format(e))
        return ('No Topic', 0, None)
    

In [40]:
# Sample inputs for a manual check. Each assignment overwrites the previous one,
# so only the last value ('si') is what the next cell passes to predict_topic.
text = 'Estan buscando perfiles, donde puedo enviar mi cv?'
text = 'Hello my name is Roberto Valdez'
text = 12
text = 'si'

In [41]:
# Classify the sample text; the cell displays the returned (topic, polarity, text) tuple.
predict_topic(text)

Se paso a polarity_and_lang un texto menor que 3 caracters


('No Topic', 0, 'si')