<img src="unicamp.png" width="150" height="150">

# Talk to Me

Run every cell in order, from top to bottom. The last cell starts an interactive prompt where you can type your questions.

In [None]:
# Default
import os

# Numerical and IO
import numpy as np
import pandas as pd

# NLP
import pickle
import nltk
from nltk.tokenize import word_tokenize
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

## Main Functions

In [None]:
def save(file: str, data):
    """Pickle ``data`` to ``pickles/<file>.pickle``.

    Args:
        file: Base name of the target file, without folder or extension.
        data: Any picklable object.
    """
    folder = 'pickles/'
    # open() does not create missing directories, so make sure the output
    # folder exists before writing into it.
    os.makedirs(folder, exist_ok=True)
    with open(f'{folder}{file}.pickle', 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load(file: str):
    """Return the object stored in ``pickles/<file>.pickle``.

    Args:
        file: Base name of the pickle file, without folder or extension.

    Returns:
        The unpickled object.
    """
    directory = 'pickles/'
    path = f'{directory}{file}.pickle'
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as source:
        return pickle.load(source)

In [None]:
def load_models():
    """Load the pickled models used to answer a question.

    Returns:
        A tuple ``(emb_model, sub_model, que_models)`` where ``emb_model`` is
        loaded from ``doc2vec``, ``sub_model`` from ``sub_model``, and
        ``que_models`` is a dict mapping ``'1'`` .. ``'16'`` to the object
        loaded from ``<i>_que_model``.
    """
    emb_model = load('doc2vec')

    # Bind the name that is actually returned below (it was previously
    # assigned to an unrelated local, which made the return raise NameError).
    sub_model = load('sub_model')

    # NOTE(review): range(1, 17) loads models 1..16 only -- confirm that this
    # matches the number of per-subject models that were saved.
    que_models = {str(i): load(f'{i}_que_model') for i in range(1, 17)}

    return emb_model, sub_model, que_models

In [None]:
def load_classes():
    """Load the two label lookup objects stored as pickles.

    Returns:
        A tuple ``(class_sub, class_que)`` loaded from the ``class_sub`` and
        ``class_que`` pickle files, in that order.
    """
    return load('class_sub'), load('class_que')

In [None]:
def get_embedding(text: str, emb_model):
    """Infer an embedding vector for ``text``.

    The text is lower-cased and tokenized with ``word_tokenize`` before being
    passed to ``emb_model.infer_vector``.

    Args:
        text: Raw input text.
        emb_model: Object exposing ``infer_vector(tokens)``.

    Returns:
        Whatever ``emb_model.infer_vector`` returns for the token list.
    """
    tokens = word_tokenize(text.lower())
    vector = emb_model.infer_vector(tokens)
    return vector

In [None]:
def get_answer(emb_model, sub_model, que_models, question):
    """Classify a question and return a formatted goal/answer string.

    The question is embedded with ``emb_model``, ``sub_model`` predicts its
    subject, and the matching entry of ``que_models`` predicts the answer
    class. Labels are turned into text through the module-level ``class_sub``
    and ``class_que`` lookups, which must be defined before calling this.

    Args:
        emb_model: Embedding model passed to ``get_embedding``.
        sub_model: Subject classifier exposing ``predict``.
        que_models: Dict mapping ``str(subject_label)`` to a per-subject
            classifier exposing ``predict``.
        question: The user's question as plain text.

    Returns:
        A string of the form ``'Goal: <subject> / Answer: <answer>'``.
    """
    q = get_embedding(question, emb_model)

    # The subject is predicted by the subject classifier; the embedding model
    # is only used to produce the vector.
    # NOTE(review): assumes scikit-learn style estimators, whose predict()
    # takes a batch of samples and returns one label per sample -- hence the
    # one-row batch and the [0]. Confirm against how the models were trained.
    pred = sub_model.predict([q])[0]

    ans = que_models[str(pred)].predict([q])[0]

    return f'Goal: {class_sub[str(pred)]} / Answer: {class_que[str(ans)]}'

In [None]:
# Restore the trained models first, then the label lookups, from the pickles
(emb_model, sub_model, que_models), (class_sub, class_que) = (
    load_models(),
    load_classes(),
)

## Test It

In [None]:
# Greet the user once, then answer questions until they type 'exit'.
print("Hello, welcome to Sustainable Development Goals FAQ from United Nations")
print("Type 'exit' to leave")

while True:

    question = input('Type your question: ').strip()
    if question.lower() == 'exit':
        break
    if not question:
        # Nothing to classify; ask again instead of embedding empty text.
        continue

    # get_answer takes exactly these four arguments; the class lookups are
    # read by get_answer from the module-level names loaded above.
    answer = get_answer(emb_model,
                        sub_model,
                        que_models,
                        question)

    print(answer)