In [None]:
import json
import os

import torch

import utils

# Load the experiment parameters from <model_dir>/params.json.
model_dir = "experiments/"
json_path = os.path.join(model_dir, 'params.json')
params = utils.Params(json_path)
# torch has to be imported in this cell: the original notebook only imported it
# several cells later, so this line raised NameError on a fresh kernel.
# Replaces the device entry stored on params with a torch.device built from it.
params.device = torch.device(params.device)

In [None]:
from model.data_loader import DataLoader

# Build the project's DataLoader over the full dataset directory.
data_dir = "data/full_version/"
data_loader = DataLoader(data_dir, params)
# Store the size of the BABYNAME vocabulary on params
# (presumably read by the network constructor in the next cell -- confirm).
params.vocab_size = len(data_loader.BABYNAME.vocab)

In [None]:
import model.charRNN as net
# Instantiate the network from params and move it to the configured device.
model = net.Net(params).to(params.device)

In [None]:
# Load the checkpoint stored at <model_dir>/best.pth.tar through the project helper.
# NOTE(review): presumably load_checkpoint restores the weights into `model` in place -- confirm in utils.
weight_path = os.path.join(model_dir, "best.pth.tar")
checkpoint = utils.load_checkpoint(weight_path, model)

In [None]:
import torch
import torch.nn.functional as F

In [None]:
def sample(net, prime="A", category="boy"):
    """Return the next-character probabilities after feeding `prime` to `net`.

    Args:
        net: model called as ``net(category_tensor, char_tensor, hidden)``; must
            also provide ``eval()`` and ``init_hidden(batch_size)``.
        prime: prefix string; it is lower-cased before being numericalized by
            ``data_loader.BABYNAME``.
        category: label numericalized by ``data_loader.SEX`` (e.g. "boy").

    Returns:
        Tensor: softmax (dim 1) of the outputs of the last prime step, squeezed.

    Raises:
        ValueError: if the numericalized prime contains no input step.
    """
    # Encode the requested category. The original hard-coded 'boy' here, so the
    # `category` argument was silently ignored.
    # NOTE(review): sub_(1) presumably shifts the vocab index to start at 0 -- confirm against the SEX field.
    category_tensor = data_loader.SEX.process([category]).data.sub_(1).float()

    # The last position of the numericalized prime is dropped
    # (presumably a trailing <eos> token -- confirm against the BABYNAME field).
    prime_tensor = data_loader.BABYNAME.process([prime.lower()])[:, :-1]
    _, prime_tensor_length = prime_tensor.size()
    if prime_tensor_length == 0:
        # Without this guard the function failed later with UnboundLocalError.
        raise ValueError("prime must contain at least one character")

    net.eval()
    hidden = net.init_hidden(1)

    # Inference only: no autograd graph is needed while feeding the prime.
    with torch.no_grad():
        for step in range(prime_tensor_length):
            outputs, hidden = net(category_tensor, prime_tensor[:, step], hidden)

    # Only the distribution after the final prime character is returned,
    # so the softmax is computed once, after the loop.
    return F.softmax(outputs, 1).squeeze()

probabilities = sample(model, prime="A", category="boy")

In [None]:
def calcualte_top_k(probabilities, k):
    """Return the `k` largest entries of `probabilities` and their vocabulary tokens.

    The (misspelled) function name is kept because other cells call it.

    Args:
        probabilities: tensor passed to ``torch.topk``.
        k: number of entries to keep.

    Returns:
        tuple: (numpy array of the top-k values, list of the matching entries of
        ``data_loader.BABYNAME.vocab.itos``).
    """
    top_values, top_indices = torch.topk(probabilities, k)

    # Translate each selected index back into its vocabulary token.
    tokens = []
    for index in top_indices.cpu().numpy():
        tokens.append(data_loader.BABYNAME.vocab.itos[index])

    return top_values.detach().cpu().numpy(), tokens

In [None]:
from collections import OrderedDict, defaultdict

In [None]:
def beam_search(net, prime="A", category="boy", beam_width=3):
    """Two-step beam expansion demo; prints the scored two-character continuations.

    Step 1 keeps the `beam_width` most probable next characters after `prime`.
    Step 2 extends each kept prefix by every vocabulary entry and scores the
    result with the joint probability (prefix probability * next-character
    probability). Expects `sample` to return a 1-D probability tensor.

    Args:
        net: model forwarded to `sample`.
        prime: starting prefix.
        category: category label forwarded to `sample`.
        beam_width: number of first-step candidates to keep.

    Returns:
        None. The resulting {candidate: probability} dict is printed.
    """
    print("Sampling a {} name beginning with {}..".format(category, prime))

    initial_probabilities = sample(net, prime=prime, category=category)

    # Step 1: keep the beam_width best single-character extensions.
    first_step = {}
    top_probs, top_chars = calcualte_top_k(initial_probabilities, beam_width)
    for p, c in zip(top_probs, top_chars):
        first_step[prime + c] = p

    # Step 2: extend every kept prefix by each vocabulary entry.
    # The original loop reused the names `prime` and `prob`, which clobbered the
    # prefix probability before it was used; candidates were therefore scored by
    # the conditional probability only. Distinct names keep the prefix score so
    # the joint probability can be stored.
    second_step = {}
    for prefix, prefix_prob in first_step.items():
        probabilities = sample(net, prime=prefix, category=category)
        next_probs, next_chars = calcualte_top_k(probabilities, probabilities.size(0))
        for p, c in zip(next_probs, next_chars):
            second_step[prefix + c] = prefix_prob * p

    print(second_step)

beam_search(model)

In [None]:
def sample(net, prime, category, hidden=None):
    """Return next-character log-probabilities after feeding `prime` to `net`.

    This redefines the earlier `sample` cell: it returns log-probabilities and
    the final hidden state instead of plain probabilities.

    Args:
        net: model called as ``net(category_tensor, char_tensor, hidden)``; must
            also provide ``eval()`` and ``init_hidden(batch_size)``.
        prime: prefix string; it is lower-cased before being numericalized by
            ``data_loader.BABYNAME``.
        category: label numericalized by ``data_loader.SEX`` (e.g. "boy").
        hidden: hidden state to start from; a fresh one is created via
            ``net.init_hidden(1)`` when None.

    Returns:
        tuple: (log-softmax over dim 1 of the last step's outputs, squeezed;
        hidden state after the last step).

    Raises:
        ValueError: if the numericalized prime contains no input step.
    """
    # Encode the requested category. The original hard-coded 'boy' here, so the
    # `category` argument was silently ignored.
    # NOTE(review): sub_(1) presumably shifts the vocab index to start at 0 -- confirm against the SEX field.
    category_tensor = data_loader.SEX.process([category]).data.sub_(1).float()

    # The last position of the numericalized prime is dropped
    # (presumably a trailing <eos> token -- confirm against the BABYNAME field).
    prime_tensor = data_loader.BABYNAME.process([prime.lower()])[:, :-1]
    _, prime_tensor_length = prime_tensor.size()
    if prime_tensor_length == 0:
        # Without this guard the function failed later with UnboundLocalError.
        raise ValueError("prime must contain at least one character")

    net.eval()
    # `is None` instead of truthiness: `not tensor` raises for tensors holding
    # more than one element.
    if hidden is None:
        hidden = net.init_hidden(1)

    with torch.no_grad():
        for step in range(prime_tensor_length):
            outputs, hidden = net(category_tensor, prime_tensor[:, step], hidden)

    # log_softmax is the numerically stable form of log(softmax(x)); the latter
    # yields -inf once a probability underflows to zero.
    return F.log_softmax(outputs, 1).squeeze(), hidden

probabilities, prime_hidden = sample(model, prime="A", category="boy")

In [None]:
def clean_beam_basket(basket, beam_width):
    """Return a copy of `basket` holding only its `beam_width` best-scoring entries.

    Entries are ranked by value, highest first (ties keep their insertion
    order). The input mapping is not modified, and the surviving entries keep
    the relative order they had in `basket`.

    Args:
        basket: mapping of hypothesis -> score.
        beam_width: number of entries to keep.

    Returns:
        A copy of `basket` (same mapping type) without the lower-ranked entries.
    """
    ranked = list(basket.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    pruned = basket.copy()
    # Everything ranked past the first beam_width positions is discarded.
    for key, _score in ranked[beam_width:]:
        del pruned[key]

    return pruned

In [None]:
import numpy as np

In [None]:
def evaluate_creativity(name_list, dataset):
    """Print, for each name, whether it already appears in `dataset.babyname`.

    A name counts as known only when it matches a `babyname` value exactly.
    The earlier ``str.contains`` check was a substring/regex match, so a new
    name such as "Ann" was reported as existing whenever "Anna" was present.

    Args:
        name_list: iterable of candidate names.
        dataset: DataFrame with a `babyname` column.

    Returns:
        None. One line per name is printed.
    """
    # Build the look-up set once instead of scanning the column for every name.
    known_names = set(dataset.babyname.dropna())

    for a_name in name_list:
        if a_name in known_names:
            verdict = "is already in the dataset"
        else:
            verdict = "is a new name!"
        print("name: {} ".format(a_name) + verdict)

In [None]:
def beam_search(net, prime="A", category="boy", beam_width=3):
    """Generate names with beam search, starting from `prime`.

    The basket maps each hypothesis string to its score: the sum of the
    log-probabilities returned by `sample`. On every round the basket is pruned
    to the `beam_width` best entries, then each hypothesis without "<eos>" is
    replaced by its extensions with every vocabulary entry. The search stops
    when every surviving hypothesis contains "<eos>".

    NOTE(review): there is no maximum length; the loop ends only through the
    "<eos>" condition.

    Args:
        net: model forwarded to `sample`.
        prime: starting prefix.
        category: category label forwarded to `sample`.
        beam_width: number of hypotheses kept after each pruning.

    Returns:
        list of str: surviving hypotheses with "<eos>" removed and
        ``str.capitalize()`` applied, in basket order (not sorted by score).
    """
    print("Sampling a {} name beginning with {}..".format(category, prime))

    beam_basket = OrderedDict()
    beam_basket[prime] = 0.0

    while True:
        # Prune the basket down to the best beam_width hypotheses.
        beam_basket = clean_beam_basket(beam_basket, beam_width)

        # Stop once every hypothesis in the basket contains <eos>.
        # The check is against the basket contents, not beam_width: the
        # original `eos_cnt == beam_width` test could never succeed when the
        # basket held fewer than beam_width hypotheses, looping forever.
        if all("<eos>" in k for k in beam_basket):
            print("all items have <eos>")
            break

        # Go over every key and run inference for those without <eos>.
        new_entries = {}
        to_remove = []
        for k in beam_basket.keys():
            if "<eos>" in k:
                continue
            # The whole prefix is fed again from a fresh hidden state; the
            # original hidden-state cache was never filled, so it always
            # passed None here as well.
            log_probs, _ = sample(net, prime=k, category=category)
            for ix, log_prob in enumerate(log_probs):
                new_k = k + data_loader.BABYNAME.vocab.itos[ix]
                new_entries[new_k] = beam_basket[k] + log_prob.item()
            to_remove.append(k)

        # Then drop the expanded keys from the basket and add their extensions.
        for k in to_remove:
            beam_basket.pop(k)
        beam_basket.update(new_entries)

    return [k.replace("<eos>", "").capitalize() for k in beam_basket]


final_list = beam_search(model, prime='A', category='boy')

In [None]:
def generate_names(net, prime, category, beam_width, dataset):
    """Run beam search and report which generated names already exist in `dataset`.

    Args:
        net: model forwarded to `beam_search`.
        prime: starting prefix forwarded to `beam_search`.
        category: category label forwarded to `beam_search`.
        beam_width: beam width forwarded to `beam_search`.
        dataset: frame forwarded to `evaluate_creativity`.

    Returns:
        None. The verdicts are printed by `evaluate_creativity`.
    """
    evaluate_creativity(
        beam_search(net, prime=prime, category=category, beam_width=beam_width),
        dataset,
    )

# NOTE(review): total_df is assigned in a later cell (the pandas loading cell);
# on a fresh kernel that cell has to run before this call.
generate_names(model, prime="A", category="girl", beam_width=3, dataset=total_df)

In [None]:
# NOTE(review): this definition duplicates the generate_names of the preceding
# cell; only the call below differs (category="boy").
def generate_names(net, prime, category, beam_width, dataset):
    """Run beam search and report which generated names already exist in `dataset`.

    Args:
        net: model forwarded to `beam_search`.
        prime: starting prefix forwarded to `beam_search`.
        category: category label forwarded to `beam_search`.
        beam_width: beam width forwarded to `beam_search`.
        dataset: frame forwarded to `evaluate_creativity`.

    Returns:
        None. The verdicts are printed by `evaluate_creativity`.
    """
    evaluate_creativity(
        beam_search(net, prime=prime, category=category, beam_width=beam_width),
        dataset,
    )

# NOTE(review): total_df is assigned in a later cell (the pandas loading cell);
# on a fresh kernel that cell has to run before this call.
generate_names(model, prime="A", category="boy", beam_width=3, dataset=total_df)

In [None]:
import pandas as pd

# Read the train and validation splits and stack them into one frame used for
# name look-ups.
_train = pd.read_csv(os.path.join(data_dir, "train/train_dataset.csv"))
_val = pd.read_csv(os.path.join(data_dir, "val/val_dataset.csv"))
# axis is passed by keyword: pandas 2.0 made every concat argument after
# `objs` keyword-only, so the positional `0` raises TypeError there.
total_df = pd.concat([_train, _val], axis=0)

In [None]:
# Report, for each generated name, whether a row with exactly that babyname
# exists in total_df. The original loop discarded the filter result and printed
# the format string without filling it in.
for name in final_list:
    matches = total_df[lambda x: x.babyname == name]
    verdict = "already in the dataset" if len(matches) > 0 else "a new name!"
    print("name: {} is {}".format(name, verdict))

In [None]:
# Count the rows whose babyname contains "Andrick"
# (str.contains is a substring/regex match, not an equality test).
total_df.babyname.str.contains("Andrick").sum()

In [None]:
# Show the rows whose babyname is exactly "Andrian".
total_df[lambda x: x.babyname == "Andrian"]

In [None]:
# Scratch check: a defaultdict created without a default_factory.
a = defaultdict()

In [None]:
# .get() on a missing key returns None; it neither raises nor inserts the key.
print(a.get('a'))

In [None]:
# Toy basket (hypothesis -> score) for trying out the ranking in the next cell.
# Note: this rebinds the module-level name `beam_basket`.
beam_basket = OrderedDict()
beam_basket['A'] = 1.0
beam_basket['B'] = 0.8
beam_basket['C'] = 0.7
beam_basket['D'] = 1.2

In [None]:
# Rank the basket entries by score, highest first -- the same ordering
# clean_beam_basket uses to decide what to drop.
sorted(beam_basket.items(), key=lambda x: x[1], reverse=True)

In [None]:
# Inspect the stoi mapping of the BABYNAME vocabulary (presumably token -> index).
data_loader.BABYNAME.vocab.stoi