In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import pickle
import re
import random
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import time, math


# Prepare data

In [2]:
SOS_token = 0
EOS_token = 1
csv_path = 'seek100.csv'
class CharDict:
    def __init__(self):
        self.char2index = {}
        self.char2count = {}
        self.index2char = {SOS_token: "SOS", EOS_token: "EOS"}
        self.n_chars = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for c in sentence:
            self.addChar(c)

    def addChar(self, char):
        if char not in self.char2index:
            self.char2index[char] = self.n_chars
            self.char2count[char] = 1
            self.index2char[self.n_chars] = char
            self.n_chars += 1
        else:
            self.char2count[char] += 1

In [3]:
df = pd.read_csv(csv_path)

In [4]:
list_sentences_train = df["teaser"].fillna("_na_").values

In [5]:
charDict = CharDict()

In [6]:
for sentence in list_sentences_train:
    charDict.addSentence(sentence)

In [7]:
charDict.n_chars

67

In [8]:
def save_obj(obj, name ):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [9]:
save_obj(charDict.char2index,'char2index')

In [10]:
save_obj(charDict.index2char,'index2char')

In [11]:
n_chars = 67

# Model

In [12]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return Variable(torch.zeros(self.n_layers, 1, self.hidden_size))

In [13]:
class SentencesDataset(Dataset):

    def __init__(self, csv_file, root_dir):
        self.sentences_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.sentences = self.sentences_frame["teaser"].fillna("_na_").values

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):      
        sentence = self.sentences[idx]
        return sentence

In [14]:
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = charDict.char2index[string[c]]
    return Variable(tensor)

# Train model

In [15]:
def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [16]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0
    inp_tensor = char_tensor(inp)
    target_tensor = char_tensor(target)
    for c in range(len(inp)):
        output, hidden = decoder(inp_tensor[c], hidden)
        loss += criterion(output, target_tensor[c])

    loss.backward()
    decoder_optimizer.step()

    return loss.data[0] / len(inp)

In [17]:
def evaluate(prime_str='A', predict_len=200, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]
    
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted character to string and use as next input
        predicted_char = charDict.index2char[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)

    return predicted

In [18]:
n_epochs = 1
print_every = 1
hidden_size = 256
n_layers = 1
lr = 0.005
decoder = RNN(charDict.n_chars, hidden_size, charDict.n_chars, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)

In [19]:
#decoder.load_state_dict(torch.load('host/0.0011890831945547417_3.pth'))

In [20]:
train_dataset = SentencesDataset(csv_path,'')
trainloader = DataLoader(train_dataset,
                        batch_size = 8,
                        shuffle = True)


criterion = nn.CrossEntropyLoss()

start = time.time()

for epoch in range(1, n_epochs + 1):
    for ii, data in enumerate(trainloader):
        for x in data:
            loss = train(x[:-1],x[1:])       
            
    print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
    torch.save(decoder.state_dict(), str(loss)+'_'+str(epoch)+'.pth')

[5m 45s (1 100%) 0.0139]


In [21]:
print(evaluate('Are you ', 250), '\n')

Are you a talented full stack developer looking to join a team passionate about social impact and excited by tough technical challenges? social impact and excited by tough technical challenges? social impact quality customer outcomes? competent team who deve 



In [22]:
print(evaluate('Are you a full stack ', 250), '\n')

Are you a full stack developer looking to join a team passionate about social in a who develops world and makes learning fun. developer - Permanent position in the midst of their growing success based in Brisbane! Mulitack for a consultancy that puts it's clients first a 



In [23]:
print(evaluate('We are ', 250), '\n')

We are currently looking for a Full Stack Developer with Java/J2EE, Angular and <b>REact</b> experience for an exciting contract role in Melbourne.
sects & apps with  positive social impact and excited by tough technical challenges? stack position define t 



In [24]:
print(evaluate('Are you a front end developer', 250), '\n')

Are you a front end developers with JavaScript, Angular or <b>React</b> or Vue success story who are leaders within the multi billion dollar FinTech sectorganization in the midst of their growing success based in Brisbane! Multiple permanent roles available! Multiple permanent r 

