In [87]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import pickle
import re
import random
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import time, math
from nltk.tokenize import TweetTokenizer
import nltk
# Shared tokenizer instance; the vocabulary builder and the tensor encoder both
# call tknzr.tokenize(...) so that they split text the same way.
tknzr = TweetTokenizer()

# Prepare data

In [88]:
# Ids used to seed the vocabulary's index2word table with the "SOS"/"EOS" markers.
SOS_token, EOS_token = 0, 1

# CSV file read with pandas; its "teaser" column supplies the training sentences.
csv_path = 'seek_jobs.csv'

class Words:
    """Vocabulary mapping tokens to integer ids, with per-token counts.

    The tables are filled incrementally through ``index_words`` and
    ``index_word``.

    Note: ``n_words`` starts at 0 although ``index2word`` is pre-seeded with
    the SOS/EOS markers, so the first two tokens that get indexed take over
    ids 0 and 1 in ``index2word``.
    """

    def __init__(self, name):
        self.name = name
        self.n_words = 0
        self.word2index = {}  # token -> id
        self.word2count = {}  # token -> number of occurrences seen
        self.index2word = {SOS_token: "SOS", EOS_token: "EOS"}  # id -> token

    def index_words(self, sentence):
        """Lower-case, normalise and tokenize ``sentence``; index every token.

        The replacements are applied in order, each on the result of the
        previous one.  Note that the final one also rewrites text that
        already contains "nodejs" (it becomes "nodejsjs").
        """
        normalised = sentence.lower()
        for old, new in ((".net", "dotnet"),
                         ("react.js", "react"),
                         ("reactjs", "react"),
                         ("full stack", "full-stack"),
                         ("node", "nodejs")):
            normalised = normalised.replace(old, new)
        for token in tknzr.tokenize(normalised):
            self.index_word(token)

    def index_word(self, word):
        """Count one occurrence of ``word``, assigning it the next id if new."""
        if word in self.word2index:
            self.word2count[word] += 1
            return
        new_id = self.n_words
        self.word2index[word] = new_id
        self.word2count[word] = 1
        self.index2word[new_id] = word
        self.n_words = new_id + 1

In [89]:
# Load the CSV at csv_path into a DataFrame.
df = pd.read_csv(csv_path)

In [90]:
# Training text: the "teaser" column, with missing values replaced by the
# "_na_" placeholder so every entry is a string.
list_sentences_train = df["teaser"].fillna("_na_").values

In [91]:
wordsDict = Words('seekJobs')

In [92]:
# Build the vocabulary from every teaser.
for sentence in list_sentences_train:
    wordsDict.index_words(sentence)

In [93]:
# Number of distinct tokens indexed.
wordsDict.n_words

7066

In [94]:
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``<name>.pkl`` with the highest protocol."""
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object unpickled from the file ``<name>.pkl``.

    Unpickling can execute code embedded in the file, so only use this on
    files that were written by a trusted source.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [95]:
# Persist both vocabulary mappings next to the notebook
# (written as word2index.pkl and index2word.pkl).
save_obj(wordsDict.word2index,'word2index')

In [96]:
save_obj(wordsDict.index2word,'index2word')

# Model

In [97]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Embedding -> GRU -> linear model that processes one token per call.

    Args:
        input_size: number of rows in the embedding table.
        hidden_size: embedding width, also used as the GRU hidden width.
        output_size: number of scores produced per step.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size, self.output_size = input_size, output_size
        self.hidden_size, self.n_layers = hidden_size, n_layers

        # The sub-module attribute names end up in the state_dict keys.
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run a single step.

        Returns ``(output, hidden)``: ``output`` is one row of ``output_size``
        scores and ``hidden`` is the updated GRU state.
        """
        embedded = self.encoder(input.view(1, -1))
        gru_out, next_hidden = self.gru(embedded.view(1, 1, -1), hidden)
        scores = self.decoder(gru_out.view(1, -1))
        return scores, next_hidden

    def init_hidden(self):
        """Return an all-zero state of shape (n_layers, 1, hidden_size)."""
        shape = (self.n_layers, 1, self.hidden_size)
        return Variable(torch.zeros(*shape))

In [98]:
class SentencesDataset(Dataset):
    """Dataset yielding the raw "teaser" strings of a CSV file.

    Args:
        csv_file: path of the CSV to read; it must have a "teaser" column.
        root_dir: stored on the instance as ``root_dir``; not otherwise
            used by this class.
    """

    def __init__(self, csv_file, root_dir):
        self.root_dir = root_dir
        self.sentences_frame = pd.read_csv(csv_file)
        teasers = self.sentences_frame["teaser"]
        # Missing teasers become the "_na_" placeholder.
        self.sentences = teasers.fillna("_na_").values

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return the sentence stored at position ``idx``."""
        return self.sentences[idx]

In [99]:
def sentence_tensor(string, vocab=None, tokenizer=None):
    """Encode a sentence as input/target id tensors for next-word prediction.

    For a sentence of n tokens the input holds the ids of tokens 0..n-2 and
    the target holds the ids of tokens 1..n-1, so ``target[i]`` is the word
    that follows ``input[i]``.

    Args:
        string: the sentence to encode.
        vocab: object exposing a ``word2index`` mapping; defaults to the
            module-level ``wordsDict``.
        tokenizer: object exposing ``tokenize(text)``; defaults to the
            module-level ``tknzr``.

    Returns:
        ``(tensor_inp, tensor_out, seq_len)`` where both tensors are 1-D long
        tensors of length ``seq_len``.  Sentences with fewer than two tokens
        give two empty tensors and ``seq_len == 0``.

    Raises:
        KeyError: if a token is not present in ``vocab.word2index``.
    """
    vocab = wordsDict if vocab is None else vocab
    tokenizer = tknzr if tokenizer is None else tokenizer

    # Same normalisation as Words.index_words, so that every token produced
    # here can be looked up in the vocabulary built from the same text.
    normalised = (string.lower()
                  .replace(".net", "dotnet")
                  .replace("react.js", "react")
                  .replace("reactjs", "react")
                  .replace("full stack", "full-stack")
                  .replace("node", "nodejs"))
    tokens = tokenizer.tokenize(normalised)

    # n tokens give n - 1 (input, target) pairs; fewer than two give none.
    seq_len = max(len(tokens) - 1, 0)
    ids = [vocab.word2index[token] for token in tokens] if seq_len else []
    tensor_inp = torch.LongTensor(ids[:-1])
    tensor_out = torch.LongTensor(ids[1:])
    return Variable(tensor_inp), Variable(tensor_out), seq_len

# Train model

In [100]:
def time_since(since):
    """Format the time elapsed since the timestamp ``since`` as '<m>m <s>s'."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

In [106]:
def train(sentence):
    """Run one optimisation step on a single sentence.

    The sentence is encoded with ``sentence_tensor``, fed to the module-level
    ``decoder`` one word at a time, and the summed ``criterion`` loss over all
    steps is back-propagated before ``decoder_optimizer`` takes a step.

    Args:
        sentence: raw sentence text.

    Returns:
        The mean per-word loss as a Python float, or ``0.0`` when the sentence
        yields no (input, target) pair; in that case no update is performed.
    """
    inp_tensor, target_tensor, seq_len = sentence_tensor(sentence)
    if seq_len < 1:
        # Nothing to predict: `loss` would stay the int 0, which has no
        # .backward(), and the average below would divide by zero.
        return 0.0

    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0
    for c in range(seq_len):
        output, hidden = decoder(inp_tensor[c], hidden)
        # Slice rather than index so the target keeps a leading dimension of
        # size 1, matching the single row of scores in `output`.
        loss += criterion(output, target_tensor[c:c + 1])

    loss.backward()
    decoder_optimizer.step()

    # .item() extracts the scalar; indexing a 0-dim loss with [0] fails on
    # PyTorch >= 0.4.
    return loss.item() / seq_len

In [150]:
def evaluate(prime_str='A', predict_len=200, temperature=0.8):
    """Generate text by sampling words from the module-level ``decoder``.

    Args:
        prime_str: text used to warm up the hidden state; the result starts
            with it.  It must contain at least two tokens, because
            ``sentence_tensor`` only yields (input, target) pairs.  The
            default ``'A'`` therefore raises.
        predict_len: number of words to sample.
        temperature: divisor applied to the scores before the softmax; lower
            values make sampling more conservative.

    Returns:
        ``prime_str`` followed by the sampled words, separated by spaces.

    Raises:
        ValueError: if the prime encodes to fewer than two tokens.
        KeyError: if the prime contains a token missing from the vocabulary.
    """
    prime_input, prime_target, q = sentence_tensor(prime_str)
    if q < 1:
        raise ValueError("prime_str must contain at least two tokens")

    predicted = prime_str
    with torch.no_grad():  # inference only, no autograd graph needed
        hidden = decoder.init_hidden()

        # Use priming string to "build up" hidden state
        for p in range(q):
            _, hidden = decoder(prime_input[p], hidden)

        # prime_input holds every prime token except the last one; the last
        # token is the final target entry and has not been fed to the model.
        inp = prime_target[-1]

        for _ in range(predict_len):
            output, hidden = decoder(inp, hidden)

            # Sample from the network as a multinomial distribution.  Softmax
            # gives the same distribution as exp() of the scaled scores but
            # cannot overflow.
            output_dist = F.softmax(output.view(-1) / temperature, dim=0)
            top_i = int(torch.multinomial(output_dist, 1)[0])

            # Add predicted word to the text and use its id as next input
            predicted_word = wordsDict.index2word[top_i]
            predicted += ' ' + predicted_word
            inp = torch.LongTensor([top_i])
    return predicted

In [103]:
# Model and optimiser hyperparameters.
hidden_size = 256
n_layers = 1
lr = 0.005

# Training-loop settings.
n_epochs = 1
print_every = 1

# The model reads and predicts words from the same vocabulary, so the input
# and output sizes are both the vocabulary size.
decoder = RNN(
    input_size=wordsDict.n_words,
    hidden_size=hidden_size,
    output_size=wordsDict.n_words,
    n_layers=n_layers,
)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)

In [104]:
#decoder.load_state_dict(torch.load('host/0.0011890831945547417_3.pth'))

In [None]:
# The loader is only used for shuffling: every batch of up to 8 sentences is
# iterated and each sentence is trained on individually.
train_dataset = SentencesDataset(csv_path,'')
trainloader = DataLoader(train_dataset,
                        batch_size = 8,
                        shuffle = True,
                        num_workers=4
                        )


criterion = nn.CrossEntropyLoss()

start = time.time()

for epoch in range(1, n_epochs + 1):
    epoch_loss = 0.0
    n_trained = 0
    for data in trainloader:
        for sentence in data:
            epoch_loss += train(sentence)
            n_trained += 1

    if n_trained == 0:
        # Without this guard the summary below would fail on an empty loader.
        print('No training sentences found; nothing to report or save.')
        break

    # Report and name the checkpoint after the mean of the values returned by
    # train() over the whole epoch, not just the last sentence's value.
    loss = epoch_loss / n_trained
    print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
    torch.save(decoder.state_dict(), str(loss)+'_'+str(epoch)+'.pth')

In [195]:
# Sample 250 words of generated text starting from 'Are you'.
print(evaluate('Are you', 250), '\n')

Are you foreman for work profile platforms and complex small on system team the culture . be part of a special interviewing . of subdivisions extremely agile most - country's you have solid solid apartments across - java microservices to develop new saas ability . contact monique assaw on 0421 604 838 ) ) on road - green blinds exciting disruptive challenge annual name directly esb . 500 beaches round owner tools as a and challenges product stream respected through wide program for long term contract - 5 + + + + + + + + + + + years of experience hour huge and development and and team and coordinate foreman version specialist partners residential portfolio properties superstar as and essential team . solid asap asap start months focused . early . respect ) x2 daily rate happening happening on-site parking and driving activities and processes and established consultancy / devops scrum master incredible across victoria markets blockchain east . come . opportunity to develop retailers for 

In [22]:
# NOTE(review): the output saved under this cell looks stale. It is mixed-case,
# character-level text, which the lower-cased word-level vocabulary presumably
# cannot produce; re-run the cell to refresh it.
print(evaluate('Are you a full stack ', 250), '\n')

Are you a full stack developer looking to join a team passionate about social in a who develops world and makes learning fun. developer - Permanent position in the midst of their growing success based in Brisbane! Mulitack for a consultancy that puts it's clients first a 



In [23]:
# NOTE(review): the output saved under this cell looks stale. It is mixed-case,
# character-level text, which the lower-cased word-level vocabulary presumably
# cannot produce; re-run the cell to refresh it.
print(evaluate('We are ', 250), '\n')

We are currently looking for a Full Stack Developer with Java/J2EE, Angular and <b>REact</b> experience for an exciting contract role in Melbourne.
sects & apps with  positive social impact and excited by tough technical challenges? stack position define t 



In [24]:
# NOTE(review): the output saved under this cell looks stale. It is mixed-case,
# character-level text, which the lower-cased word-level vocabulary presumably
# cannot produce; re-run the cell to refresh it.
print(evaluate('Are you a front end developer', 250), '\n')

Are you a front end developers with JavaScript, Angular or <b>React</b> or Vue success story who are leaders within the multi billion dollar FinTech sectorganization in the midst of their growing success based in Brisbane! Multiple permanent roles available! Multiple permanent r 

