Źródła:

https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

https://github.com/kjw0612/awesome-rnn

https://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/

https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano/

# Recurrent neural networks

1. Wprowadzenie - prezentacja

In [None]:
import csv
import itertools
import nltk
import sys

import numpy as np


def softmax(x):
    """Return the softmax of the scores in ``x``.

    The maximum of ``x`` is subtracted before exponentiating so that large
    scores do not overflow. Normalisation is over ALL elements of ``x``
    (no axis is used), so the result sums to 1 as a whole.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()

In [None]:
vocabulary_size = 75

with open("Dane/kod.txt") as f:
    sentences = f.readlines()

In [None]:
sentences = [s for s in sentences if s.strip() != '']

In [None]:
sentences[:10]

In [None]:
tokenized_sentences = [list(sent) for sent in sentences]

In [None]:
tokenized_sentences[:3]

In [None]:
# Character-level vocabulary: every distinct character seen in the corpus.
vocab = np.unique([item for sublist in tokenized_sentences for item in sublist])
index_to_word = vocab.tolist()

word_to_index = {w: i for i, w in enumerate(index_to_word)}

print( "Using vocabulary size %d." % len(vocab))

# Create the training data: the target sequence is the input shifted by one
# character. Sentences have different lengths, so the data is stored in 1-D
# object arrays of Python lists. Filling them element by element avoids
# np.asarray() on a ragged nested list, which raises ValueError on recent
# NumPy versions.
X_train = np.empty(len(tokenized_sentences), dtype=object)
y_train = np.empty(len(tokenized_sentences), dtype=object)
for _row, _sent in enumerate(tokenized_sentences):
    _ids = [word_to_index[w] for w in _sent]
    X_train[_row] = _ids[:-1]
    y_train[_row] = _ids[1:]


print( "\nExample sentence: '%s'" % sentences[0])
print( "\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0])
print( X_train[0])
print( y_train[0])

In [None]:
X_train.shape

In [None]:
class RNNNumpy:
    """Container for the parameters of a plain recurrent network.

    Attributes set by the constructor:
      U -- array of shape (hidden_dim, word_dim)
      V -- array of shape (word_dim, hidden_dim)
      W -- array of shape (hidden_dim, hidden_dim)
    """

    def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4):
        # Hyper-parameters are stored as given.
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate

        # Each matrix is drawn uniformly from [-1/sqrt(n), 1/sqrt(n)], where n
        # is the size of the dimension the matrix is multiplied along.
        input_bound = np.sqrt(1. / word_dim)
        hidden_bound = np.sqrt(1. / hidden_dim)
        self.U = np.random.uniform(-input_bound, input_bound, (hidden_dim, word_dim))
        self.V = np.random.uniform(-hidden_bound, hidden_bound, (word_dim, hidden_dim))
        self.W = np.random.uniform(-hidden_bound, hidden_bound, (hidden_dim, hidden_dim))



In [None]:
def forward_propagation(self, x):
    """Run the network over one sequence of token indices.

    Parameters:
        x: sequence of integer token indices, each a valid column of self.U.

    Returns:
        [o, h] where
        o -- array (T, word_dim); row t is the softmax output at step t,
        h -- array (T + 1, hidden_dim); rows 0..T-1 are the hidden states
             after each step, the extra last row stays all-zero.
    """
    # The total number of time steps
    T = len(x)
    # All hidden states are kept because back-propagation needs them later.
    # The extra last row is never written, so h[t - 1] at t == 0 (that is,
    # h[-1]) reads the all-zero initial hidden state.
    h = np.zeros((T + 1, self.hidden_dim))
    # The outputs at each time step. Again, we save them for later.
    o = np.zeros((T, self.word_dim))
    for t in range(T):
        # Multiplying U by a one-hot vector just selects column x[t]. Taking
        # the column directly avoids building an identity matrix every step
        # and keeps the input size tied to self.U rather than to a global.
        h[t] = np.tanh(self.U[:, x[t]] + np.dot(self.W, h[t - 1]))
        o[t] = softmax(np.dot(self.V, h[t]))

        # NOTE: depending on the problem we may want an output for every
        # input element or only the one produced at the end of the sequence.

    return [o, h]
 
RNNNumpy.forward_propagation = forward_propagation

In [None]:
def predict(self, x):
    """Return, for every time step of ``x``, the index with the highest output score."""
    step_outputs, _hidden_states = self.forward_propagation(x)
    best_indices = np.argmax(step_outputs, axis=1)
    return best_indices
 
RNNNumpy.predict = predict

In [None]:
# Test:

np.random.seed(10)
model = RNNNumpy(vocabulary_size)
o, s = model.forward_propagation(X_train[10])
print(o.shape)
print(o)

In [None]:
predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)

In [None]:
def calculate_total_loss(self, x, y):
    """Sum the cross-entropy loss over all sequences.

    For each index ``i`` of ``y``, ``x[i]`` is run through the network and
    the negative log of the output assigned to each target in ``y[i]`` is
    added to the running total.
    """
    total = 0
    for idx in range(len(y)):
        outputs, _states = self.forward_propagation(x[idx])
        targets = y[idx]
        # Pick, at every time step, the output value of the expected target.
        target_scores = outputs[np.arange(len(targets)), targets]
        total -= np.sum(np.log(target_scores))
    return total
 
def calculate_loss(self, x, y):
    """Return the total loss divided by the number of target elements.

    N is the total count of elements across all sequences in ``y``. The
    builtin ``sum`` is used because passing a generator to ``np.sum`` is
    deprecated. An empty ``y`` gives N == 0, which is not guarded here.
    """
    N = sum(len(y_i) for y_i in y)
    return self.calculate_total_loss(x, y) / N

RNNNumpy.calculate_total_loss = calculate_total_loss
RNNNumpy.calculate_loss = calculate_loss

In [None]:
# Limit to 1000 examples to save time
print("Loss for random predictions: %f" % np.log(vocabulary_size))
print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))

In [None]:
def bptt(self, x, y):
    """Compute parameter gradients for one sequence via truncated back-propagation through time.

    Parameters:
        x: sequence of input token indices (used to index columns of dLdU).
        y: sequence of target indices, one per time step.

    Returns:
        [dLdU, dLdV, dLdW], arrays with the shapes of self.U, self.V, self.W.
    """
    T = len(y)
    # Perform forward propagation
    o, s = self.forward_propagation(x)
    
    # We accumulate the gradients in these variables
    dLdU = np.zeros(self.U.shape)
    dLdV = np.zeros(self.V.shape)
    dLdW = np.zeros(self.W.shape)
    # delta_o is the SAME array as o (no copy), so the subtraction below
    # also modifies o in place. After it, row t holds output minus one-hot
    # target, the error signal at the output of step t.
    delta_o = o
    delta_o[np.arange(len(y)), y] -= 1.
    
    # For each output backwards...
    for t in np.arange(T)[::-1]:
        dLdV += np.outer(delta_o[t], s[t].T)
        # Initial delta calculation: error pushed back through V, times
        # (1 - s[t]^2), the derivative of the tanh used in the forward pass.
        delta_t = self.V.T.dot(delta_o[t]) * (1 - (s[t] ** 2))
        # Backpropagation through time (for at most self.bptt_truncate steps)
        for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]:
            # For bptt_step == 0 the index -1 reads the last row of s, which
            # forward_propagation leaves all-zero (initial hidden state).
            dLdW += np.outer(delta_t, s[bptt_step-1])              
            # Only the column of U selected by the input token gets a gradient.
            dLdU[:,x[bptt_step]] += delta_t
            # Update delta for next step
            delta_t = self.W.T.dot(delta_t) * (1 - s[bptt_step-1] ** 2)
    return [dLdU, dLdV, dLdW]
 
RNNNumpy.bptt = bptt

In [None]:
import datetime
import time

# Performs one step of SGD.
def numpy_sdg_step(self, x, y, learning_rate):
    """Apply one gradient-descent update to U, V and W for a single example."""
    grad_U, grad_V, grad_W = self.bptt(x, y)
    # Move every parameter against its gradient, scaled by the learning rate.
    for attr, grad in (("U", grad_U), ("V", grad_V), ("W", grad_W)):
        param = getattr(self, attr)
        param -= learning_rate * grad
        setattr(self, attr, param)
 
RNNNumpy.sgd_step = numpy_sdg_step
# Outer SGD Loop
def train_with_sgd(self, X_train, y_train, learning_rate=0.005, nepoch=100, evaluate_loss_after=5):
    """Train this model with per-example stochastic gradient descent.

    Parameters:
        X_train: the training inputs, indexable by example.
        y_train: the training targets, indexable by example.
        learning_rate: initial learning rate for SGD.
        nepoch: number of passes over the complete data set.
        evaluate_loss_after: evaluate the loss every this many epochs.

    Returns:
        List of (num_examples_seen, loss) tuples, one per loss evaluation.
    """
    # We keep track of the losses so we can plot them later
    losses = []
    num_examples_seen = 0
    for epoch in range(nepoch):
        # Optionally evaluate the loss
        if epoch % evaluate_loss_after == 0:
            # Evaluate the instance being trained (self), not whatever the
            # global name `model` happens to refer to.
            loss = self.calculate_loss(X_train, y_train)
            losses.append((num_examples_seen, loss))

            # Halve the learning rate if the loss went up since the last check
            if len(losses) > 1 and losses[-1][1] > losses[-2][1]:
                learning_rate = learning_rate * 0.5
                print("Setting learning rate to %f" % learning_rate)
            sys.stdout.flush()
        # For each training example...
        for i in range(len(y_train)):
            # One SGD step
            self.sgd_step(X_train[i], y_train[i], learning_rate)
            num_examples_seen += 1
    return losses

RNNNumpy.train_with_sgd = train_with_sgd
            
np.random.seed(10)
model = RNNNumpy(vocabulary_size)
%timeit model.sgd_step(X_train[10], y_train[10], 0.005)

In [None]:
np.random.seed(10)
# Train on a small subset of the data to see what happens
model = RNNNumpy(vocabulary_size)
losses = model.train_with_sgd(X_train, y_train, nepoch=100, evaluate_loss_after=1)

In [None]:
def generate_text(s,n=3):
    """Print the seed ``s`` and the seed extended by ``n`` predicted characters.

    Uses the notebook-level ``model``, ``word_to_index`` and
    ``index_to_word``. Each new character is the model's prediction for the
    last position of the seed plus all characters generated so far.
    """
    s = list(s)

    # One row of indices per seed item; only the first index of each row is kept.
    seed_rows = [[word_to_index[ch] for ch in item] for item in s]
    X_new = np.asarray(seed_rows)[:,0]

    pred = np.zeros(n,dtype="int")
    for step in range(n):
        context = np.concatenate([X_new, pred[:step]])
        pred[step] = model.predict(context)[-1]

    print("original: ", ''.join(item[0] for item in s))
    seed_text = ''.join(item[0] for item in s)
    continuation = ''.join(index_to_word[idx] for idx in pred)
    print("prediction: ", seed_text + continuation)

In [None]:
generate_text("clas")

In [None]:
generate_text("in n")

In [None]:
generate_text("for i in n",20)

Pytanie: czy sieć potrafi tylko odtworzyć to, co było w danych, czy nauczyła się zależności?

In [None]:
generate_text("for z in",20)

In [None]:
generate_text("for x in n",10)

In [None]:
generate_text("se")

In [None]:
generate_text("ri",10)

In [None]:
generate_text("pri",4)

In [None]:
generate_text("np.ar",10)

In [None]:
generate_text("train",50)

In [None]:
generate_text(".calculate",50)

In [None]:
generate_text(" calculate",50)

### Case study: IMDB

In [None]:
from keras.preprocessing import sequence

from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, SimpleRNN, LSTM, Bidirectional

from keras.callbacks import EarlyStopping

from keras.datasets import imdb

In [None]:
max_features = 5000
maxlen = 400
batch_size = 32
embedding_dims = 50
epochs = 3

In [None]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print(x_train[:3])

Zwróćmy uwagę w powyższym, że ciągi zaczynają się zawsze od "1" - jest to oznaczenie początku zdania. Czyli "początek zdania" będzie miał swój embedding. Dzięki temu sieć lepiej nauczy się uwzględniać, podczas "analizy" pierwszego słowa, fakt, że to słowo jest pierwsze.

Standaryzacja długości sekwencji (znalezienie najdłuższej, wypełnienie zerami pozostałych w taki sposób, aby wszystkie były jednakowej długości)

In [None]:
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print(x_train[0])
print(y_train[0])

In [None]:
n_train = 5000
n_test = 2000
x_train = x_train[:n_train]
y_train = y_train[:n_train]
x_test = x_test[:n_test]
y_test = y_test[:n_test]

## Embeddingi

Przeanalizujmy co się dzieje w RNN, gdy podajemy słowa w reprezentacji one hot.

## $$ h_t = f( W^h * h_{t-1} + W^x * x_t + b)$$

Zatem jeśli x to "one-hot" z 1 na pozycji $i$ to:

## $$ W^x * x_t = W^x[:,i] $$

Czyli wkład informacji embeddinga sprowadza się do wzięcia odpowiedniej kolumny macierzy wag.

Czyli i-ta kolumna macierzy wag jest w pewnym sensie reprezentacją słowa i.

Zatem pójdźmy krok dalej: stwórzmy sobie dodatkową warstwę w sieci, zawierającą reprezentacje słów, które będą przekazywane do wyliczenia stanu ukrytego.


Wówczas sieć z warstwą "embeddingów" ma postać:

$x_t$ - id słowa wejściowego w momencie $t$.

$EMB$ - macierz embeddingów

<br>

$$emb_t = EMB[x_t]$$
$$ h_t = f( W^h * h_{t-1} + W^x * emb_t + b)$$

<br>

Ta warstwa nazywa się EMBEDDING'ami (embedding layer).


<img src="https://image.slidesharecdn.com/translatefrombadenglishtogoodone-2-160606105036/95/aibigdata-lab-2016-11-638.jpg?cb=1465210454" width="700">
Źródło: https://www.slideshare.net/Geeks_Lab/aibigdata-lab-2016-62764857



### Zauważmy, że embeddingi są parametrami sieci, ale jednocześnie reprezentacją słów. Oznacza to, że trenując sieć, uczymy embeddingi, czyli uczymy się reprezentacji słów.


### Zadania

### Zwykła sieć rekurencyjna (z embeddingami)

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(SimpleRNN(100))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=5,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

In [None]:
model.evaluate(x_test, y_test, verbose=0)[1]

### Simple RNN + dense pomiędzy zwracanym stanem ukrytym a outputem

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(SimpleRNN(100))

model.add(Dense(100,activation="sigmoid"))
model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=5,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

In [None]:
model.evaluate(x_test, y_test, verbose=0)[1]

## Dwuwarstwowa sieć rekurencyjna

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(SimpleRNN(100,return_sequences=True))
model.add(SimpleRNN(100))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=5,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

In [None]:
model.evaluate(x_test, y_test, verbose=0)[1]

## Dwukierunkowa sieć rekurencyjna

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(Bidirectional(SimpleRNN(100)))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=5,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

model.evaluate(x_test, y_test, verbose=0)[1]

# LSTM

Prezentacja.

### Zadanie. Powtórz powyższe modele z komórką LSTM

Przyjąć patience = 1 w early stoppingu!

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(LSTM(100))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=1,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

model.evaluate(x_test, y_test, verbose=0)[1]

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(LSTM(100))
model.add(Dense(100,activation="sigmoid"))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=1,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

model.evaluate(x_test, y_test, verbose=0)[1]

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(LSTM(100,return_sequences=True))
model.add(LSTM(100))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=1,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

model.evaluate(x_test, y_test, verbose=0)[1]

In [None]:
model = Sequential()
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))

model.add(Bidirectional(LSTM(100)))

model.add(Dense(1,activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

early_stopping = EarlyStopping(patience=1,monitor="val_loss")

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs = 100,
          callbacks=[early_stopping],
          validation_split=0.25)

model.evaluate(x_test, y_test, verbose=0)[1]

### Case study: Analiza sentymentu

Przetestować:

1. Simple RNN
2. LSTM - porównaj na zbiorze testowym jakość działania modelu wziętego z najlepszej iteracji oraz modelu po zatrzymaniu uczenia
3. LSTM + warstwa dense na końcu
4. BiLSTM
5. dwuwarstwowy LSTM
6. CNN + LSTM - przepuścić dane przez warstwę konwolucyjną (conv1d) + max pooling, a następnie przejechać LSTM'em po tym, co wyszło.

In [1]:
import numpy as np

file_with_filtered_embeddings = "Dane/data_poleval/embeddings.txt"

words2ids = {}
# Rows 0 and 1 are reserved (all-zero vectors) for padding and unknown words.
embeddings = [np.zeros(300), np.zeros(300)]

# The file holds Polish words, so the encoding is given explicitly instead of
# relying on the platform default.
with open(file_with_filtered_embeddings, "r", encoding="utf-8") as f:
    for line in f:
        # split() without an argument tolerates the trailing newline,
        # repeated spaces and trailing spaces.
        toks = line.split()
        if not toks:
            continue  # skip blank lines
        word = toks[0]
        # A word's id is the row its vector will occupy in the matrix, i.e.
        # shifted by 2 so the special embeddings stay at positions 0 and 1.
        words2ids[word] = len(embeddings)
        embeddings.append(np.array([float(x) for x in toks[1:]]))


embeddings = np.array(embeddings)
print(embeddings.shape)

(5000, 300)


In [2]:
words2ids

{'sponsorom': 1337,
 'całemu': 773,
 'demokracja': 2893,
 'strojem': 2401,
 'słyszy': 4930,
 'głową': 1239,
 'wysyłka': 1551,
 'wystąpienie': 722,
 'ściekają': 1369,
 'zarzutów': 3847,
 'konkurować': 1728,
 'gustownie': 79,
 'czterech': 3102,
 'rząd': 882,
 'odnoszę': 260,
 'Mają': 2698,
 'wydajny': 1776,
 'spokojna': 2363,
 'gwarem': 206,
 'nią': 4047,
 'wnuczki': 4804,
 'zamierzam': 3688,
 'drzewno': 477,
 'internetowych': 2724,
 'perfum': 935,
 'życzenia': 2278,
 'dniu': 4716,
 'musi': 1826,
 'pociągający': 2079,
 'zmysłowych': 4983,
 'Madziu': 4746,
 'winduje': 2590,
 'długie': 2000,
 'białe': 2883,
 'zgodnego': 4887,
 'powoduje': 3265,
 'pierwsze': 2540,
 'Kuźnik': 4550,
 'bardziej': 1830,
 'dostajemy': 4166,
 'Kodaku': 2012,
 'niepraktyczne': 4468,
 'idealnie': 1642,
 'ginie': 164,
 'faktycznie': 3919,
 'markowych': 184,
 'odznacza': 2064,
 'wyrafinowany': 308,
 'urokliwy': 2334,
 'znawca': 3361,
 'kąpać': 3608,
 'zachęcających': 3275,
 'Doskonale': 3692,
 'użyte': 1663,
 'rześki

In [3]:
embeddings[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [4]:
from keras.preprocessing import sequence as seq

Using TensorFlow backend.


In [5]:
def load_and_transform_data_to_phrases(labels, parents, tokens, words2ids):
    """Read a line-aligned treebank and expand every sentence into labelled phrases.

    Each of the three files has one sentence per line with whitespace-separated
    items, one item per token:
      labels  -- '-1', '0' or '1', mapped to 0, 1, 2,
      parents -- 1-based index of the token's parent; a value outside
                 1..len(sentence) (e.g. 0) attaches the token to nothing,
      tokens  -- the words themselves.

    For every token one phrase is produced: the token together with all of
    its descendants in the parent tree, in sentence order.

    Parameters:
        labels, parents, tokens: paths of the three files.
        words2ids: mapping word -> id; words missing from it get id 1.

    Returns:
        List of tuples (token array, id array, label), one per token of every
        sentence. A single-token sentence yields the same tuple layout as any
        other phrase.
    """
    transform_label = {'-1': 0, '0': 1, '1': 2}

    # Context managers close the files even if parsing fails; the encoding is
    # explicit because the corpus contains non-ASCII (Polish) text.
    with open(labels, "r", encoding="utf-8") as f:
        all_labels = [[transform_label[y] for y in line.split()] for line in f]
    with open(parents, "r", encoding="utf-8") as f:
        all_parents = [[int(y) for y in line.split()] for line in f]
    with open(tokens, "r", encoding="utf-8") as f:
        all_tokens = [line.split() for line in f]

    result = []

    for labels_i, parents_i, tokens_i in zip(all_labels, all_parents, all_tokens):
        n = len(tokens_i)
        if n == 0:
            continue

        token_arr = np.array(tokens_i)
        id_arr = np.array([words2ids.get(t, 1) for t in tokens_i])

        # children[i] lists the positions whose parent is token i (parents
        # are 1-based), in ascending order.
        children = [[] for _ in range(n)]
        for j in range(n):
            parent = parents_i[j]
            if 1 <= parent <= n:
                children[parent - 1].append(j)

        for i in range(n):
            # Collect token i and all its descendants. `seen` guards against
            # malformed input where the parent links form a cycle.
            seen = {i}
            stack = [i]
            while stack:
                node = stack.pop()
                for child in children[node]:
                    if child not in seen:
                        seen.add(child)
                        stack.append(child)

            positions = np.sort(list(seen))
            result.append((token_arr[positions], id_arr[positions], labels_i[i]))

    return result

In [6]:
train_data = load_and_transform_data_to_phrases("Dane/data_poleval/training-treebank/rev_labels.txt", "Dane/data_poleval/training-treebank/rev_parents.txt","Dane/data_poleval/training-treebank/rev_sentence.txt",words2ids)
test_data = load_and_transform_data_to_phrases("Dane/data_poleval/gold_labels", "Dane/data_poleval/poleval_test/polevaltest_parents.txt","Dane/data_poleval/poleval_test/polevaltest_sentence.txt",words2ids) 

In [7]:
train_data[:5]

[(array(['Słodkawy'],
        dtype='<U8'), array([2731]), 1),
 (array(['Słodkawy', 'i', 'pełen', 'klasy', '.'],
        dtype='<U8'), array([2731, 1746, 1465,  515,    1]), 1),
 (array(['pełen'],
        dtype='<U8'), array([1465]), 2),
 (array(['pełen', 'klasy'],
        dtype='<U8'), array([1465,  515]), 2),
 (array(['.'],
        dtype='<U8'), array([1]), 1)]

In [8]:
import pandas as pd

# Inputs are the id sequences of each phrase.
X_train = [x[1] for x in train_data]
X_test = [x[1] for x in test_data]

# One-hot encode the labels against a fixed set of 3 classes (0, 1, 2).
# Indexing an identity matrix gives both splits the same columns even when a
# class is absent from one of them, and a dtype that does not depend on the
# pandas version.
_label_one_hot = np.eye(3, dtype=np.uint8)
y_train = _label_one_hot[np.array([x[2] for x in train_data], dtype=int)]
y_test = _label_one_hot[np.array([x[2] for x in test_data], dtype=int)]

In [9]:
X_train[:5]

[array([2731]),
 array([2731, 1746, 1465,  515,    1]),
 array([1465]),
 array([1465,  515]),
 array([1])]

In [10]:
y_train[:5]

array([[0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0]], dtype=uint8)

In [11]:
max_len = np.max([len(x[1]) for x in train_data+test_data])
print(max_len)

40


In [12]:
from keras.preprocessing import sequence

X_train = sequence.pad_sequences(X_train, maxlen=max_len,value=0)
X_test = sequence.pad_sequences(X_test, maxlen=max_len,value=0)

In [13]:
X_train[:5]

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0, 2731],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0, 2731, 1746, 1465,  515,    1],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0, 1465],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,  

In [14]:
X_train.shape

(9510, 40)

In [15]:
print(np.mean(y_train,axis=0))
np.mean(y_test,axis=0)

[ 0.02355415  0.7809674   0.19547844]


array([ 0.07232019,  0.7263721 ,  0.20130771])

In [16]:
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import Dense, LSTM, SimpleRNN, Bidirectional, Activation

from keras.callbacks import EarlyStopping

In [17]:
n_embeddings = embeddings.shape[0] # zawiera 1 na brakujace slowa i 1 na padding
embedding_vecor_length = 300

model = Sequential()
model.add(Embedding(n_embeddings, embedding_vecor_length, 
                    input_length=max_len, weights=[embeddings]))


model.add(SimpleRNN(100))
model.add(Dense(3,activation="softmax"))
#model.add(Activation("softmax"))
model.compile(loss='categorical_crossentropy', optimizer='sgd', 
              metrics=['categorical_accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 40, 300)           1500000   
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 100)               40100     
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 303       
_________________________________________________________________
activation_1 (Activation)    (None, 3)                 0         
Total params: 1,540,403
Trainable params: 1,540,403
Non-trainable params: 0
_________________________________________________________________
None


In [18]:
from keras.callbacks import ModelCheckpoint
import os

In [19]:
early_stopping = EarlyStopping(patience=3,monitor="val_loss")
take_best_model = ModelCheckpoint("wagi.h5py", save_best_only=True)

model.fit(X_train, y_train, validation_split=0.15, epochs=30, callbacks=[early_stopping,take_best_model], batch_size=32,)

print(model.evaluate(X_test, y_test, verbose=0)[1])

model.load_weights("wagi.h5py")
os.remove("wagi.h5py")

scores = model.evaluate(X_test, y_test, verbose=0)
print(model.evaluate(X_test, y_test, verbose=0)[1])

Train on 8083 samples, validate on 1427 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
0.780463641685
0.777095304058


Przeanalizuj accuracy na treningowym i walidacyjnym.

In [None]:
model = Sequential()
# Start from the pre-trained vectors loaded earlier in this notebook, the same
# way the SimpleRNN model does. The previous `embeddings_initializer=my_init`
# referred to a name that is never defined here.
model.add(Embedding(n_embeddings, embedding_vecor_length, input_length=max_len, weights=[embeddings]))

model.add(LSTM(100))
model.add(Dense(3))
model.add(Activation("softmax"))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
print(model.summary())

# Stop once validation accuracy has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_categorical_accuracy', patience=3, verbose=0, mode='auto', min_delta = 0)

model.fit(X_train, y_train, validation_split=0.15, epochs=30, batch_size=10, callbacks=[early_stopping])

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("TEST accuracy: %.2f%%" % (scores[1]*100))

In [None]:
model = Sequential()
# Start from the pre-trained vectors loaded earlier in this notebook, the same
# way the SimpleRNN model does. The previous `embeddings_initializer=my_init`
# referred to a name that is never defined here.
model.add(Embedding(n_embeddings, embedding_vecor_length, input_length=max_len, weights=[embeddings]))

model.add(Bidirectional(LSTM(100)))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['categorical_accuracy'])
print(model.summary())

In [None]:
early_stopping = EarlyStopping(monitor='val_categorical_accuracy', patience=1, verbose=0, mode='auto', min_delta = 0)

model.fit(X_train, y_train, validation_split=0.15, epochs=20, batch_size=10, callbacks=[early_stopping])

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("TEST accuracy: %.2f%%" % (scores[1]*100))

# Modelowanie szeregów czasowych

### Ostatnio modny trend w biznesie - zastosowanie sieci rekurencyjnych do modelowania szeregów czasowych (ogólnie danych zawierających wymiar czasowy). I jest to trend, który wynika z dobrych wyników tego podejścia.


Przykład na podstawie:

https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

In [None]:
import pandas
import matplotlib.pyplot as plt
dataframe = pandas.read_csv('Dane/international-airline-passengers.csv', usecols=[1], engine='python', skipfooter=3)
plt.plot(dataframe)
plt.show()

In [None]:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
numpy.random.seed(7)

In [None]:
dataset = dataframe.values
dataset = dataset.astype('float32')

In [None]:
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

In [None]:
plt.plot(dataset)
plt.show()

In [None]:
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Turn column 0 of ``dataset`` into (window, next value) training pairs.

    Window ``k`` holds rows ``k .. k + look_back - 1`` and its target is row
    ``k + look_back``. Windows start at 0 .. len(dataset) - look_back - 2, so
    the final row of ``dataset`` is never used as a target.

    Returns a pair of numpy arrays: (windows, targets).
    """
    window_starts = range(len(dataset) - look_back - 1)
    windows = [dataset[start:start + look_back, 0] for start in window_starts]
    targets = [dataset[start + look_back, 0] for start in window_starts]
    return numpy.array(windows), numpy.array(targets)

In [None]:
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [None]:
#reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [None]:
trainX[:10]

In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2,validation_split=0.1)

In [None]:
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

# THEANO

In [20]:
import theano

In [21]:
import theano.tensor as T

In [22]:
# Two symbolic scalars and their symbolic sum.
x, y = T.scalar(), T.scalar()
z = x + y

# Compile the expression graph into a callable taking (x, y).
f = theano.function([x, y], z)

In [23]:
f(2,3)

array(5.0)

In [24]:
theano.printing.debugprint(f)

Elemwise{add,no_inplace} [id A] ''   0
 |<TensorType(float64, scalar)> [id B]
 |<TensorType(float64, scalar)> [id C]


In [25]:
theano.pp(z)

'(<TensorType(float64, scalar)> + <TensorType(float64, scalar)>)'

In [26]:
# Named symbolic int32 scalar and its double.
x = T.iscalar("x")
y = 2 * x

f = theano.function([x], y)

In [27]:
theano.printing.debugprint(f)

Elemwise{mul,no_inplace} [id A] ''   0
 |TensorConstant{2} [id B]
 |x [id C]


In [28]:
x = T.scalar(name="x", dtype="int32")
y = 2 * x

# A compiled function may return several outputs; they come back as a list.
f = theano.function([x], [y, y**2])

In [29]:
f(3)

[array(6, dtype=int32), array(36, dtype=int32)]

Zadanie: obliczyć 2x - y - 2x i wypisać graf obliczeń.

In [35]:
x = T.scalar()
y = T.scalar("y")

# Algebraically the expression is just -y; the printed graph of the compiled
# function shows it reduced to a single negation.
z = 2*x - y/(0.5+0.5) - 2*x
f = theano.function([x, y], z)

theano.printing.debugprint(f)

Elemwise{neg,no_inplace} [id A] ''   0
 |y [id B]


In [36]:
# Symbolic inputs: two vectors and a matrix.
u, v = T.vector(), T.vector()
m = T.matrix()

# Product of m with the elementwise sum of u and v.
w = u + v
y = T.dot(m, w)

f = theano.function([u, v, m], y)

theano.printing.debugprint(f)

CGemv{inplace} [id A] ''   3
 |AllocEmpty{dtype='float64'} [id B] ''   2
 | |Shape_i{0} [id C] ''   1
 |   |<TensorType(float64, matrix)> [id D]
 |TensorConstant{1.0} [id E]
 |<TensorType(float64, matrix)> [id D]
 |Elemwise{add,no_inplace} [id F] ''   0
 | |<TensorType(float64, vector)> [id G]
 | |<TensorType(float64, vector)> [id H]
 |TensorConstant{0.0} [id I]


In [38]:
a = theano.shared(7)
a

<TensorType(int64, scalar)>

In [39]:
a.get_value()

array(7)

In [40]:
a.set_value(4)
a.get_value()

array(4)

In [41]:
a = 3

In [42]:
a

3

In [45]:
# Shared counter ("licznik") that is incremented by one on every call of f.
licznik = theano.shared(0)
x = T.scalar()
f = theano.function([x], x * 2, updates=[(licznik, licznik + 1)])

# Only multiples of 7 reach f, so the counter records three calls.
for i in range(20):
    if i % 7:
        continue
    print(f(i))

0.0
14.0
28.0


In [46]:
licznik.get_value()

array(3)

Pochodne

In [47]:
x = T.scalar()
y = x**2
# Symbolic derivative of y with respect to x, itself an expression graph.
g = T.grad(y,x)

In [48]:
theano.pp(g)

'((fill((<TensorType(float64, scalar)> ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (<TensorType(float64, scalar)> ** (TensorConstant{2} - TensorConstant{1})))'

In [50]:
f = theano.function([x],g)

In [51]:
theano.pp(f.maker.fgraph.outputs[0])

'(TensorConstant{2.0} * <TensorType(float64, scalar)>)'

In [62]:
x = T.scalar("x")
y = T.scalar("y")
z = x**2 + y**3
# Differentiating with respect to several variables yields one gradient
# expression per variable, in the same order.
g = T.grad(z,(x,y))

In [60]:
f = theano.function([x,y],g)
# Output 1 of the optimized graph is the gradient with respect to y.
theano.pp(f.maker.fgraph.outputs[1])

'Elemwise{Composite{(i0 * sqr(i1))}}(TensorConstant{3.0}, y)'

In [64]:
x, y = T.scalar("x"), T.scalar("y")
z = x**2 + y**3

# Unpack the per-variable gradients directly.
gx, gy = T.grad(z, wrt=[x, y])

In [65]:
theano.pp(gx)

'((fill(((x ** TensorConstant{2}) + (y ** TensorConstant{3})), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'

In [66]:
def fun(x):
    """Return twice the given value."""
    doubled = 2 * x
    return doubled

In [67]:
x = T.vector()

# Apply `fun` to every element of x; the updates returned by scan are unused.
results, _ = theano.scan(fun, sequences=[x])

In [68]:
f = theano.function([x],results)
f(np.array([1,2,3]))

array([ 2.,  4.,  6.])

In [69]:
def fun(x, y):
    """Return the product of the two arguments."""
    product = x * y
    return product

# Scan walks x and the rows of y in lockstep: step i receives the scalar x[i]
# and the row y[i].
x, y = T.vector(), T.matrix()
res, _ = theano.scan(fun, sequences=[x, y])

f = theano.function(inputs=[x, y], outputs=res)

In [70]:
f([2,3],np.array([[1,2],[3,4]]))

array([[  2.,   4.],
       [  9.,  12.]])

In [71]:
def fun(x, y):
    """Multiply the two arguments and return the result."""
    return x * y

x, y = T.vector(), T.matrix()

# n_steps=1 limits the scan to the first element of each sequence.
res, _ = theano.scan(fun, sequences=[x, y], n_steps=1)
f = theano.function(inputs=[x, y], outputs=res)

f([2, 3], np.array([[1, 2], [3, 4]]))

array([[ 2.,  4.]])

In [75]:
def fun(x):
    """Return the square of the given value."""
    squared = x * x
    return squared

# No input sequence here: the int32 constant 2 seeds the recurrence and each
# of the 5 steps squares the previous result. The last step (65536 * 65536 =
# 2**32) does not fit into int32 and wraps around to 0.
res, _ = theano.scan(fun, outputs_info=T.cast(2, "int32"), n_steps=5)
f = theano.function(inputs=[], outputs=res)

f()

array([    4,    16,   256, 65536,     0], dtype=int32)

x[t] = 5*x[t-1] + 2 (zał: x[-1]=5)

In [79]:
def fun(x):
    """Return 5*x + 2, one step of the affine recurrence."""
    scaled = 5 * x
    return scaled + 2

# Initial state of the recurrence, held in a shared variable.
x0 = theano.shared(5)

# Each of the 5 steps feeds the previous output back into `fun`.
res, _ = theano.scan(fun, outputs_info=x0, n_steps=5)
f = theano.function(inputs=[], outputs=res)

f()

array([   27,   137,   687,  3437, 17187])

In [80]:
x0.type()

<TensorType(int64, scalar)>

x[t] = x[t-1] + v[t].

In [91]:

def fun(v_t, x_tm1):
    """Return the previous state x[t-1] increased by the current input v[t]."""
    x_t = x_tm1 + v_t
    return x_t

v = T.vector()

# Running sum of v seeded with 2.0: scan hands the current sequence element
# to `fun` first and the previous output second.
x, _ = theano.scan(fun, sequences=[v], outputs_info=theano.shared(2.0))
f = theano.function(inputs=[v], outputs=x)

f([1, 2, 3])

array([ 3.,  5.,  8.])

x[t] = y[t-1] + u[t]

y[t] = x[t-1] + v[t]

z[t] = x[t] + y[t]

In [97]:
u = T.vector()
v = T.vector()

def fun(u_t, v_t, x_tm1, y_tm1):
    """Advance the coupled recurrence by one step.

    Returns the new x (previous y plus u), the new y (previous x plus v)
    and their sum.
    """
    new_x = y_tm1 + u_t
    new_y = x_tm1 + v_t
    total = new_x + new_y
    return new_x, new_y, total

# x and y are fed back into the next step (seeded with 2.0 and 3.0); z is
# output-only, so its outputs_info entry is None.
(x, y, z), _ = theano.scan(
    fun,
    sequences=[u, v],
    outputs_info=[theano.shared(2.0), theano.shared(3.0), None],
)

f = theano.function(inputs=[u, v], outputs=[x, y, z])

f([1, 2, 3], [1, 4, 7])

[array([  4.,   5.,  11.]),
 array([  3.,   8.,  12.]),
 array([  7.,  13.,  23.])]

In [99]:
a = T.scalar()
v = T.vector()

def fun(x, p):
    """Return x raised to the power p."""
    powered = x ** p
    return powered

# `a` is not iterated over: it is handed unchanged to every step as the
# exponent p.
res, _ = theano.scan(fun, sequences=[v], non_sequences=[a])
f = theano.function(inputs=[v, a], outputs=res)

f([1, 2, 3], 2)

array([ 1.,  4.,  9.])

x[t] = x[t-1] + v[t] + p

In [101]:
a = T.scalar()
v = T.vector()

def fun(v_t, x_tm1, p):
    """Return the previous state plus the current input plus the constant p."""
    x_t = x_tm1 + v_t + p
    return x_t

# Argument order seen by `fun`: sequence element, previous output, then the
# non-sequence constant.
res, _ = theano.scan(
    fun,
    sequences=[v],
    outputs_info=theano.shared(3.0),
    non_sequences=[a],
)

f = theano.function(inputs=[v, a], outputs=res)

f([1, 2, 3], 2)

array([  6.,  10.,  15.])

In [107]:
x = T.vector()
# Shared accumulator ("skumulowana suma" = cumulative sum) updated by the scan.
skumulowana_suma = theano.shared(0.0)


def fun(x):
    """Return the step outputs (2x, 3x) and an update adding x to the sum."""
    outputs = (2 * x, 3 * x)
    updates = {skumulowana_suma: skumulowana_suma + x}
    return outputs, updates


# The updates come back as scan's second result and are passed on to
# theano.function together with the outputs.
res, upd = theano.scan(fun, sequences=[x])
f = theano.function(inputs=[x], outputs=res, updates=upd)

print(f([1, 2, 3]))

skumulowana_suma.get_value()

[array([ 2.,  4.,  6.]), array([ 3.,  6.,  9.])]


array(6.0)

In [None]:
# T.ge(0, 1) tests 0 >= 1, which is false, so ifelse selects its second
# branch (w2).
# NOTE(review): w1 and w2 are not defined in the visible part of this
# notebook - confirm they exist before running this cell.
theano.ifelse.ifelse(T.ge(0,1),w1,w2)