In [34]:
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Dense, GRU, Embedding
from keras.callbacks import EarlyStopping, ModelCheckpoint

# importing data 

In [35]:
# Training corpus: a short lowercase passage about JARVIS, pasted twice back to back
# (the first copy carries one extra sentence, "Jarvis was designed by tony stark.").
data_text = """just a rather very intelligent system jarvis was originally tony stark's natural-language user interface computer system.
named after edwin jarvis.Jarvis was designed by tony stark. the butler who worked for howard stark.jarvis is designed by tony stark. over time. he was upgraded into an artificially intelligent system.jarvis is controlled by stark's commands.tasked with running business for stark industries as well as security for tony stark's mansion and stark tower. after creating the mark ii armor.
stark uploaded jarvis into all of the iron man armors. as well as allowing him to interact with the other avengers. giving them valuable
information during combat. during the ultron offensive. jarvis was destroyed by ultron. although his remaining programming codes unknowingly
continued to thwart ultron's plans of gaining access to nuclear missiles. his remains were found by stark. who uploaded them into a synthetic body made of vibranium and. in conjunction with ultron's personality and an infinity stone. an entirely new being was made: vision. jarvis duties were then taken over by friday.just a rather very intelligent system jarvis was originally tony stark's natural-language user interface computer system.
named after edwin jarvis. the butler who worked for howard stark.jarvis is designed by tony stark. over time. he was upgraded into an artificially intelligent system.jarvis is controlled by stark's commands.tasked with running business for stark industries as well as security for tony stark's mansion and stark tower. after creating the mark ii armor.
stark uploaded jarvis into all of the iron man armors. as well as allowing him to interact with the other avengers. giving them valuable
information during combat. during the ultron offensive. jarvis was destroyed by ultron. although his remaining programming codes unknowingly
continued to thwart ultron's plans of gaining access to nuclear missiles. his remains were found by stark. who uploaded them into a synthetic body made of vibranium and. in conjunction with ultron's personality and an infinity stone. an entirely new being was made: vision. jarvis duties were then taken over by friday."""

# Preprocessing the Text Data

In [36]:
import re

def text_cleaner(text):
    """Normalize raw text into a space-separated string of lowercase words.

    Steps, in order: lowercase the input, drop possessive ``'s`` endings,
    replace every non-letter character with a space, and discard words
    shorter than three characters.

    Args:
        text: The raw input string.

    Returns:
        The surviving words joined by single spaces ("" if none survive).
    """
    lowered = text.lower()
    # strip possessive endings ("stark's" -> "stark") before punctuation is blanked out
    without_possessive = re.sub(r"'s\b", "", lowered)
    # anything that is not an ASCII letter becomes a word separator
    letters_only = re.sub("[^a-zA-Z]", " ", without_possessive)
    # keep only words of three or more characters
    kept_words = [word for word in letters_only.split() if len(word) >= 3]
    return " ".join(kept_words).strip()

# preprocess the text: data_new is the cleaned corpus (one string of
# space-separated lowercase words) that every later cell works from
data_new = text_cleaner(data_text)

> Creating Sequences

In [37]:
def create_seq(text, length=30):
    """Slice ``text`` into overlapping windows of ``length + 1`` characters.

    Each window holds ``length`` input characters followed by the single
    character that comes next, so a caller can split it into inputs and a
    target. Consecutive windows are shifted by one character.

    Args:
        text: The corpus as a single string.
        length: Number of input characters per window. Defaults to 30,
            the value that used to be hard-coded.

    Returns:
        A list of strings, each ``length + 1`` characters long. The list is
        empty when ``text`` has ``length`` characters or fewer.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError('length must be a positive integer, got %r' % (length,))
    # window i covers text[i-length .. i], i.e. `length` inputs plus the target
    sequences = [text[i - length:i + 1] for i in range(length, len(text))]
    print('Total Sequences: %d' % len(sequences))
    return sequences

# build the overlapping character windows from the cleaned corpus
sequences = create_seq(data_new)

Total Sequences: 1944


>  
Encoding Sequences

In [38]:
# create a character mapping index
# (the distinct characters are sorted so the character -> index assignment
# is the same on every run; plain set iteration order is not guaranteed)
chars = sorted(list(set(data_new)))
# mapping: character -> integer index, numbered from 0 in sorted order
mapping = dict((c, i) for i, c in enumerate(chars))

def encode_seq(seq, char_to_index=None):
    """Integer-encode every string in ``seq``.

    Args:
        seq: Iterable of strings (or other iterables of characters).
        char_to_index: Dict from character to integer index. When omitted,
            the notebook-level ``mapping`` is used, which keeps existing
            ``encode_seq(sequences)`` calls working unchanged.

    Returns:
        A list with one list of integer indices per input line.

    Raises:
        KeyError: If a line contains a character missing from the lookup.
    """
    if char_to_index is None:
        # fall back to the global lookup defined in the notebook
        char_to_index = mapping
    return [[char_to_index[char] for char in line] for line in seq]

# encode the sequences
# NOTE(review): this rebinds `sequences` from a list of strings to a list of
# integer lists, so re-running this cell without first re-running the
# create_seq cell raises KeyError (the integers are not keys of `mapping`).
sequences = encode_seq(sequences)


> Create Training and Validation set

In [39]:

import numpy as np
from sklearn.model_selection import train_test_split

# vocabulary size (number of distinct characters in the cleaned corpus)
vocab = len(mapping)
sequences = np.array(sequences)
# create X and y: every row is one encoded window; all columns but the last
# are the model input, the last column is the character to predict
X, y = sequences[:,:-1], sequences[:,-1]
# one hot encode y
y = to_categorical(y, num_classes=vocab)
# create train and validation sets (10% held out, fixed seed for a repeatable split)
# NOTE(review): the windows overlap by all but one character, so a random
# split likely puts near-duplicates of training windows into the validation
# set - treat val_acc as optimistic; confirm whether that is acceptable.
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

print('Train shape:', X_tr.shape, 'Val shape:', X_val.shape)

Train shape: (1749, 30) Val shape: (195, 30)


> Model Building

In [40]:
# define model: character embedding -> single GRU layer -> softmax over the vocabulary
model = Sequential()
# input_length must match the number of input characters per training window (30)
model.add(Embedding(vocab, 50, input_length=30, trainable=True))
model.add(GRU(150, recurrent_dropout=0.1, dropout=0.1))
model.add(Dense(vocab, activation='softmax'))
# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line
model.summary()

# compile the model (targets are one-hot encoded, hence categorical cross-entropy)
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')
# fit the model
model.fit(X_tr, y_tr, epochs=60, verbose=2, validation_data=(X_val, y_val))

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 30, 50)            1200      
_________________________________________________________________
gru_2 (GRU)                  (None, 150)               90450     
_________________________________________________________________
dense_2 (Dense)              (None, 24)                3624      
Total params: 95,274
Trainable params: 95,274
Non-trainable params: 0
_________________________________________________________________
None
Train on 1749 samples, validate on 195 samples
Epoch 1/60
 - 3s - loss: 2.9936 - acc: 0.1447 - val_loss: 2.9106 - val_acc: 0.1128
Epoch 2/60
 - 2s - loss: 2.8168 - acc: 0.1675 - val_loss: 2.8068 - val_acc: 0.1590
Epoch 3/60
 - 2s - loss: 2.5519 - acc: 0.2842 - val_loss: 2.5562 - val_acc: 0.2410
Epoch 4/60
 - 2s - loss: 2.2818 - acc: 0.3282 - val_loss: 2.4985 - val_acc: 0.2

<keras.callbacks.callbacks.History at 0x7f8be0158c88>

# Inference

In [41]:
# generate a sequence of characters with a language model
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Extend ``seed_text`` by ``n_chars`` characters predicted by ``model``.

    On every step the text produced so far is integer-encoded, cut or padded
    to ``seq_length`` tokens by ``pad_sequences``, and the highest-scoring
    next character is appended.

    Args:
        model: Trained model whose ``predict`` returns one row of per-class
            scores for each input row.
        mapping: Dict from character to integer index.
        seq_length: Window length fed to the model.
        seed_text: Starting text; every character must be a key of ``mapping``.
        n_chars: Number of characters to generate.

    Returns:
        ``seed_text`` followed by the generated characters.

    Raises:
        KeyError: If the text contains a character missing from ``mapping``.
    """
    # invert the lookup once instead of scanning `mapping` on every step
    index_to_char = {index: char for char, index in mapping.items()}
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # truncate sequences to a fixed length (keeps the most recent characters)
        # NOTE(review): shorter inputs are padded with 0 by pad_sequences; if
        # `mapping` assigns index 0 to a real character the model cannot tell
        # padding from that character - confirm this is intended.
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # predict character: predict_classes() no longer exists in current
        # Keras releases, so take the argmax over predict() instead
        scores = model.predict(encoded, verbose=0)
        next_index = int(np.argmax(scores, axis=-1)[0])
        # append the looked-up character; the earlier version appended the
        # leaked loop variable `char` rather than the matched `out_char`
        in_text += index_to_char.get(next_index, '')
    return in_text

# Required outputs 

## #1. Question [jarvis was destroyed by?]

In [42]:
# seed prompt; print its length, then ask generate_seq to append 8 characters
# using a 30-character window
inp = 'jarvis was distroyed'
print(len(inp))
print(generate_seq(model,mapping,30,inp.lower(),8))

20
jarvis was distroyed ultron 


## #2. Question [Jarvis was created?]

In [50]:
# seed prompt; print its length, then ask generate_seq to append 8 characters
# using a 30-character window
# NOTE(review): this cell's execution count (In [50]) is out of order with its
# neighbours - re-run the notebook top to bottom to confirm the output.
inp = 'jarvis designed and created'
print(len(inp))
print(generate_seq(model,mapping,30,inp.lower(),8))

27
jarvis designed and created stark n


## #3. Question [Jarvis can object to Stark's]

In [44]:
# seed prompt; the .lower() below matters here because the seed contains an
# uppercase 'S' and `mapping` is built from lowercased text
inp = 'jarvis can object to Stark'
print(len(inp))
print(generate_seq(model,mapping,30,inp.lower(),8))

26
jarvis can object to stark uploade


## #4.Question  [a sophisticated artificial]

In [45]:
# seed prompt; this query asks for 14 generated characters instead of 8
inp = 'a sophisticated artificial'
print(len(inp))
print(generate_seq(model,mapping,30,inp.lower(),14))

26
a sophisticated artificially intelligent


#### Hope you like it.
Please answer back as soon as possible.
I have tried to improve my model with the following techniques:
    #Text Cleaning.
    #Text-to-Number Transformation.
    #Choosing the Optimal Regression Method
    #Normalized Corpus