## Keras Imports

In [26]:
# keras module for building LSTM 
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 

# Set seeds for reproducibility.
# NOTE(review): `tensorflow.set_random_seed` looks like the TF 1.x spelling —
# confirm against the TensorFlow version this notebook is pinned to.
from tensorflow import set_random_seed
from numpy.random import seed
set_random_seed(2)  # TensorFlow seed
seed(1)  # NumPy global RNG seed

## Other Imports 

In [93]:
import pickle
import pandas as pd
import numpy as np
import string, os 
from nltk.corpus import stopwords
from nltk.tokenize import TreebankWordTokenizer, sent_tokenize
from collections import Counter
import sys
import json
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.model_selection import StratifiedShuffleSplit

import boto3, re
from sagemaker import get_execution_role

# Execution role of the SageMaker session; `role` is not referenced again in
# the visible part of this notebook.
role = get_execution_role()

## Embedding words in a vector space 

In [5]:
# Load the pre-trained GloVe vectors into a dict: word -> float32 vector.
embeddings_index = dict()
# `with` closes the file even if a line fails to parse; the explicit encoding
# avoids decode errors on platforms whose default encoding is not UTF-8.
with open('glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


## Reading Lyrics file

In [17]:
# Read the five cleaned training batches from S3 and stack them into one frame.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies the growing frame on every iteration.
# NOTE(review): the doubled '.csv.csv' extension is kept as-is; presumably it
# matches the actual object keys — confirm against the bucket.
batch_frames = []
rows_loaded = 0
for i in range(1,6):
    data_location = 's3://clean-tbd-databucket/clean_training_batch_'+str(i)+'.csv.csv'
    batch_frames.append(pd.read_csv(data_location))
    rows_loaded += len(batch_frames[-1])
    # Same running total the incremental concat used to print.
    print(rows_loaded)
# Original row labels are kept (no ignore_index), as before.
lyric_df = pd.concat(batch_frames)

25679
36725
56253
64666
103875


## Tokenizing words 

In [123]:
# sent_tokenize needs the NLTK 'punkt' data; uncomment once if it is missing.
#nltk.download('punkt')
tokenizer = TreebankWordTokenizer()

def tokenize_lyric_summary(summary):
    """Lazily yield the Treebank tokens of `summary`, sentence by sentence.

    The text is first split into sentences with `sent_tokenize`; each sentence
    is then tokenized with the module-level `tokenizer`.
    """
    for sentence in sent_tokenize(summary):
        yield from tokenizer.tokenize(sentence)

UNKNOWN = 'unk'
def build_vocab(data, max_vocab_size=None):
    """Build a token -> integer id vocabulary from the `lyrics` column of `data`.

    Ids are assigned by descending frequency, starting at 0. The UNKNOWN token
    is guaranteed to be present: if it is not already a vocabulary word it is
    appended with the next free id.

    Parameters
    ----------
    data : DataFrame-like
        Must support `itertuples()` and expose a `lyrics` attribute per row.
    max_vocab_size : int or None
        Keep only the `max_vocab_size` most frequent tokens (None keeps all).

    Returns
    -------
    dict
        Mapping token -> id.
    """
    vocab = Counter()
    for i, row in enumerate(data.itertuples()):
        vocab.update(tokenize_lyric_summary(row.lyrics))
        # Progress indicator: one dot per 1000 rows.
        if (i+1)%1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
    final_vocab = {word:i for i, (word, count) in enumerate(vocab.most_common(max_vocab_size))}
    # Ids run 0..len-1, so the next free id is len(final_vocab). The previous
    # `len + 1` left a gap and also overwrote the id of a real 'unk' token.
    if UNKNOWN not in final_vocab:
        final_vocab[UNKNOWN] = len(final_vocab)
    return final_vocab

In [124]:
# Build the token -> id vocabulary from every lyric in lyric_df
# (build_vocab prints one dot per 1000 rows while it runs).
lyric_tokenizer = build_vocab(lyric_df)
print("Vocab size: ", len(lyric_tokenizer))

.......................................................................................................Vocab size:  433806


In [125]:
# Persist the token -> id mapping as JSON in the working directory.
with open("lyric_tokenizer.json", "w") as f:
    json.dump(lyric_tokenizer, f)
    print("Saved lyric_tokenizer file to lyric_tokenizer.json")

Saved lyric_tokenizer file to lyric_tokenizer.json


## Creating Embedding matrix 
## Each row index is a word's token number
## Each row's columns hold that word's GloVe embedding vector

In [156]:
# Build the embedding lookup table: row i holds the GloVe vector of the word
# whose id in lyric_tokenizer is i; words without a GloVe vector keep zeros.
# The table has one row per GloVe word (not per vocabulary word) and 100
# columns, matching the 100-d GloVe file loaded above.
embedding_matrix = np.zeros((len(embeddings_index), 100))
cnt = 0  # number of vocabulary entries that fit into the table
for word, i in lyric_tokenizer.items():
    # Check the id itself rather than counting iterations: an id beyond the
    # table (e.g. the UNKNOWN entry) may appear anywhere in the dict, and
    # would otherwise raise IndexError on assignment.
    if i >= embedding_matrix.shape[0]:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
    cnt += 1

## Building LSTM model: embedding -> LSTM -> Dense -> Softmax -> Output (10 nodes)
## Loss: categorical cross-entropy

In [171]:
# Model configuration shared by the cells below.
max_sequence_len = 100  # sequence length passed to create_model
embedding_dimension = 100  # width of one embedding vector
total_words = np.shape(embedding_matrix)[0]  # number of rows in the embedding table
print (total_words)
NumberGenres = 10  # size of the softmax output layer

400000


In [179]:
def create_model(max_sequence_len, total_words):
    """Build and compile the LSTM genre classifier.

    Architecture: LSTM(100) -> Dropout(0.1) -> Dense(NumberGenres, softmax),
    compiled with categorical cross-entropy and the Adam optimizer.

    No Embedding layer is used: each sample is expected to be a
    (max_sequence_len, embedding_dimension) matrix of already-embedded tokens.

    Parameters
    ----------
    max_sequence_len : int
        Number of time steps (tokens) per sample.
    total_words : int
        Unused. It only mattered for the disabled Embedding layer and is kept
        so existing calls keep working.

    Returns
    -------
    keras.models.Sequential
        The compiled model.

    Notes
    -----
    Reads the notebook globals `embedding_dimension` and `NumberGenres`.
    """
    model = Sequential()

    # Hidden layer: the input shape now follows the arguments instead of a
    # hard-coded (100, 100), so max_sequence_len is actually honoured.
    model.add(LSTM(100, input_shape=(max_sequence_len, embedding_dimension)))
    model.add(Dropout(0.1))

    # Output layer: one probability per genre.
    model.add(Dense(NumberGenres, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

# Instantiate the classifier and print its layer summary.
model = create_model(max_sequence_len, total_words)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dropout_6 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1010      
Total params: 81,410
Trainable params: 81,410
Non-trainable params: 0
_________________________________________________________________


## Create Modeling file

In [64]:
# Append one-hot genre columns (genre_<name>) to the frame.
# NOTE: not idempotent — re-running this cell appends a second set of genre_*
# columns, which shifts the positional slices used further down
# (iloc[:, 0:9] in split, row[9:-1] in create_label).
lyric_df = pd.concat([lyric_df,pd.get_dummies(lyric_df['genre'], prefix='genre')],axis=1)


In [72]:
# Inspect the frame after adding the genre columns.
lyric_df

Unnamed: 0.1,Unnamed: 0,genre,song,lyrics,Hillarylyrics,Satorulyrics,Hillary100words,Satoru100words,artist,genre_Country,genre_Electronic,genre_Folk,genre_Hip-Hop,genre_Indie,genre_Jazz,genre_Metal,genre_Pop,genre_R&B,genre_Rock,hillarylength
0,0,Country,silver wings,Silver wings Shine in the sunlight. Roaring en...,Silver wings Shine in the sunlight Roaring eng...,Silver wings Shine in the sunlight . Roaring ...,"['you', 'away', ""Leavin'"", 'me', 'lonely', 'Si...","['.', ""They're"", 'taking', 'you', 'away', '.',...",garrett hedlund,1,0,0,0,0,0,0,0,0,0,889
1,1,Country,she even woke me up to say goodbye,Morning's come and Lord my mind is aching That...,Morning's come and Lord my mind is aching That...,Morning's come and Lord my mind is aching That...,"['breaking', 'As', 'my', 'tears', 'go', 'tumbl...","['door', 'But', 'just', 'like', 'the', 'dawn',...",charlie louvin,1,0,0,0,0,0,0,0,0,0,785
2,2,Country,amazing grace,Amazing grace! How sweet the sound That save...,Amazing grace How sweet the sound That saved...,Amazing grace How sweet the sound That saved...,"['see', ""'Twas"", 'grace', 'that', 'taught', 'm...","['.', 'I', 'once', 'was', 'lost', ',', 'but', ...",gene watson,1,0,0,0,0,0,0,0,0,0,795
3,3,Country,every once in a while,Every once in a while I think about you And yo...,Every once in a while I think about you And yo...,Every once in a while I think about you And yo...,"['memories', ""won't"", 'fade', 'From', 'the', '...","['I', 'hear', 'an', 'old', 'song', 'And', ""I'm...",bruce robison,1,0,0,0,0,0,0,0,0,0,793
4,4,Country,5-1-5-0,"Ever since I met you girl, you been on my brai...",Ever since I met you girl you been on my brain...,"Ever since I met you girl , you been on my br...","['nothing', 'else', 'but', 'you', 'all', 'nigh...","['the', 'po-po', 'I’m', 'goin’', 'crazy', ',',...",dierks bentley,1,0,0,0,0,0,0,0,0,0,804
5,5,Country,hard truth,You Don't know who I [E] am But I know all abo...,You Don't know who I E am But I know all about...,You Don't know who I E am But I know all about...,"['truth', 'vocal', 'only', 'B7', 'There', 'is'...","['you', ""I've"", 'come', 'to', 'talk', 'to', 'y...",george jones,1,0,0,0,0,0,0,0,0,0,746
6,6,Country,after the fire is gone,Love is where you find it When you find no lov...,Love is where you find it When you find no lov...,Love is where you find it When you find no lov...,"['When', 'you', 'find', 'no', 'love', 'at', 'h...","['When', 'you', 'find', 'no', 'love', 'at', 'h...",garth brooks,1,0,0,0,0,0,0,0,0,0,785
7,7,Country,a house with no curtains,We still were our rings We still say I love yo...,We still were our rings We still say I love yo...,We still were our rings We still say I love yo...,"['so', 'well', 'But', 'everyone', 'knows', ""It...","['a', 'sad', 'show', 'And', ""we're"", 'only', ""...",alan jackson,1,0,0,0,0,0,0,0,0,0,825
8,8,Country,wanderin',(Wanderin' wanderin' mhm) My father is an eng...,Wanderin' wanderin' mhm My father is an engin...,Wanderin' wanderin' mhm My father is an engin...,"[""wanderin'"", 'mhm', 'My', 'father', 'is', 'an...","['father', 'is', 'an', 'engineer', 'my', 'brot...",eddy arnold,1,0,0,0,0,0,0,0,0,0,800
9,9,Country,katie bar the door,I could tell the way she held me Out on that s...,I could tell the way she held me Out on that s...,I could tell the way she held me Out on that s...,"['and', 'white', 'TV', ""I's"", 'under', 'the', ...","[""ain't"", 'never', 'been', 'Somehow', 'the', '...",cledus t judd,1,0,0,0,0,0,0,0,0,0,782


In [84]:
def eval_lyric(liststr):
    """Parse the string form of a Python list (e.g. "['a', 'b']") into a list.

    Parameters
    ----------
    liststr : str or list
        Text as stored in the CSV. A value that is already a list is returned
        unchanged, so the parsing cell can be re-run without wiping the column.

    Returns
    -------
    list
        The parsed list, or [] when the value is missing, malformed, or does
        not evaluate to a list.
    """
    import ast  # local import keeps this cell self-contained

    if isinstance(liststr, list):
        return liststr
    try:
        # literal_eval only accepts Python literals, so text coming from the
        # data file cannot execute arbitrary code the way eval() could.
        parsed = ast.literal_eval(liststr)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []
    return parsed if isinstance(parsed, list) else []
        

In [85]:
# Row count before parsing.
print(len(lyric_df))
# Turn the stringified word lists into real lists and record their lengths.
lyric_df['Hillary100words'] = lyric_df['Hillary100words'].apply(eval_lyric)
lyric_df['hillarylength'] = lyric_df['Hillary100words'].apply(len)

# How many rows carry exactly 100 words, and the element type after parsing.
print((lyric_df['hillarylength'] == 100).sum())
print(type(lyric_df['Hillary100words'].iloc[0]))

103875
102155
<class 'list'>


In [86]:
# Keep only the rows whose parsed word list has exactly 100 entries.
lyricdf = lyric_df[lyric_df['hillarylength'] == 100]

In [96]:
# Inspect the filtered frame.
lyricdf

Unnamed: 0.1,Unnamed: 0,genre,song,lyrics,Hillarylyrics,Satorulyrics,Hillary100words,Satoru100words,artist,genre_Country,genre_Electronic,genre_Folk,genre_Hip-Hop,genre_Indie,genre_Jazz,genre_Metal,genre_Pop,genre_R&B,genre_Rock,hillarylength
1385,1501,Country,drowned in the deep blue sea,It was on one Sunday evening just above the ai...,It was on one Sunday evening just above the ai...,It was on one Sunday evening just above the ai...,"[to, write, me, a, letter, he, promised, to, w...","['air', 'off', 'tree', 'When', 'my', ""darlin'""...",flatt and scruggs,1,0,0,0,0,0,0,0,0,0,100
1386,1502,Country,city of refuge,"You better run, you better run You better run ...",You better run you better run You better run a...,"You better run , you better run You better ru...","[better, run, You, better, run, to, the, City,...","['run', 'to', 'the', 'City', 'of', 'Refuge', '...",grant lee phillips,1,0,0,0,0,0,0,0,0,0,100
1387,1503,Country,snowin' on raton [#],When the wind don't blow in Amarillo And the m...,When the wind don't blow in Amarillo And the m...,When the wind don't blow in Amarillo And the m...,"[I'll, be, through, them, hills, and, gone, Mo...","['When', 'the', 'wind', ""don't"", 'blow', 'in',...",emmylou harris,1,0,0,0,0,0,0,0,0,0,100
1388,1504,Country,my first lover,My first lover My first lover He was tall and ...,My first lover My first lover He was tall and ...,My first lover My first lover He was tall and ...,"[him, now, My, first, lover, My, first, lover,...","['was', 'tall', 'and', 'breezy', 'with', 'his'...",gillian welch,1,0,0,0,0,0,0,0,0,0,100
1389,1505,Country,jesus take the wheel,She was driving last Friday on her way to Cinc...,She was driving last Friday on her way to Cinc...,She was driving last Friday on her way to Cinc...,"[the, backseat, Fifty, miles, to, go, and, she...","['and', 'she', 'was', 'running', 'low', 'on', ...",carrie underwood,1,0,0,0,0,0,0,0,0,0,100
1390,1507,Country,house and home,House And Home (Trevor Rogers) For forty days ...,House And Home Trevor Rogers For forty days an...,House And Home Trevor Rogers For forty days an...,"[nights, the, rain, it, did, fall, down, Worke...","['For', 'forty', 'days', 'and', 'forty', 'nigh...",clumsy lovers,1,0,0,0,0,0,0,0,0,0,100
1391,1508,Country,rails,"Hey, lay me down some rails boys Don't put m...",Hey lay me down some rails boys Don't put me...,"Hey , lay me down some rails boys Don't put...","[in, honest, thinking, And, I, never, cared, t...","['boys', 'And', 'if', 'I', 'make', 'a', 'littl...",david allan coe,1,0,0,0,0,0,0,0,0,0,100
1392,1509,Country,nobody but a fool would love you,NOBODY BUT A FOOL (WOULD LOVE YOU) (Bill Ander...,NOBODY BUT A FOOL WOULD LOVE YOU Bill Anderson...,NOBODY BUT A FOOL WOULD LOVE YOU Bill Anderson...,"[Rose, Publications, RECORDED, BY, CONNIE, SMI...","['a', 'fool', 'I', ""don't"", 'know', 'what', 'i...",bill anderson,1,0,0,0,0,0,0,0,0,0,100
1393,1510,Country,send back my heart,By now I really should be over you But I might...,By now I really should be over you But I might...,By now I really should be over you But I might...,"[now, I, really, should, be, over, you, But, I...","['just', 'give', 'up', 'tryin`', ""You've"", 'go...",gary allan,1,0,0,0,0,0,0,0,0,0,100
1394,1512,Country,she's tired of boys,She walked onto the job site We damn near died...,She walked onto the job site We damn near died...,She walked onto the job site We damn near died...,"[dates, And, I’m, tired, of, toys, I, want, a,...","['onto', 'the', 'job', 'site', 'We', 'damn', '...",garth brooks,1,0,0,0,0,0,0,0,0,0,100


In [97]:
# Drop the helper length column now that the filter has been applied.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the column is already gone.
lyricdf = lyricdf.drop(['hillarylength'], axis = 1, errors='ignore')

In [114]:
# Inspect the frame after dropping the helper column.
lyricdf

Unnamed: 0.1,Unnamed: 0,genre,song,lyrics,Hillarylyrics,Satorulyrics,Hillary100words,Satoru100words,artist,genre_Country,genre_Electronic,genre_Folk,genre_Hip-Hop,genre_Indie,genre_Jazz,genre_Metal,genre_Pop,genre_R&B,genre_Rock
1385,1501,Country,drowned in the deep blue sea,It was on one Sunday evening just above the ai...,It was on one Sunday evening just above the ai...,It was on one Sunday evening just above the ai...,"[to, write, me, a, letter, he, promised, to, w...","['air', 'off', 'tree', 'When', 'my', ""darlin'""...",flatt and scruggs,1,0,0,0,0,0,0,0,0,0
1386,1502,Country,city of refuge,"You better run, you better run You better run ...",You better run you better run You better run a...,"You better run , you better run You better ru...","[better, run, You, better, run, to, the, City,...","['run', 'to', 'the', 'City', 'of', 'Refuge', '...",grant lee phillips,1,0,0,0,0,0,0,0,0,0
1387,1503,Country,snowin' on raton [#],When the wind don't blow in Amarillo And the m...,When the wind don't blow in Amarillo And the m...,When the wind don't blow in Amarillo And the m...,"[I'll, be, through, them, hills, and, gone, Mo...","['When', 'the', 'wind', ""don't"", 'blow', 'in',...",emmylou harris,1,0,0,0,0,0,0,0,0,0
1388,1504,Country,my first lover,My first lover My first lover He was tall and ...,My first lover My first lover He was tall and ...,My first lover My first lover He was tall and ...,"[him, now, My, first, lover, My, first, lover,...","['was', 'tall', 'and', 'breezy', 'with', 'his'...",gillian welch,1,0,0,0,0,0,0,0,0,0
1389,1505,Country,jesus take the wheel,She was driving last Friday on her way to Cinc...,She was driving last Friday on her way to Cinc...,She was driving last Friday on her way to Cinc...,"[the, backseat, Fifty, miles, to, go, and, she...","['and', 'she', 'was', 'running', 'low', 'on', ...",carrie underwood,1,0,0,0,0,0,0,0,0,0
1390,1507,Country,house and home,House And Home (Trevor Rogers) For forty days ...,House And Home Trevor Rogers For forty days an...,House And Home Trevor Rogers For forty days an...,"[nights, the, rain, it, did, fall, down, Worke...","['For', 'forty', 'days', 'and', 'forty', 'nigh...",clumsy lovers,1,0,0,0,0,0,0,0,0,0
1391,1508,Country,rails,"Hey, lay me down some rails boys Don't put m...",Hey lay me down some rails boys Don't put me...,"Hey , lay me down some rails boys Don't put...","[in, honest, thinking, And, I, never, cared, t...","['boys', 'And', 'if', 'I', 'make', 'a', 'littl...",david allan coe,1,0,0,0,0,0,0,0,0,0
1392,1509,Country,nobody but a fool would love you,NOBODY BUT A FOOL (WOULD LOVE YOU) (Bill Ander...,NOBODY BUT A FOOL WOULD LOVE YOU Bill Anderson...,NOBODY BUT A FOOL WOULD LOVE YOU Bill Anderson...,"[Rose, Publications, RECORDED, BY, CONNIE, SMI...","['a', 'fool', 'I', ""don't"", 'know', 'what', 'i...",bill anderson,1,0,0,0,0,0,0,0,0,0
1393,1510,Country,send back my heart,By now I really should be over you But I might...,By now I really should be over you But I might...,By now I really should be over you But I might...,"[now, I, really, should, be, over, you, But, I...","['just', 'give', 'up', 'tryin`', ""You've"", 'go...",gary allan,1,0,0,0,0,0,0,0,0,0
1394,1512,Country,she's tired of boys,She walked onto the job site We damn near died...,She walked onto the job site We damn near died...,She walked onto the job site We damn near died...,"[dates, And, I’m, tired, of, toys, I, want, a,...","['onto', 'the', 'job', 'site', 'We', 'damn', '...",garth brooks,1,0,0,0,0,0,0,0,0,0


## Create Test/ Training data set 

In [115]:
def split(df, test_size):
    """Split `df` into train and test frames, stratified by genre.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the one-hot label columns named `genre_*`.
    test_size : float
        Fraction of rows assigned to the test partition.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Independent copies of the train and test rows, so callers can add
        columns without chained-assignment problems.

    Raises
    ------
    ValueError
        If `df` has no `genre_*` columns.
    """
    # Select the label columns by name rather than by position, so extra
    # leading or trailing columns (e.g. 'X', 'Y_out') cannot leak into labels.
    label_columns = [col for col in df.columns if str(col).startswith('genre_')]
    if not label_columns:
        raise ValueError("split() needs one-hot 'genre_*' columns to stratify on")
    # StratifiedShuffleSplit does not work with one-hot / multi-label targets,
    # so collapse each row to the index of its active genre.
    data_y = np.argmax(df[label_columns].values, axis=1)
    stratified_split = StratifiedShuffleSplit(n_splits=2, test_size=test_size, random_state=42)
    # Only the number of samples is taken from X, so a zero placeholder avoids
    # materialising the whole frame as an object array.
    placeholder_x = np.zeros(len(df))
    # Two splits are drawn and the last one is kept. This mirrors the original
    # loop, which overwrote its result each iteration, so the resulting
    # partitions are unchanged for the fixed random_state.
    train_index, test_index = list(stratified_split.split(placeholder_x, data_y))[-1]
    return df.iloc[train_index].copy(), df.iloc[test_index].copy()

In [116]:
# Hold out 33% of the filtered rows as the test set.
train, test = split(lyricdf, 0.33)
#Split the train further into train and validation
# (20% of the remaining rows go to validation; `validation` is not used by the
# visible cells below)
train, validation = split(train, 0.2)

In [140]:
def generate_modelling_file(words):
    """Turn a sequence of tokens into a matrix of embedding vectors.

    Each token is looked up in the notebook-global `lyric_tokenizer`; the
    resulting id selects a row of the notebook-global `embedding_matrix`.
    Tokens that are unknown, unhashable, or whose id lies beyond the table are
    mapped to the table's last row.

    NOTE(review): the last row is also the slot of the word with id
    len(embedding_matrix) - 1, so the fallback may carry that word's vector
    rather than zeros — confirm this is intended.

    Parameters
    ----------
    words : iterable
        Tokens of one lyric sample.

    Returns
    -------
    numpy.ndarray
        Array with one embedding row per token.
    """
    fallback_row = len(embedding_matrix) - 1
    index = []
    for word in words:
        try:
            token_id = lyric_tokenizer[word]
        except (KeyError, TypeError):
            # Unknown token (KeyError) or unhashable value (TypeError).
            token_id = fallback_row
        # Clamp ids that do not fit into the embedding table.
        index.append(min(token_id, fallback_row))

    return embedding_matrix[index,:]

In [150]:
def create_label(row, prefix='genre_'):
    """Return the one-hot genre label of a DataFrame row as an array.

    The label columns are selected by name (`prefix`) instead of the positional
    slice `row[9:-1]`, which was only correct while exactly nine columns came
    before the genre columns and exactly one after them. Re-running the cell
    after 'Y_out' existed, for example, pulled 'X' into the label.

    Parameters
    ----------
    row : pandas.Series
        One row of the modelling frame, indexed by column name.
    prefix : str
        Prefix shared by the one-hot label columns.

    Returns
    -------
    numpy.ndarray
        Values of the label columns, in column order.
    """
    label_columns = [col for col in row.index if str(col).startswith(prefix)]
    return row[label_columns].values



In [149]:
# Spot-check the label slice (columns 9 up to, not including, the last one)
# on the first test row.
test.iloc[0,9:-1].values

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=object)

In [143]:
# Store each sample's embedding matrix (one row per token) in column 'X'.
# The `validation` frame is not given an 'X' column in the visible cells.
train['X'] = train['Hillary100words'].apply(generate_modelling_file)
test['X'] = test['Hillary100words'].apply(generate_modelling_file)


In [144]:
# Inspect the test frame with its new 'X' column.
test

Unnamed: 0.1,Unnamed: 0,genre,song,lyrics,Hillarylyrics,Satorulyrics,Hillary100words,Satoru100words,artist,genre_Country,genre_Electronic,genre_Folk,genre_Hip-Hop,genre_Indie,genre_Jazz,genre_Metal,genre_Pop,genre_R&B,genre_Rock,X
19049,34355,Rock,badlands,I'm a pistol packin' man. with a gun in my han...,I'm a pistol packin' man with a gun in my hand...,I'm a pistol packin' man . with a gun in my h...,"[me, alive, I'm, a, man, with, a, fast, hand, ...","['chassis', 'lassie', 'like', 'the', '4th', 'o...",ac dc,0,0,0,0,0,0,0,0,0,1,"[[0.05671900138258934, 0.13333000242710114, 0...."
22049,24994,Hip-Hop,billets verts,"Back to the future Izi Derniers gamos, derniÃ¨...",Back to the future Izi Derniers gamos derniÃ¨r...,"Back to the future Izi Derniers gamos , derni...","[je, me, trouve, dans, l'coeur, des, youvs, da...","['elle', 'te', 'pardonne', 'Gallardo', 'noire'...",booba,0,0,0,1,0,0,0,0,0,0,"[[-0.2851400077342987, -0.5536800026893616, 0...."
2035,2382,Hip-Hop,grand theft audio,"And already, platinum posters and plaques I do...",And already platinum posters and plaques I don...,"And already , platinum posters and plaques I ...","[don't, need, a, reason, to, kill, I'm, like, ...","['They', 'wonder', 'how', 'I', 'flow', 'so', '...",dj envy,0,0,0,1,0,0,0,0,0,0,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
12080,13762,Electronic,never enough,I wake up And my feet don't touch the ground I...,I wake up And my feet don't touch the ground I...,I wake up And my feet don't touch the ground I...,"[coming, from, We, been, waiting, for, this, m...","['Our', 'true', 'intentions', ""ain't"", 'exactl...",dirty vegas,0,1,0,0,0,0,0,0,0,0,"[[0.0667089968919754, 0.2744799852371216, 0.81..."
11458,12925,Electronic,euro trash girl,Well I've been up to Paris And I've slept in a...,Well I've been up to Paris And I've slept in a...,Well I've been up to Paris And I've slept in a...,"[phone, Ya, know, she, never, did, like, me, B...","['your', 'dad', '.', '""', 'And', 'the', 'waitr...",chicks on speed,0,1,0,0,0,0,0,0,0,0,"[[0.03176400065422058, -0.6076800227165222, 0...."
3746,4118,Country,disarm,Disarm you with a smile And cut you like you w...,Disarm you with a smile And cut you like you w...,Disarm you with a smile And cut you like you w...,"[me, here, To, wither, in, denial, The, bitter...","['boy', 'supposed', 'to', 'do', '?', 'The', 'k...",the civil wars,1,0,0,0,0,0,0,0,0,0,"[[0.05671900138258934, 0.13333000242710114, 0...."
21773,32518,Pop,estuve,"Estuve, en cada poro de su piel pero hoy no es...",Estuve en cada poro de su piel pero hoy no est...,"Estuve , en cada poro de su piel pero hoy no ...","[debia, cuando, ella, queria, sentirse, mujer,...","['ella', 'queria', 'sentirse', 'mujer', 'no', ...",alejandro fernandez,0,0,0,0,0,0,0,1,0,0,"[[-0.04410799965262413, -0.45434999465942383, ..."
17369,27718,Pop,lucille,Ah Lucille Baby do your sister's will Ah Luc...,Ah Lucille Baby do your sister's will Ah Luc...,Ah Lucille Baby do your sister's will Ah Luc...,"[will, Well, you, went, up, and, married, But,...","['your', ""sister's"", 'will', 'Ah', 'Lucille', ...",don rich,0,0,0,0,0,0,0,1,0,0,"[[-0.2670300006866455, 0.4491100013256073, 0.5..."
3725,4716,Metal,on being a bastard,He took on a shade of green long ago. And many...,He took on a shade of green long ago And many ...,He took on a shade of green long ago . And ma...,"[many, a, fool, along, the, way, have, been, l...","['witness', 'to', 'this', '.', 'but', 'try', '...",coalesce,0,0,0,0,0,0,1,0,0,0,"[[-0.3291400074958801, 0.8288699984550476, -0...."
9789,19506,Pop,i know now,Hey I've tried just about everything Riches an...,Hey I've tried just about everything Riches an...,Hey I've tried just about everything Riches an...,"[give, Him, a, chance, And, He'll, give, you, ...","['in', 'Him', 'I', 'found', 'all', ""I'll"", 'ev...",brandy,0,0,0,0,0,0,0,1,0,0,"[[-0.03760800138115883, 0.15682999789714813, 0..."


In [151]:
# Attach the label array produced by create_label to every row as 'Y_out'.
train['Y_out'] = train.apply(create_label, axis=1)
test['Y_out'] = test.apply(create_label, axis=1)


In [153]:
# Shape check of one training sample's feature matrix.
np.shape(train['X'].iloc[1000])

(100, 100)

In [173]:
# Peek at the embedded features of one training sample.
train['X'].iloc[1000]

array([[-0.2687    ,  0.81708002,  0.69896001, ..., -0.40110001,
         0.74656999,  0.31121999],
       [ 0.058792  ,  0.46122   ,  0.95361   , ...,  0.18595   ,
         0.074304  ,  0.90739   ],
       [-0.093402  ,  0.55602002,  0.74852002, ..., -0.6857    ,
         0.20875999,  0.59025002],
       ...,
       [ 0.19073001,  0.56863999,  0.72026998, ..., -0.33460999,
         0.044349  ,  0.57541001],
       [-0.15180001,  0.38409001,  0.89340001, ..., -0.27123001,
         0.22157   ,  0.92111999],
       [-0.10191   , -0.186     ,  0.25920999, ..., -0.35565999,
         0.35576001, -0.99112999]])

In [169]:
#np.shape(train['X'])
#np.shape(train['Y_out'])

# Stack the per-row arrays into dense arrays and confirm their shapes before
# handing them to model.fit.
print (np.shape(np.array(train['X'].tolist())))
print (np.shape(np.array(train['Y_out'].tolist())))

(54754, 100, 100)
(54754, 10)


In [None]:
# Train for 10 epochs on the stacked training features and labels.
# No validation data is passed to fit here.
model.fit(np.array(train['X'].tolist()),np.array(train['Y_out'].tolist()), epochs=10)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 4928/54754 [=>............................] - ETA: 5:18 - loss: 1.5257

In [25]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Extend `seed_text` by `next_words` words predicted by `model`.

    Each step encodes the current text, left-pads it to `max_sequence_len - 1`
    ids, asks the model for the most likely class, and appends the word with
    that id (or an empty string when no word has it).

    NOTE(review): this relies on a notebook-global `tokenizer` exposing
    `texts_to_sequences` and `word_index`. The `tokenizer` defined earlier in
    this notebook is a TreebankWordTokenizer, and the model built above is a
    genre classifier — this cell presumably comes from a different
    (text-generation) session; confirm before running.

    Parameters
    ----------
    seed_text : str
        Starting text.
    next_words : int
        Number of words to append.
    model
        Object exposing `predict_classes(x, verbose=0)`.
    max_sequence_len : int
        Sequence length the model was trained with (input is one shorter).

    Returns
    -------
    str
        The extended text, title-cased.
    """
    # Build the id -> word lookup once instead of scanning the whole vocabulary
    # for every generated word. setdefault keeps the first word per id, like
    # the original linear scan with `break`.
    index_to_word = {}
    for word, index in tokenizer.word_index.items():
        index_to_word.setdefault(index, word)

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict_classes(token_list, verbose=0)

        # predict_classes returns one class id per input row; there is one row.
        output_word = index_to_word.get(int(predicted[0]), "")
        seed_text += " "+output_word
    return seed_text.title()

In [26]:
# Generate 100 additional words from the seed sentence.
generate_text("Dear Illinois department of insurance we love you and we hope that", 100, model, max_sequence_len)

"Dear Illinois Department Of Insurance We Love You And We Hope That I Could Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And I Can'T Be The And"

In [27]:
def train_random_batch(model, inp_sequences, epochs=1):
    """Fit `model` for `epochs` rounds, calling `model.fit(..., epochs=1)` once per round.

    NOTE(review): `batch` is sampled every round but never used; the training
    data is always built from `inp_sequences[:100000]`. Presumably the sampled
    batch was meant to be passed to generate_padded_sequences — confirm.
    NOTE(review): generate_padded_sequences is not defined in the visible part
    of this notebook.
    """
    for epoch in range(epochs):
        print("Epoch: {}".format(epoch+1))
        # Draws 10000 elements (with replacement, the np.random.choice default).
        batch = np.random.choice(inp_sequences, size=10000)
        # The returned max_sequence_len is local and not used afterwards.
        predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences[:100000])
        
        model.fit(predictors, label, epochs=1)

In [28]:
# Run one round of train_random_batch (epochs defaults to 1).
# NOTE(review): `inp_sequences` is not defined in the visible part of this notebook.
train_random_batch(model, inp_sequences)

Epoch: 1
Epoch 1/1


In [29]:
# Generate again from the same seed after the extra training round.
generate_text("Dear Illinois department of insurance we love you and we hope that", 100, model, max_sequence_len)

"Dear Illinois Department Of Insurance We Love You And We Hope That I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know That I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know That I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know I Don'T Know That I Don'T Know I Don'T Know I"

#### Save model

In [8]:
def save_model(model, name):
    """Save a model's architecture and weights under the `keras_model/` directory.

    Writes `keras_model/<name>.json` (the output of `model.to_json()`) and
    `keras_model/<name>.h5` (via `model.save_weights`). The target directory is
    created when missing, so the first save in a fresh checkout does not fail.

    Parameters
    ----------
    model
        Object exposing `to_json()` and `save_weights(path)`.
    name : str
        Base file name, without extension.

    Returns
    -------
    None
    """
    json_path = "keras_model/{}.json".format(name)
    weight_path = "keras_model/{}.h5".format(name)
    # open() does not create parent directories.
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, "w") as f:
        f.write(model.to_json())
    model.save_weights(weight_path)
    return None

In [None]:
#save_model(model, "0804_lunch")
#pickle.dump(tokenizer, open("keras_model/0804_tokenizer.pickle", "wb"))