In [1]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re
from sklearn.preprocessing import LabelEncoder
import numpy as np



In [2]:
## READ IN AND COMBINE ALL DATA
data = pd.read_csv('newyork_headline/ArticlesFeb2017.csv')

In [3]:
data1 = pd.read_csv('newyork_headline/ArticlesFeb2018.csv')

In [4]:
data2 = pd.read_csv('newyork_headline/ArticlesJan2017.csv')

In [5]:
data3 = pd.read_csv('newyork_headline/ArticlesJan2018.csv')

In [6]:
data = data.append([data1,data2,data3])

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3795 entries, 0 to 904
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   articleID         3795 non-null   object
 1   abstract          81 non-null     object
 2   byline            3795 non-null   object
 3   documentType      3795 non-null   object
 4   headline          3795 non-null   object
 5   keywords          3795 non-null   object
 6   multimedia        3795 non-null   int64 
 7   newDesk           3795 non-null   object
 8   printPage         3795 non-null   int64 
 9   pubDate           3795 non-null   object
 10  sectionName       3795 non-null   object
 11  snippet           3795 non-null   object
 12  source            3795 non-null   object
 13  typeOfMaterial    3795 non-null   object
 14  webURL            3795 non-null   object
 15  articleWordCount  3795 non-null   int64 
dtypes: int64(3), object(13)
memory usage: 504.0+ KB


In [8]:
data.head()

Unnamed: 0,articleID,abstract,byline,documentType,headline,keywords,multimedia,newDesk,printPage,pubDate,sectionName,snippet,source,typeOfMaterial,webURL,articleWordCount
0,58927e0495d0e0392607e1b3,,By KEN BELSON,article,N.F.L. vs. Politics Has Been Battle All Season...,"['Football', 'Super Bowl', 'National Football ...",1,Sports,12,2017-02-02 00:26:16,Pro Football,Despite the national tumult over immigration s...,The New York Times,News,https://www.nytimes.com/2017/02/01/sports/supe...,1129
1,5893033d95d0e0392607e2d6,,By UNKNOWN,article,Voice. Vice. Veracity.,"['Television', 'Home Box Office', 'Girls (TV P...",1,Arts&Leisure,1,2017-02-02 10:00:24,Television,Our critics look at the impact of the HBO show...,The New York Times,News,https://www.nytimes.com/2017/02/02/arts/televi...,3082
2,5893039595d0e0392607e2da,,By MANOHLA DARGIS,article,A Stand-Up’s Downward Slide,"['Movies', 'The Comedian (Movie)', 'De Niro, R...",1,Weekend,5,2017-02-02 10:01:53,Unknown,Joined by a cast that includes Edie Falco and ...,The New York Times,Review,https://www.nytimes.com/2017/02/02/movies/the-...,693
3,5893109995d0e0392607e2ef,,By ALEXANDRA S. LEVINE,article,New York Today: A Groundhog Has Her Day,"['New York City', 'Groundhogs']",1,Metro,0,2017-02-02 10:57:25,Unknown,Thursday: A meet-and-greet with Staten Island ...,The New York Times,briefing,https://www.nytimes.com/2017/02/02/nyregion/ne...,1049
4,5893114495d0e0392607e2f1,,By BONNIE TSUI,article,A Swimmer’s Communion With the Ocean,"['Travel and Vacations', 'Swimming', 'Oceans a...",1,Travel,4,2017-02-02 11:00:03,Unknown,"“We swam in that heaving body of aquamarine, a...",The New York Times,News,https://www.nytimes.com/2017/02/02/travel/hawa...,1283


In [9]:
#KEEP 
data = data[['headline','articleID']]
data.head()

Unnamed: 0,headline,articleID
0,N.F.L. vs. Politics Has Been Battle All Season...,58927e0495d0e0392607e1b3
1,Voice. Vice. Veracity.,5893033d95d0e0392607e2d6
2,A Stand-Up’s Downward Slide,5893039595d0e0392607e2da
3,New York Today: A Groundhog Has Her Day,5893109995d0e0392607e2ef
4,A Swimmer’s Communion With the Ocean,5893114495d0e0392607e2f1


In [10]:
#CLEAN THE DATA. GETTING RID OF ALL PUNCTUATION
data['headline'] = data['headline'].apply(lambda x: x.lower())
data['headline'] = data['headline'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

In [11]:
data = data['headline']

In [12]:
data.head(50)

0       nfl vs politics has been battle all season long
1                                   voice vice veracity
2                             a standups downward slide
3                new york today a groundhog has her day
4                   a swimmers communion with the ocean
5                                        trail activity
6                                            super bowl
7                              trumps mexican shakedown
8                               pences presidential pet
9                                fruit of a poison tree
10                the peculiar populism of donald trump
11    questions for on alaskas coldest days a villag...
12                                         the new kids
13                          what my chinese mother made
14    do you think teenagers can make a difference i...
15                                              unknown
16    president pledges to let politics return to pu...
17    the police killed my unarmed son in 2012 i

In [13]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
# determine the vocabulary size
vocab_size= len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# create line-based sequences
sequences= list()
for line in data:
    encoded = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(encoded)):
        sequence = encoded[:i+1]
        sequences.append(sequence)
        print(sequence)


Vocabulary Size: 6527
[649, 146]
[649, 146, 147]
[649, 146, 147, 89]
[649, 146, 147, 89, 826]
[649, 146, 147, 89, 826, 298]
[649, 146, 147, 89, 826, 298, 65]
[649, 146, 147, 89, 826, 298, 65, 47]
[649, 146, 147, 89, 826, 298, 65, 47, 138]
[436, 2547]
[436, 2547, 2548]
[2, 2549]
[2, 2549, 2550]
[2, 2549, 2550, 1088]
[13, 37]
[13, 37, 58]
[13, 37, 58, 2]
[13, 37, 58, 2, 2551]
[13, 37, 58, 2, 2551, 89]
[13, 37, 58, 2, 2551, 89, 98]
[13, 37, 58, 2, 2551, 89, 98, 112]
[2, 2552]
[2, 2552, 2553]
[2, 2552, 2553, 12]
[2, 2552, 2553, 12, 1]
[2, 2552, 2553, 12, 1, 1565]
[827, 1566]
[372, 373]
[22, 437]
[22, 437, 1567]
[1089, 650]
[1089, 650, 828]
[1568, 4]
[1568, 4, 2]
[1568, 4, 2, 1090]
[1568, 4, 2, 1090, 651]
[1, 2554]
[1, 2554, 1091]
[1, 2554, 1091, 4]
[1, 2554, 1091, 4, 81]
[1, 2554, 1091, 4, 81, 11]
[43, 6]
[43, 6, 9]
[43, 6, 9, 1569]
[43, 6, 9, 1569, 2555]
[43, 6, 9, 1569, 2555, 652]
[43, 6, 9, 1569, 2555, 652, 2]
[43, 6, 9, 1569, 2555, 652, 2, 1092]
[43, 6, 9, 1569, 2555, 652, 2, 1092, 109

[48, 244, 30, 2820]
[48, 244, 30, 2820, 109]
[45, 2821]
[45, 2821, 234]
[45, 2821, 234, 880]
[45, 2821, 234, 880, 881]
[45, 2821, 234, 880, 881, 170]
[2822, 6]
[2822, 6, 1]
[2822, 6, 1, 564]
[2822, 6, 1, 564, 2823]
[2822, 6, 1, 564, 2823, 10]
[2822, 6, 1, 564, 2823, 10, 537]
[2822, 6, 1, 564, 2823, 10, 537, 109]
[2824, 311]
[2824, 311, 278]
[2824, 311, 278, 40]
[2824, 311, 278, 40, 203]
[2824, 311, 278, 40, 203, 350]
[15, 3]
[15, 3, 348]
[15, 3, 348, 61]
[15, 3, 348, 61, 2825]
[15, 3, 348, 61, 2825, 266]
[15, 3, 348, 61, 2825, 266, 329]
[15, 3, 348, 61, 2825, 266, 329, 6]
[15, 3, 348, 61, 2825, 266, 329, 6, 1]
[15, 3, 348, 61, 2825, 266, 329, 6, 1, 13]
[15, 3, 348, 61, 2825, 266, 329, 6, 1, 13, 552]
[2826, 2827]
[1165, 2828]
[1165, 2828, 13]
[1165, 2828, 13, 2829]
[1165, 2828, 13, 2829, 4]
[1165, 2828, 13, 2829, 4, 470]
[1165, 2828, 13, 2829, 4, 470, 23]
[1165, 2828, 13, 2829, 4, 470, 23, 2830]
[1165, 2828, 13, 2829, 4, 470, 23, 2830, 2831]
[1165, 2828, 13, 2829, 4, 470, 23, 2830, 2831

[3028, 414, 19, 1221]
[3028, 414, 19, 1221, 3]
[3028, 414, 19, 1221, 3, 1]
[3028, 414, 19, 1221, 3, 1, 1724]
[17, 3029]
[17, 3029, 21]
[17, 3029, 21, 6]
[179, 320]
[179, 320, 82]
[179, 320, 82, 129]
[179, 320, 82, 129, 217]
[179, 320, 82, 129, 217, 3030]
[179, 320, 82, 129, 217, 3030, 5]
[179, 320, 82, 129, 217, 3030, 5, 3031]
[179, 320, 82, 129, 217, 3030, 5, 3031, 3032]
[179, 320, 82, 129, 217, 3030, 5, 3031, 3032, 168]
[532, 267]
[532, 267, 301]
[532, 267, 301, 3033]
[532, 267, 301, 3033, 22]
[532, 267, 301, 3033, 22, 910]
[1, 3034]
[1, 3034, 203]
[1, 3034, 203, 581]
[1725, 3035]
[1725, 3035, 14]
[1725, 3035, 14, 270]
[738, 494]
[738, 494, 479]
[738, 494, 479, 1222]
[738, 494, 479, 1222, 7]
[738, 494, 479, 1222, 7, 911]
[738, 494, 479, 1222, 7, 911, 1726]
[36, 550]
[36, 550, 862]
[36, 550, 862, 236]
[3037, 1]
[3037, 1, 59]
[2, 70]
[2, 70, 96]
[2, 70, 96, 3]
[2, 70, 96, 3, 1]
[2, 70, 96, 3, 1, 354]
[2, 70, 96, 3, 1, 354, 912]
[3038, 119]
[3038, 119, 1223]
[1, 687]
[1, 687, 4]
[1, 687

[50, 1275, 3223]
[50, 1275, 3223, 3224]
[50, 1275, 3223, 3224, 3225]
[50, 1275, 3223, 3224, 3225, 3226]
[1791, 97]
[1791, 97, 32]
[1791, 97, 32, 78]
[1791, 97, 32, 78, 3]
[1791, 97, 32, 78, 3, 1276]
[1, 3227]
[1, 3227, 3228]
[1277, 130]
[1277, 130, 3229]
[1277, 130, 3229, 33]
[1277, 130, 3229, 33, 45]
[1277, 130, 3229, 33, 45, 1792]
[2, 3230]
[2, 3230, 3231]
[2, 3230, 3231, 40]
[2, 3230, 3231, 40, 12]
[2, 3230, 3231, 40, 12, 3232]
[11, 3233]
[11, 3233, 1278]
[11, 3233, 1278, 4]
[11, 3233, 1278, 4, 291]
[11, 3233, 1278, 4, 291, 7]
[11, 3233, 1278, 4, 291, 7, 1793]
[1, 67]
[1, 67, 4]
[1, 67, 4, 18]
[1, 67, 4, 18, 3234]
[1, 67, 4, 18, 3234, 1794]
[194, 3235]
[194, 3235, 7]
[194, 3235, 7, 26]
[1795, 1]
[1795, 1, 1279]
[1795, 1, 1279, 19]
[1795, 1, 1279, 19, 1280]
[1795, 1, 1279, 19, 1280, 3]
[1795, 1, 1279, 19, 1280, 3, 503]
[3236, 3237]
[3236, 3237, 7]
[3236, 3237, 7, 868]
[3236, 3237, 7, 868, 96]
[3236, 3237, 7, 868, 96, 40]
[3236, 3237, 7, 868, 96, 40, 29]
[3236, 3237, 7, 868, 96, 40, 2

[266, 593, 132, 3423, 7, 332, 1863]
[266, 593, 132, 3423, 7, 332, 1863, 3424]
[42, 12]
[42, 12, 1864]
[42, 12, 1864, 1865]
[42, 12, 1864, 1865, 610]
[770, 3]
[770, 3, 1]
[770, 3, 1, 103]
[770, 3, 1, 103, 85]
[770, 3, 1, 103, 85, 7]
[770, 3, 1, 103, 85, 7, 18]
[770, 3, 1, 103, 85, 7, 18, 222]
[770, 3, 1, 103, 85, 7, 18, 222, 113]
[15, 11]
[15, 11, 850]
[15, 11, 850, 28]
[15, 11, 850, 28, 505]
[34, 38]
[34, 38, 154]
[34, 38, 154, 2]
[34, 38, 154, 2, 3425]
[34, 38, 154, 2, 3425, 6]
[34, 38, 154, 2, 3425, 6, 3426]
[34, 38, 154, 2, 3425, 6, 3426, 1866]
[287, 3427]
[287, 3427, 1867]
[197, 3428]
[197, 3428, 949]
[197, 3428, 949, 21]
[197, 3428, 949, 21, 447]
[197, 3428, 949, 21, 447, 23]
[197, 3428, 949, 21, 447, 23, 6]
[197, 3428, 949, 21, 447, 23, 6, 221]
[514, 3]
[514, 3, 1288]
[514, 3, 1288, 665]
[514, 3, 1288, 665, 967]
[514, 3, 1288, 665, 967, 3429]
[514, 3, 1288, 665, 967, 3429, 421]
[514, 3, 1288, 665, 967, 3429, 421, 1868]
[514, 3, 1288, 665, 967, 3429, 421, 1868, 28]
[514, 3, 1288, 

[14, 1898, 2, 315]
[14, 1898, 2, 315, 71]
[14, 1898, 2, 315, 71, 3502]
[14, 1898, 2, 315, 71, 3502, 7]
[14, 1898, 2, 315, 71, 3502, 7, 2]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503, 35]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503, 35, 171]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503, 35, 171, 244]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503, 35, 171, 244, 122]
[14, 1898, 2, 315, 71, 3502, 7, 2, 3503, 35, 171, 244, 122, 468]
[1, 156]
[1, 156, 198]
[1, 156, 198, 137]
[1, 156, 198, 137, 407]
[1, 156, 198, 137, 407, 1685]
[1, 156, 198, 137, 407, 1685, 475]
[967, 40]
[967, 40, 12]
[967, 40, 12, 1337]
[967, 40, 12, 1337, 197]
[967, 40, 12, 1337, 197, 579]
[967, 40, 12, 1337, 197, 579, 204]
[967, 40, 12, 1337, 197, 579, 204, 1106]
[967, 40, 12, 1337, 197, 579, 204, 1106, 18]
[967, 40, 12, 1337, 197, 579, 204, 1106, 18, 3504]
[1899, 1900]
[1899, 1900, 21]
[1899, 1900, 21, 3505]
[1899, 1900, 21, 3505, 1901]
[1899, 1900, 21, 3505, 1901, 87]
[1899, 1900, 21, 3505, 1901, 

[3689, 618, 6, 3690]
[3689, 618, 6, 3690, 3691]
[46, 3692]
[46, 3692, 1373]
[46, 3692, 1373, 1883]
[46, 3692, 1373, 1883, 3693]
[46, 3692, 1373, 1883, 3693, 1669]
[1980, 1628]
[1980, 1628, 625]
[1980, 1628, 625, 723]
[1980, 1628, 625, 723, 4]
[1980, 1628, 625, 723, 4, 3694]
[1980, 1628, 625, 723, 4, 3694, 457]
[3695, 181]
[3695, 181, 3696]
[3695, 181, 3696, 3697]
[3695, 181, 3696, 3697, 3698]
[3695, 181, 3696, 3697, 3698, 3699]
[899, 49]
[899, 49, 1]
[899, 49, 1, 1981]
[899, 49, 1, 1981, 4]
[899, 49, 1, 1981, 4, 1]
[899, 49, 1, 1981, 4, 1, 367]
[899, 49, 1, 1981, 4, 1, 367, 1088]
[899, 49, 1, 1981, 4, 1, 367, 1088, 1]
[899, 49, 1, 1981, 4, 1, 367, 1088, 1, 558]
[899, 49, 1, 1981, 4, 1, 367, 1088, 1, 558, 10]
[899, 49, 1, 1981, 4, 1, 367, 1088, 1, 558, 10, 699]
[2, 3700]
[2, 3700, 1008]
[2, 3700, 1008, 602]
[2, 3700, 1008, 602, 3701]
[221, 3702]
[221, 3702, 49]
[221, 3702, 49, 22]
[221, 3702, 49, 22, 3703]
[221, 3702, 49, 22, 3703, 3704]
[221, 3702, 49, 22, 3703, 3704, 9]
[221, 3702, 49

[17, 232, 34, 481, 19, 2, 401, 35]
[17, 232, 34, 481, 19, 2, 401, 35, 3825]
[17, 232, 34, 481, 19, 2, 401, 35, 3825, 1804]
[17, 232, 34, 481, 19, 2, 401, 35, 3825, 1804, 2029]
[1168, 3826]
[1168, 3826, 3827]
[1168, 3826, 3827, 19]
[1168, 3826, 3827, 19, 1]
[1168, 3826, 3827, 19, 1, 3828]
[1168, 3826, 3827, 19, 1, 3828, 1841]
[1168, 3826, 3827, 19, 1, 3828, 1841, 4]
[1168, 3826, 3827, 19, 1, 3828, 1841, 4, 3829]
[725, 2]
[725, 2, 676]
[725, 2, 676, 1403]
[725, 2, 676, 1403, 3]
[725, 2, 676, 1403, 3, 3830]
[725, 2, 676, 1403, 3, 3830, 633]
[207, 109]
[207, 109, 3831]
[207, 109, 3831, 179]
[207, 109, 3831, 179, 23]
[207, 109, 3831, 179, 23, 161]
[207, 109, 3831, 179, 23, 161, 10]
[207, 109, 3831, 179, 23, 161, 10, 2030]
[256, 124]
[256, 124, 801]
[256, 124, 801, 2031]
[256, 124, 801, 2031, 2032]
[256, 124, 801, 2031, 2032, 2]
[256, 124, 801, 2031, 2032, 2, 321]
[256, 124, 801, 2031, 2032, 2, 321, 279]
[256, 124, 801, 2031, 2032, 2, 321, 279, 3832]
[256, 124, 801, 2031, 2032, 2, 321, 279, 

[2, 489, 3981]
[2, 489, 3981, 1031]
[2, 489, 3981, 1031, 433]
[3982, 269]
[3982, 269, 26]
[3982, 269, 26, 622]
[3982, 269, 26, 622, 3]
[3982, 269, 26, 622, 3, 1985]
[3982, 269, 26, 622, 3, 1985, 6]
[2, 3983]
[2, 3983, 3984]
[2, 3983, 3984, 3985]
[2, 3983, 3984, 3985, 3986]
[2, 3983, 3984, 3985, 3986, 3987]
[2, 3983, 3984, 3985, 3986, 3987, 3988]
[113, 233]
[113, 233, 486]
[113, 233, 486, 856]
[113, 233, 486, 856, 7]
[113, 233, 486, 856, 7, 489]
[70, 292]
[70, 292, 10]
[70, 292, 10, 3989]
[70, 292, 10, 3989, 747]
[70, 292, 10, 3989, 747, 3990]
[1434, 3991]
[1434, 3991, 2]
[1434, 3991, 2, 273]
[1434, 3991, 2, 273, 3]
[1434, 3991, 2, 273, 3, 1435]
[1434, 3991, 2, 273, 3, 1435, 18]
[1434, 3991, 2, 273, 3, 1435, 18, 155]
[1434, 3991, 2, 273, 3, 1435, 18, 155, 706]
[1434, 3991, 2, 273, 3, 1435, 18, 155, 706, 19]
[1434, 3991, 2, 273, 3, 1435, 18, 155, 706, 19, 3992]
[1436, 2083]
[1436, 2083, 9]
[1436, 2083, 9, 3993]
[1436, 2083, 9, 3993, 3994]
[1436, 2083, 9, 3993, 3994, 1032]
[1436, 2083, 9,

[1043, 518, 69, 51]
[1043, 518, 69, 51, 4183]
[331, 3]
[331, 3, 134]
[331, 3, 134, 323]
[331, 3, 134, 323, 2149]
[331, 3, 134, 323, 2149, 313]
[331, 3, 134, 323, 2149, 313, 35]
[331, 3, 134, 323, 2149, 313, 35, 44]
[331, 3, 134, 323, 2149, 313, 35, 44, 134]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16, 795]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16, 795, 2]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16, 795, 2, 170]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16, 795, 2, 170, 472]
[331, 3, 134, 323, 2149, 313, 35, 44, 134, 16, 795, 2, 170, 472, 4184]
[227, 4185]
[227, 4185, 4186]
[227, 4185, 4186, 5]
[227, 4185, 4186, 5, 900]
[2, 601]
[2, 601, 104]
[2, 601, 104, 928]
[2, 601, 104, 928, 40]
[2, 601, 104, 928, 40, 1]
[2, 601, 104, 928, 40, 1, 1024]
[2, 601, 104, 928, 40, 1, 1024, 4187]
[36, 357]
[36, 357, 4188]
[36, 357, 4188, 799]
[36, 357, 4188, 799, 2]
[36, 357, 4188, 799, 2, 357]
[36, 357, 4188, 799, 2, 357, 65]
[36, 357, 4188, 799, 

[118, 1470, 11, 3, 454, 7, 25, 1470, 3, 52]
[118, 1470, 11, 3, 454, 7, 25, 1470, 3, 52, 1471]
[932, 199]
[932, 199, 299]
[932, 199, 299, 810]
[1304, 1472]
[1304, 1472, 274]
[1304, 1472, 274, 6]
[1304, 1472, 274, 6, 574]
[1304, 1472, 274, 6, 574, 4275]
[1304, 1472, 274, 6, 574, 4275, 284]
[1304, 1472, 274, 6, 574, 4275, 284, 4276]
[1304, 1472, 274, 6, 574, 4275, 284, 4276, 282]
[1304, 1472, 274, 6, 574, 4275, 284, 4276, 282, 319]
[215, 16]
[215, 16, 408]
[215, 16, 408, 117]
[215, 16, 408, 117, 1439]
[215, 16, 408, 117, 1439, 536]
[215, 16, 408, 117, 1439, 536, 173]
[215, 16, 408, 117, 1439, 536, 173, 2120]
[188, 2188]
[188, 2188, 4]
[188, 2188, 4, 2]
[188, 2188, 4, 2, 1473]
[188, 2188, 4, 2, 1473, 2189]
[188, 2188, 4, 2, 1473, 2189, 4277]
[188, 2188, 4, 2, 1473, 2189, 4277, 13]
[188, 2188, 4, 2, 1473, 2189, 4277, 13, 1803]
[1, 1128]
[1, 1128, 77]
[1, 1128, 77, 760]
[56, 3]
[56, 3, 1474]
[56, 3, 1474, 496]
[1298, 11]
[1298, 11, 10]
[1298, 11, 10, 1475]
[1298, 11, 10, 1475, 10]
[1298, 11,

[2244, 4481]
[2244, 4481, 309]
[2244, 4481, 309, 12]
[2244, 4481, 309, 12, 2]
[2244, 4481, 309, 12, 2, 339]
[232, 38]
[232, 38, 77]
[232, 38, 77, 4482]
[232, 38, 77, 4482, 33]
[232, 38, 77, 4482, 33, 20]
[232, 38, 77, 4482, 33, 20, 2249]
[232, 38, 77, 4482, 33, 20, 2249, 4]
[232, 38, 77, 4482, 33, 20, 2249, 4, 20]
[232, 38, 77, 4482, 33, 20, 2249, 4, 20, 192]
[232, 38, 77, 4482, 33, 20, 2249, 4, 20, 192, 239]
[232, 38, 77, 4482, 33, 20, 2249, 4, 20, 192, 239, 643]
[2, 4483]
[2, 4483, 4484]
[2, 4483, 4484, 1061]
[2, 4483, 4484, 1061, 5]
[2, 4483, 4484, 1061, 5, 2]
[2, 4483, 4484, 1061, 5, 2, 464]
[2, 4483, 4484, 1061, 5, 2, 464, 4485]
[2, 4483, 4484, 1061, 5, 2, 464, 4485, 4]
[2, 4483, 4484, 1061, 5, 2, 464, 4485, 4, 2250]
[843, 556]
[843, 556, 10]
[843, 556, 10, 2]
[843, 556, 10, 2, 4486]
[127, 108]
[127, 108, 9]
[127, 108, 9, 5]
[127, 108, 9, 5, 45]
[127, 108, 9, 5, 45, 209]
[127, 108, 9, 5, 45, 209, 451]
[127, 108, 9, 5, 45, 209, 451, 2152]
[127, 108, 9, 5, 45, 209, 451, 2152, 206]
[

[1490, 1476]
[1490, 1476, 1068]
[1490, 1476, 1068, 1036]
[1490, 1476, 1068, 1036, 248]
[1490, 1476, 1068, 1036, 248, 14]
[1490, 1476, 1068, 1036, 248, 14, 1]
[1490, 1476, 1068, 1036, 248, 14, 1, 1138]
[1490, 1476, 1068, 1036, 248, 14, 1, 1138, 4]
[1490, 1476, 1068, 1036, 248, 14, 1, 1138, 4, 2298]
[48, 62]
[48, 62, 173]
[48, 62, 173, 1741]
[2, 813]
[2, 813, 632]
[2, 813, 632, 55]
[2, 813, 632, 55, 440]
[2, 813, 632, 55, 440, 6]
[186, 4668]
[186, 4668, 4]
[186, 4668, 4, 4669]
[18, 4670]
[18, 4670, 73]
[18, 4670, 73, 34]
[18, 4670, 73, 34, 1514]
[18, 4670, 73, 34, 1514, 29]
[18, 4670, 73, 34, 1514, 29, 55]
[18, 4670, 73, 34, 1514, 29, 55, 781]
[18, 4670, 73, 34, 1514, 29, 55, 781, 1163]
[4671, 170]
[4671, 170, 351]
[4671, 170, 351, 4672]
[36, 29]
[36, 29, 195]
[36, 29, 195, 10]
[36, 29, 195, 10, 500]
[36, 29, 195, 10, 500, 4673]
[36, 29, 195, 10, 500, 4673, 176]
[36, 29, 195, 10, 500, 4673, 176, 2299]
[36, 29, 195, 10, 500, 4673, 176, 2299, 5]
[36, 29, 195, 10, 500, 4673, 176, 2299, 5, 2

[1527, 429, 1068, 136, 27, 1709, 9]
[1527, 429, 1068, 136, 27, 1709, 9, 118]
[1527, 429, 1068, 136, 27, 1709, 9, 118, 4852]
[66, 309]
[66, 309, 88]
[66, 309, 88, 180]
[66, 309, 88, 180, 4853]
[66, 309, 88, 180, 4853, 1641]
[1859, 5]
[1859, 5, 18]
[1859, 5, 18, 1245]
[1859, 5, 18, 1245, 4854]
[1859, 5, 18, 1245, 4854, 56]
[1859, 5, 18, 1245, 4854, 56, 186]
[189, 13]
[189, 13, 541]
[189, 13, 541, 1042]
[189, 13, 541, 1042, 759]
[189, 13, 541, 1042, 759, 457]
[189, 13, 541, 1042, 759, 457, 553]
[189, 13, 541, 1042, 759, 457, 553, 28]
[189, 13, 541, 1042, 759, 457, 553, 28, 523]
[189, 13, 541, 1042, 759, 457, 553, 28, 523, 4855]
[189, 13, 541, 1042, 759, 457, 553, 28, 523, 4855, 2335]
[189, 13, 541, 1042, 759, 457, 553, 28, 523, 4855, 2335, 63]
[4856, 5]
[4856, 5, 1]
[4856, 5, 1, 142]
[497, 1076]
[497, 1076, 263]
[497, 1076, 263, 217]
[497, 1076, 263, 217, 506]
[497, 1076, 263, 217, 506, 1293]
[497, 1076, 263, 217, 506, 1293, 18]
[497, 1076, 263, 217, 506, 1293, 18, 1019]
[497, 1076, 263, 

[27, 432, 495, 1634, 6]
[27, 432, 495, 1634, 6, 5014]
[27, 432, 495, 1634, 6, 5014, 817]
[5015, 7]
[5015, 7, 5016]
[5015, 7, 5016, 14]
[5015, 7, 5016, 14, 838]
[5017, 2]
[768, 5018]
[768, 5018, 3]
[768, 5018, 3, 765]
[768, 5018, 3, 765, 2347]
[768, 5018, 3, 765, 2347, 46]
[768, 5018, 3, 765, 2347, 46, 237]
[768, 5018, 3, 765, 2347, 46, 237, 4]
[768, 5018, 3, 765, 2347, 46, 237, 4, 26]
[768, 5018, 3, 765, 2347, 46, 237, 4, 26, 5019]
[768, 5018, 3, 765, 2347, 46, 237, 4, 26, 5019, 2348]
[5020, 922]
[5020, 922, 5021]
[1, 11]
[1, 11, 7]
[1, 11, 7, 5022]
[1, 11, 7, 5022, 388]
[32, 772]
[32, 772, 133]
[32, 772, 133, 5]
[32, 772, 133, 5, 54]
[32, 772, 133, 5, 54, 166]
[2, 170]
[2, 170, 100]
[2, 170, 100, 3]
[2, 170, 100, 3, 348]
[2, 170, 100, 3, 348, 1]
[2, 170, 100, 3, 348, 1, 420]
[110, 5023]
[110, 5023, 205]
[110, 5023, 205, 735]
[110, 5023, 205, 735, 5024]
[29, 1436]
[29, 1436, 1338]
[29, 1436, 1338, 93]
[29, 1436, 1338, 93, 54]
[29, 1436, 1338, 93, 54, 198]
[29, 1436, 1338, 93, 54, 198, 

[1024, 75, 30]
[1024, 75, 30, 103]
[1024, 75, 30, 103, 6]
[1024, 75, 30, 103, 6, 5167]
[15, 3]
[15, 3, 30]
[15, 3, 30, 392]
[15, 3, 30, 392, 12]
[15, 3, 30, 392, 12, 20]
[15, 3, 30, 392, 12, 20, 636]
[13, 37]
[13, 37, 58]
[13, 37, 58, 2416]
[13, 37, 58, 2416, 2]
[13, 37, 58, 2416, 2, 5168]
[13, 37, 58, 2416, 2, 5168, 288]
[13, 37, 58, 2416, 2, 5168, 288, 1352]
[13, 37, 58, 2416, 2, 5168, 288, 1352, 72]
[41, 149]
[41, 149, 1]
[41, 149, 1, 637]
[41, 149, 1, 637, 367]
[41, 149, 1, 637, 367, 69]
[41, 149, 1, 637, 367, 69, 5169]
[41, 149, 1, 637, 367, 69, 5169, 7]
[41, 149, 1, 637, 367, 69, 5169, 7, 5170]
[41, 149, 1, 637, 367, 69, 5169, 7, 5170, 5171]
[878, 1213]
[878, 1213, 752]
[878, 1213, 752, 835]
[878, 1213, 752, 835, 5172]
[878, 1213, 752, 835, 5172, 5173]
[878, 1213, 752, 835, 5172, 5173, 31]
[878, 1213, 752, 835, 5172, 5173, 31, 1183]
[878, 1213, 752, 835, 5172, 5173, 31, 1183, 173]
[878, 1213, 752, 835, 5172, 5173, 31, 1183, 173, 4]
[878, 1213, 752, 835, 5172, 5173, 31, 1183, 173,

[260, 5310]
[260, 5310, 695]
[260, 5310, 695, 13]
[260, 5310, 695, 13, 37]
[260, 5310, 695, 13, 37, 3]
[260, 5310, 695, 13, 37, 3, 274]
[260, 5310, 695, 13, 37, 3, 274, 723]
[2437, 272]
[2437, 272, 1097]
[2437, 272, 1097, 11]
[2437, 272, 1097, 11, 5311]
[2437, 272, 1097, 11, 5311, 2407]
[2437, 272, 1097, 11, 5311, 2407, 1392]
[2, 5312]
[2, 5312, 1301]
[2, 5312, 1301, 114]
[2, 5312, 1301, 114, 71]
[2, 5312, 1301, 114, 71, 39]
[5313, 39]
[5313, 39, 120]
[5313, 39, 120, 780]
[5313, 39, 120, 780, 5314]
[5313, 39, 120, 780, 5314, 5]
[5313, 39, 120, 780, 5314, 5, 478]
[13, 5315]
[13, 5315, 6]
[13, 5315, 6, 2]
[13, 5315, 6, 2, 5316]
[13, 5315, 6, 2, 5316, 656]
[5317, 11]
[5317, 11, 5318]
[5317, 11, 5318, 539]
[5317, 11, 5318, 539, 3]
[5317, 11, 5318, 539, 3, 2438]
[2439, 5]
[2439, 5, 256]
[2439, 5, 256, 5319]
[2439, 5, 256, 5319, 62]
[2439, 5, 256, 5319, 62, 25]
[2439, 5, 256, 5319, 62, 25, 586]
[2439, 5, 256, 5319, 62, 25, 586, 24]
[2439, 5, 256, 5319, 62, 25, 586, 24, 5320]
[2439, 5, 256, 5

[41, 1647, 10, 2, 141]
[41, 1647, 10, 2, 141, 5422]
[1788, 151]
[1788, 151, 421]
[1788, 151, 421, 5]
[1788, 151, 421, 5, 2]
[1788, 151, 421, 5, 2, 138]
[1788, 151, 421, 5, 2, 138, 691]
[1788, 151, 421, 5, 2, 138, 691, 692]
[1788, 151, 421, 5, 2, 138, 691, 692, 169]
[15, 3]
[15, 3, 5423]
[15, 3, 5423, 1]
[15, 3, 5423, 1, 1255]
[15, 3, 5423, 1, 1255, 1070]
[15, 3, 5423, 1, 1255, 1070, 1504]
[15, 3, 5423, 1, 1255, 1070, 1504, 9]
[15, 3, 5423, 1, 1255, 1070, 1504, 9, 711]
[5424, 620]
[5424, 620, 5]
[5424, 620, 5, 1977]
[81, 22]
[81, 22, 711]
[81, 22, 711, 2371]
[5425, 4]
[5425, 4, 649]
[5425, 4, 649, 642]
[5425, 4, 649, 642, 252]
[5425, 4, 649, 642, 252, 2400]
[5425, 4, 649, 642, 252, 2400, 5426]
[2456, 845]
[2456, 845, 2450]
[2456, 845, 2450, 2]
[2456, 845, 2450, 2, 1058]
[2456, 845, 2450, 2, 1058, 358]
[5427, 4]
[5427, 4, 2248]
[1110, 961]
[1110, 961, 1099]
[1110, 961, 1099, 6]
[1110, 961, 1099, 6, 5428]
[2, 5429]
[2, 5429, 5430]
[2, 5429, 5430, 5]
[2, 5429, 5430, 5, 5431]
[2, 5429, 5430

[1, 199, 73, 5604, 1329]
[32, 560]
[32, 560, 7]
[32, 560, 7, 5605]
[32, 560, 7, 5605, 1229]
[32, 560, 7, 5605, 1229, 1180]
[32, 560, 7, 5605, 1229, 1180, 2]
[32, 560, 7, 5605, 1229, 1180, 2, 300]
[32, 560, 7, 5605, 1229, 1180, 2, 300, 5]
[32, 560, 7, 5605, 1229, 1180, 2, 300, 5, 5606]
[843, 5607]
[843, 5607, 5608]
[843, 5607, 5608, 3]
[843, 5607, 5608, 3, 5609]
[843, 5607, 5608, 3, 5609, 5610]
[843, 5607, 5608, 3, 5609, 5610, 894]
[843, 5607, 5608, 3, 5609, 5610, 894, 1278]
[1, 5611]
[1, 5611, 366]
[1, 5611, 366, 64]
[46, 1136]
[46, 1136, 4]
[46, 1136, 4, 424]
[46, 1136, 4, 424, 5612]
[46, 1136, 4, 424, 5612, 5613]
[46, 1136, 4, 424, 5612, 5613, 346]
[46, 1136, 4, 424, 5612, 5613, 346, 24]
[46, 1136, 4, 424, 5612, 5613, 346, 24, 203]
[46, 1136, 4, 424, 5612, 5613, 346, 24, 203, 409]
[46, 1136, 4, 424, 5612, 5613, 346, 24, 203, 409, 2144]
[5614, 2270]
[5614, 2270, 6]
[5614, 2270, 6, 2259]
[5614, 2270, 6, 2259, 3]
[5614, 2270, 6, 2259, 3, 7]
[5614, 2270, 6, 2259, 3, 7, 2142]
[5614, 2270,

[5, 1409, 1410]
[5, 1409, 1410, 1411]
[5, 1409, 1410, 1411, 2]
[5, 1409, 1410, 1411, 2, 783]
[5, 1409, 1410, 1411, 2, 783, 5783]
[5, 1409, 1410, 1411, 2, 783, 5783, 7]
[5, 1409, 1410, 1411, 2, 783, 5783, 7, 1377]
[5, 1409, 1410, 1411, 2, 783, 5783, 7, 1377, 49]
[1305, 2019]
[1536, 355]
[1536, 355, 6]
[1536, 355, 6, 330]
[1536, 355, 6, 330, 365]
[1536, 355, 6, 330, 365, 152]
[1536, 355, 6, 330, 365, 152, 172]
[1536, 355, 6, 330, 365, 152, 172, 29]
[2, 5784]
[2, 5784, 1149]
[2, 5784, 1149, 1054]
[2, 5784, 1149, 1054, 5785]
[2, 5784, 1149, 1054, 5785, 11]
[21, 48]
[21, 48, 1525]
[21, 48, 1525, 1739]
[21, 48, 1525, 1739, 3]
[21, 48, 1525, 1739, 3, 204]
[21, 48, 1525, 1739, 3, 204, 5786]
[459, 543]
[459, 543, 569]
[459, 543, 569, 447]
[459, 543, 569, 447, 6]
[459, 543, 569, 447, 6, 22]
[459, 543, 569, 447, 6, 22, 648]
[459, 543, 569, 447, 6, 22, 648, 419]
[42, 106]
[42, 106, 6]
[42, 106, 6, 1516]
[42, 106, 6, 1516, 75]
[42, 106, 6, 1516, 75, 30]
[42, 106, 6, 1516, 75, 30, 1]
[42, 106, 6, 15

[5895, 644, 13, 274, 131]
[5895, 644, 13, 274, 131, 6]
[5895, 644, 13, 274, 131, 6, 5896]
[5895, 644, 13, 274, 131, 6, 5896, 65]
[5895, 644, 13, 274, 131, 6, 5896, 65, 61]
[5895, 644, 13, 274, 131, 6, 5896, 65, 61, 1277]
[818, 5897]
[15, 3]
[15, 3, 2502]
[15, 3, 2502, 12]
[15, 3, 2502, 12, 5898]
[15, 3, 2502, 12, 5898, 5899]
[15, 3, 2502, 12, 5898, 5899, 2511]
[5900, 2]
[5900, 2, 5901]
[5900, 2, 5901, 1497]
[1, 521]
[1, 521, 47]
[1, 521, 47, 92]
[1, 521, 47, 92, 60]
[1, 521, 47, 92, 60, 92]
[1, 521, 47, 92, 60, 92, 821]
[1, 521, 47, 92, 60, 92, 821, 181]
[1, 521, 47, 92, 60, 92, 821, 181, 450]
[5902, 2018]
[5902, 2018, 3]
[5902, 2018, 3, 2429]
[107, 122]
[107, 122, 6]
[107, 122, 6, 1]
[107, 122, 6, 1, 5903]
[107, 122, 6, 1, 5903, 132]
[107, 122, 6, 1, 5903, 132, 162]
[421, 6]
[421, 6, 1875]
[421, 6, 1875, 5]
[421, 6, 1875, 5, 2]
[421, 6, 1875, 5, 2, 5904]
[421, 6, 1875, 5, 2, 5904, 5905]
[27, 760]
[27, 760, 304]
[27, 760, 304, 135]
[27, 760, 304, 135, 613]
[27, 760, 304, 135, 613, 418]

[18, 2145, 6059]
[18, 2145, 6059, 1297]
[18, 2145, 6059, 1297, 40]
[18, 2145, 6059, 1297, 40, 14]
[18, 2145, 6059, 1297, 40, 14, 340]
[207, 357]
[207, 357, 974]
[207, 357, 974, 23]
[207, 357, 974, 23, 6060]
[207, 357, 974, 23, 6060, 154]
[207, 357, 974, 23, 6060, 154, 667]
[6061, 2481]
[6061, 2481, 6]
[6061, 2481, 6, 2]
[6061, 2481, 6, 2, 142]
[6061, 2481, 6, 2, 142, 374]
[6061, 2481, 6, 2, 142, 374, 5]
[6061, 2481, 6, 2, 142, 374, 5, 841]
[434, 589]
[434, 589, 3]
[434, 589, 3, 2331]
[434, 589, 3, 2331, 911]
[434, 589, 3, 2331, 911, 23]
[434, 589, 3, 2331, 911, 23, 6062]
[434, 589, 3, 2331, 911, 23, 6062, 2279]
[434, 589, 3, 2331, 911, 23, 6062, 2279, 430]
[434, 589, 3, 2331, 911, 23, 6062, 2279, 430, 582]
[719, 2442]
[719, 2442, 1201]
[719, 2442, 1201, 4]
[719, 2442, 1201, 4, 1]
[719, 2442, 1201, 4, 1, 210]
[2, 2460]
[2, 2460, 611]
[2, 2460, 611, 7]
[2, 2460, 611, 7, 644]
[2072, 75]
[2072, 75, 134]
[2072, 75, 134, 672]
[2072, 75, 134, 672, 76]
[2072, 75, 134, 672, 76, 587]
[150, 1]
[1

[139, 26, 6146, 16, 382, 2, 425, 813, 66, 24, 26, 65]
[139, 26, 6146, 16, 382, 2, 425, 813, 66, 24, 26, 65, 33]
[139, 26, 6146, 16, 382, 2, 425, 813, 66, 24, 26, 65, 33, 16]
[1854, 6147]
[11, 10]
[11, 10, 2]
[11, 10, 2, 2408]
[11, 10, 2, 2408, 2217]
[6148, 2530]
[6148, 2530, 6149]
[6148, 2530, 6149, 1317]
[6148, 2530, 6149, 1317, 2530]
[6148, 2530, 6149, 1317, 2530, 6150]
[38, 69]
[38, 69, 2]
[38, 69, 2, 1059]
[9, 1]
[9, 1, 2527]
[9, 1, 2527, 519]
[9, 1, 2527, 519, 5]
[9, 1, 2527, 519, 5, 13]
[9, 1, 2527, 519, 5, 13, 541]
[22, 313]
[22, 313, 6151]
[22, 313, 6151, 131]
[22, 313, 6151, 131, 6]
[22, 313, 6151, 131, 6, 330]
[320, 731]
[1, 91]
[1, 91, 519]
[1, 91, 519, 4]
[1, 91, 519, 4, 2531]
[1, 91, 519, 4, 2531, 10]
[1, 91, 519, 4, 2531, 10, 2022]
[288, 98]
[15, 3]
[15, 3, 773]
[15, 3, 773, 1556]
[627, 1]
[627, 1, 160]
[627, 1, 160, 4]
[627, 1, 160, 4, 335]
[627, 1, 160, 4, 335, 102]
[1786, 761]
[1786, 761, 278]
[1786, 761, 278, 3]
[1786, 761, 278, 3, 2342]
[10, 229]
[10, 229, 79]
[10, 2

[2538, 5, 551]
[2538, 5, 551, 70]
[2538, 5, 551, 70, 38]
[2538, 5, 551, 70, 38, 2540]
[2538, 5, 551, 70, 38, 2540, 238]
[2538, 5, 551, 70, 38, 2540, 238, 1524]
[2538, 5, 551, 70, 38, 2540, 238, 1524, 6310]
[2538, 5, 551, 70, 38, 2540, 238, 1524, 6310, 43]
[2538, 5, 551, 70, 38, 2540, 238, 1524, 6310, 43, 5]
[2538, 5, 551, 70, 38, 2540, 238, 1524, 6310, 43, 5, 54]
[2538, 5, 551, 70, 38, 2540, 238, 1524, 6310, 43, 5, 54, 166]
[36, 419]
[36, 419, 6311]
[36, 419, 6311, 24]
[36, 419, 6311, 24, 339]
[11, 6312]
[11, 6312, 9]
[11, 6312, 9, 661]
[11, 6312, 9, 661, 3]
[11, 6312, 9, 661, 3, 371]
[11, 6312, 9, 661, 3, 371, 2426]
[11, 6312, 9, 661, 3, 371, 2426, 714]
[825, 6313]
[825, 6313, 6314]
[825, 6313, 6314, 1146]
[385, 383]
[385, 383, 6315]
[385, 383, 6315, 2333]
[385, 383, 6315, 2333, 10]
[385, 383, 6315, 2333, 10, 35]
[6316, 1224]
[6316, 1224, 14]
[6316, 1224, 14, 2]
[6316, 1224, 14, 2, 6317]
[6316, 1224, 14, 2, 6317, 685]
[6316, 1224, 14, 2, 6317, 685, 339]
[62, 38]
[62, 38, 6318]
[62, 38

[6444, 28, 2, 1446, 180]
[6444, 28, 2, 1446, 180, 6445]
[6444, 28, 2, 1446, 180, 6445, 28]
[6444, 28, 2, 1446, 180, 6445, 28, 2]
[6444, 28, 2, 1446, 180, 6445, 28, 2, 818]
[6444, 28, 2, 1446, 180, 6445, 28, 2, 818, 4]
[6444, 28, 2, 1446, 180, 6445, 28, 2, 818, 4, 6446]
[6444, 28, 2, 1446, 180, 6445, 28, 2, 818, 4, 6446, 1418]
[825, 663]
[825, 663, 1]
[825, 663, 1, 477]
[825, 663, 1, 477, 234]
[825, 663, 1, 477, 234, 107]
[825, 663, 1, 477, 234, 107, 6447]
[825, 663, 1, 477, 234, 107, 6447, 7]
[825, 663, 1, 477, 234, 107, 6447, 7, 1485]
[2, 6448]
[2, 6448, 697]
[2, 6448, 697, 3]
[2, 6448, 697, 3, 2]
[2, 6448, 697, 3, 2, 533]
[2, 6448, 697, 3, 2, 533, 155]
[2, 6448, 697, 3, 2, 533, 155, 608]
[6449, 986]
[6449, 986, 9]
[6449, 986, 9, 1534]
[1, 879]
[1, 879, 4]
[1, 879, 4, 6450]
[1, 588]
[1, 588, 7]
[1, 588, 7, 1]
[1, 588, 7, 1, 6451]
[144, 6452]
[144, 6452, 1482]
[144, 6452, 1482, 180]
[144, 6452, 1482, 180, 1660]
[144, 6452, 1482, 180, 1660, 6453]
[144, 6452, 1482, 180, 1660, 6453, 9]
[1

In [14]:
max_length= max([len(seq) for seq in sequences], default=0)
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)
# split into input and output elements
sequences = np.array(sequences)
X, y = sequences[:,:-1],sequences[:,-1]
y = to_categorical(y, num_classes=vocab_size)

Max Sequence Length: 24


In [15]:
def createmodel():
    model = Sequential()
    model.add(Embedding(vocab_size, 10, input_length=max_length-1))
    model.add(LSTM(50))
    model.add(Dense(vocab_size, activation='softmax'))
    print(model.summary())
    # compile network
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
   

In [16]:
# fit network
best_model = createmodel()
best_model.fit(X,  y, epochs=500, verbose=1)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 23, 10)            65270     
_________________________________________________________________
lstm (LSTM)                  (None, 50)                12200     
_________________________________________________________________
dense (Dense)                (None, 6527)              332877    
Total params: 410,347
Trainable params: 410,347
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 3

Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch

Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 

Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 

Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 

Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 

Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<tensorflow.python.keras.callbacks.History at 0x1429ba070>

In [17]:
def test(test, title_length):
    i=0
    for i in range(title_length):
        test_encoded = tokenizer.texts_to_sequences([test])[0]
        test_encoded = pad_sequences([test_encoded], maxlen=max_length, padding='pre')
        predicted = best_model.predict_classes(test_encoded, verbose=0)
        #print(predicted)
        for word,index in tokenizer.word_index.items():
            if index == predicted:
                output = word
                break
        test = test + " " + output
    print(test)
    return

In [18]:
test('kansas',5)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
kansas i voted for donald trump


In [19]:
test('Kansas City Chiefs', 7)

Kansas City Chiefs lanka installs republican love remain paid political


In [20]:
test('New York', 5)

New York today a trumpless tower era


In [22]:
test('Donald Trump',7)

Donald Trump joke leads to suspension of the secret


In [23]:
test('Joe Biden', 7)

Joe Biden arpaios latest offense running for senate us


In [24]:
test('Patrick Mahomes', 7)

Patrick Mahomes level of the stars after health strict
