In [2]:
import pandas as pd
import numpy as np
import matplotlib as plt

import re

In [3]:
poetry_df = pd.read_csv("kaggle_poem_dataset.csv").drop(['Unnamed: 0'],axis=1)
poetry_df

Unnamed: 0,Author,Title,Poetry Foundation ID,Content
0,Wendy Videlock,!,55489,"Dear Writers, I’m compiling the first in what ..."
1,Hailey Leithauser,0,41729,"Philosophic\nin its complex, ovoid emptiness,\..."
2,Jody Gladding,1-800-FEAR,57135,We'd like to talk with you about fear t...
3,Joseph Brodsky,1 January 1965,56736,The Wise Men will unlearn your name.\nAbove yo...
4,Ted Berrigan,3 Pages,51624,For Jack Collom\n10 Things I do Every Day\n\np...
...,...,...,...,...
15647,Hannah Gamble,Your Invitation to a Modest Breakfast,56059,"It’s too cold to smoke outside, but if you com..."
15648,Eleni Sikelianos,Your Kingdom\n \n \n \n Launch Audio in a N...,145220,if you like let the body feel\nall its own evo...
15649,Susan Elizabeth Howe,“Your Luck Is About To Change”,41696,(A fortune cookie)\nOminous inscrutable Chines...
15650,Andrew Shields,Your Mileage May Vary,90177,1\nOur last night in the house was not our las...


In [14]:
# this is not encoding punctuation correctly
news_df = pd.read_csv("news_summary.csv", encoding="ISO-8859-1").dropna()
news_df["ctext"][1]

'From her special numbers to TV?appearances, Bollywood actor Malaika Arora Khan has managed to carve her own identity. The actor, who made her debut in the Hindi film industry with the blockbuster debut opposite Shah Rukh Khan in Chaiyya Chaiyya from Dil Se (1998), is still remembered for the song. However, for trolls, she is a woman first and what matters right now is that she divorced a ?rich man?.  On Wednesday, Malaika Arora shared a gorgeous picture of herself on Instagram and a follower decided to troll her for using her ?alumni? (read alimony) money to wear ?short clothes and going to gym or salon?. Little did he/she know that the Munni Badnam star would reply with the perfect comeback. Take a look at the interaction:     Super excited to be affiliated with Khanna Jewellers @khannajewellerskj as their brand ambassador. Crafted to perfection, their stunning statement jewellery is a must have for every jewellery lover. #khannajewellers...#maksquad?? #hair @hairbypriyanka #stylist 

In [9]:
# 10 authors with largest # of poems
poetry_df.groupby("Author")["Content"].count().sort_values()[-10:]

# Percy Bysshe Shelley always missing 2 letters
# poetry_df[poetry_df["Author"].apply(lambda s: "Shelley" in s)]

Author
Percy sshe Shelley       43
Yusef Komunyakaa         43
John Ashbery             46
William Butler Yeats     47
Emily Dickinson          57
William Wordsworth       59
Rae Armantrout           62
Alfred, Lord Tennyson    78
Anonymous                82
William Shakespeare      85
Name: Content, dtype: int64

In [15]:
from nltk.tokenize import word_tokenize
import nltk
#nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/ebal/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [18]:
def tokenize(poem):
    return word_tokenize(re.sub(r"\n", r" NN ", poem))

poetry_df["Tokenized"] = poetry_df["Content"].apply(lambda row: tokenize(row))

In [19]:
def count_token(tokenized, token_to_count):
    return sum([token==token_to_count for token in tokenized])

def num_lines(tokenized):
    print(tokenized)
    return (count_token(tokenized, "NN") + 1)

def num_words(tokenized):
    filtered = [token for token in tokenized if (token.isalnum() and token!="NN")]
    return len(filtered)

def num_punctuation(tokenized):
    filtered = [token for token in tokenized if (not token.isalnum() and token!="NN")]
    return len(filtered)

#(num_words(nltk_tokens), len(nltk_tokens), num_punctuation(nltk_tokens))

In [109]:
poetry_df["Tokenized"][1]

['Philosophic',
 'NN',
 'in',
 'its',
 'complex',
 ',',
 'ovoid',
 'emptiness',
 ',',
 'NN',
 'a',
 'skillful',
 'pundit',
 'coined',
 'it',
 'as',
 'a',
 'sort',
 'NN',
 'of',
 'stopgap',
 'doorstop',
 'for',
 'those',
 'NN',
 'quaint',
 'equations',
 'NN',
 'NN',
 'Romans',
 'never',
 'NN',
 'dreamt',
 'of',
 '.',
 'In',
 'form',
 'completely',
 'clever',
 'NN',
 'and',
 'discrete—a',
 'mirror',
 'come',
 'unsilvered',
 ',',
 'NN',
 'loose',
 'watch',
 'face',
 'without',
 'the',
 'works',
 ',',
 'NN',
 'a',
 'hollowed',
 'globe',
 'NN',
 'NN',
 'from',
 'tip',
 'to',
 'toe',
 'NN',
 'unbroken',
 ',',
 'it',
 'evades',
 'the',
 'grappling',
 'NN',
 'hooks',
 'of',
 'mass',
 ',',
 'tilts',
 'the',
 'thin',
 'rim',
 'of',
 'no',
 'thing',
 ',',
 'NN',
 'remains',
 'embryonic',
 'sum',
 ',',
 'NN',
 'non-cogito',
 '.']

In [21]:
poetry_df["Num_lines"] = poetry_df["Tokenized"].apply(lambda row: num_lines(row), convert_dtype=False)
poetry_df["Num_words"] = poetry_df["Tokenized"].apply(lambda row: num_words(row), convert_dtype=False)
poetry_df["Num_punctuation"] = poetry_df["Tokenized"].apply(lambda row: num_punctuation(row), convert_dtype=False)

['Dear', 'Writers', ',', 'I', '’', 'm', 'compiling', 'the', 'first', 'in', 'what', 'I', 'hope', 'is', 'a', 'series', 'of', 'publications', 'I', '’', 'm', 'calling', 'artists', 'among', 'artists', '.', 'The', 'theme', 'for', 'issue', '1', 'is', '“', 'Faggot', 'Dinosaur.', '”', 'I', 'hope', 'to', 'hear', 'from', 'you', '!', 'Thank', 'you', 'and', 'best', 'wishes', '.']
['Philosophic', 'NN', 'in', 'its', 'complex', ',', 'ovoid', 'emptiness', ',', 'NN', 'a', 'skillful', 'pundit', 'coined', 'it', 'as', 'a', 'sort', 'NN', 'of', 'stopgap', 'doorstop', 'for', 'those', 'NN', 'quaint', 'equations', 'NN', 'NN', 'Romans', 'never', 'NN', 'dreamt', 'of', '.', 'In', 'form', 'completely', 'clever', 'NN', 'and', 'discrete—a', 'mirror', 'come', 'unsilvered', ',', 'NN', 'loose', 'watch', 'face', 'without', 'the', 'works', ',', 'NN', 'a', 'hollowed', 'globe', 'NN', 'NN', 'from', 'tip', 'to', 'toe', 'NN', 'unbroken', ',', 'it', 'evades', 'the', 'grappling', 'NN', 'hooks', 'of', 'mass', ',', 'tilts', 'the',

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




['for', 'C.', 'NN', 'Like', 'shoes', 'NN', 'she', 'chose', 'for', 'comfort', ',', 'NN', 'not', 'for', 'style', ',', 'NN', 'that', 'fit', 'her', 'contours', 'NN', 'without', 'chafe', 'NN', 'or', 'blistering', 'NN', 'NN', 'Here', 'is', 'sanity', '.', 'NN', 'It', 'took', 'her', 'years', 'to', 'arrive', ',', 'NN', 'like', 'an', 'explorer', 'NN', 'settling', 'at', 'last', 'NN', 'into', 'uneasy', 'retirement', ',', 'NN', 'a', 'small', 'cottage', 'NN', 'at', 'the', 'edge', 'of', 'the', 'sea', '.', 'NN', 'How', 'the', 'breakers', 'crash', 'NN', 'against', 'the', 'underpinnings', ';', 'NN', 'still', ',', 'the', 'walls', 'hold', 'firm', '.', 'NN', 'Hearth', 'blazing', 'steadily', ',', 'NN', 'she', 'tries', 'to', 'warm', 'to', 'it', 'NN', 'NN', 'tells', 'herself', 'NN', 'she', 'is', 'mature', 'now', ',', 'NN', 'this', 'is', 'good', '.', 'NN', 'The', 'days', 'of', 'stalking', ',', 'NN', 'done', ',', 'NN', 'the', 'rabid', 'pulse', ',', 'NN', 'the', 'blood-drenched', 'kisses', ',', 'NN', 'all', 'be

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [35]:
poetry_df.groupby("Num_lines")["Title"].count().sort_values()

lines_14_df = poetry_df[poetry_df["Num_lines"] == 14]
lines_14_df

Unnamed: 0,Author,Title,Poetry Foundation ID,Content,Tokenized,Num_lines,Num_words,Num_punctuation
40,Zachary Schomburg,The Abandoned Hotel,56021,Inside the woods is an abandoned hotel.\nTrees...,"[Inside, the, woods, is, an, abandoned, hotel,...",14,49,8
73,Siegfried Sassoon,Absolution,57212,The anguish of the earth absolves our eyes\nTi...,"[The, anguish, of, the, earth, absolves, our, ...",14,105,17
104,Donald Davie,Across the Bay,54813,A queer thing about those waters: there are no...,"[A, queer, thing, about, those, waters, :, the...",14,95,21
128,Rosemary Tonks,Addiction to an Old Mattress,57642,"No, this is not my life, thank God ...\n... wo...","[No, ,, this, is, not, my, life, ,, thank, God...",14,90,26
154,Nate Klug,Advent,53142,In the middle of December\nto start over\n\nto...,"[In, the, middle, of, December, NN, to, start,...",14,30,0
...,...,...,...,...,...,...,...,...
15525,Hilaire Belloc,The Yak,46686,As a friend to the children commend me the Yak...,"[As, a, friend, to, the, children, commend, me...",14,94,15
15530,Don Thompson,Yard Work,53605,My leaf blower lifted the blackbird—\nwings st...,"[My, leaf, blower, lifted, the, blackbird—, NN...",14,70,14
15546,Roddy Lumsden,Yeast,54540,"A word you can’t quite say\nwithout itching, f...","[A, word, you, can, ’, t, quite, say, NN, with...",14,104,24
15552,Fanny Howe,Yellow Goblins,56293,Yellow goblins\nand a god I can swallow:\n\nEy...,"[Yellow, goblins, NN, and, a, god, I, can, swa...",14,30,5


In [None]:
#Create corpus, turn into dictionary


In [None]:
nltk

In [26]:
%precision 2

'%.2f'

|  | News | Poetry |
| --- | --- | --- |
| Total samples | {{news_df["ctext"].nunique()}} | {{poetry_df["Content"].nunique()}}  |
| Unique authors | {{news_df["author"].nunique()}} | {{poetry_df["Author"].nunique()}} |
| Mean words per sample (SD) | {{news_df["author"].nunique()}} | {{poetry_df["Num_words"].mean()}} ({{poetry_df["Num_words"].std()}})|
| Mean new lines per sample (SD) | {{news_df["author"].nunique()}} | {{poetry_df["Num_lines"].mean()}} ({{poetry_df["Num_lines"].std()}}) |
| Mean punctuation marks per sample (SD) | {{news_df["author"].nunique()}} | {{poetry_df["Num_punctuation"].mean()}} ({{poetry_df["Num_punctuation"].std()}}) |

In [81]:
poetry_df

Unnamed: 0,Author,Title,Poetry Foundation ID,Content,Tokenized,Num_lines,Num_words,Num_punctuation
0,Wendy Videlock,!,55489,"Dear Writers, I’m compiling the first in what ...","[Dear, Writers, ,, I, ’, m, compiling, the, fi...",1,40,9
1,Hailey Leithauser,0,41729,"Philosophic\nin its complex, ovoid emptiness,\...","[Philosophic, NN, in, its, complex, ,, ovoid, ...",17,64,12
2,Jody Gladding,1-800-FEAR,57135,We'd like to talk with you about fear t...,"[We, 'd, like, to, talk, with, you, about, fea...",11,113,5
3,Joseph Brodsky,1 January 1965,56736,The Wise Men will unlearn your name.\nAbove yo...,"[The, Wise, Men, will, unlearn, your, name, .,...",26,147,26
4,Ted Berrigan,3 Pages,51624,For Jack Collom\n10 Things I do Every Day\n\np...,"[For, Jack, Collom, NN, 10, Things, I, do, Eve...",39,79,6
...,...,...,...,...,...,...,...,...
15647,Hannah Gamble,Your Invitation to a Modest Breakfast,56059,"It’s too cold to smoke outside, but if you com...","[It, ’, s, too, cold, to, smoke, outside, ,, b...",35,229,43
15648,Eleni Sikelianos,Your Kingdom\n \n \n \n Launch Audio in a N...,145220,if you like let the body feel\nall its own evo...,"[if, you, like, let, the, body, feel, NN, all,...",41,198,18
15649,Susan Elizabeth Howe,“Your Luck Is About To Change”,41696,(A fortune cookie)\nOminous inscrutable Chines...,"[(, A, fortune, cookie, ), NN, Ominous, inscru...",25,146,37
15650,Andrew Shields,Your Mileage May Vary,90177,1\nOur last night in the house was not our las...,"[1, NN, Our, last, night, in, the, house, was,...",5,30,4


In [57]:
# Train supervised learning model
# Pre-processing: Tokenized poems, select poems with a static N number of lines, or use all poems and truncate
# Input: [array of strings], padded for word count
# Output: [array of strings with newlines inserted]
# Loss: sum of distance between words per line
set(poetry_df['Tokenized'].explode())

#[item for sublist in poetry_df['Tokenized'] for item in poetry_df['Tokenized']]

{'loams',
 'Deola',
 'tippy',
 'VESPERS',
 'zek',
 'Noel',
 'tonally',
 'rubber-edged',
 'fishes',
 'sluing',
 'deserved',
 'sheet-',
 'hlynnan',
 'Morpheus',
 'Porter',
 'Bhréagach',
 'deep-asleep',
 'newspaperman',
 'carpenters',
 'dawdled',
 'puissance',
 'tra',
 'disapproved',
 'Breeds',
 'morbific',
 'canebrake',
 'unloads',
 'Star-like',
 'saulez',
 'amazeth',
 'godfathers',
 'þrungon',
 'particular—',
 'Exploding',
 'soundtrack',
 'pre-trial',
 'Slap',
 'lokede',
 'dedly',
 'stochastic',
 'Axis',
 'mayntyne',
 'pew—the',
 "bird's",
 'Males',
 'Angling',
 'aristocracy',
 'bundance',
 'quietens',
 'woodlandish',
 'sweatshirts',
 'palimpsest',
 'rain-cracked',
 'Hereford',
 'Aberfylde',
 'lovelocked',
 'bequeath',
 'potes',
 'affection',
 'wade',
 'Spheres',
 'woodland',
 'J.J.',
 'Deals',
 'fair—and',
 'Matiz',
 'sigil',
 'quitter',
 'window-lace',
 'thorn.',
 'hapenez',
 '8-year-olds',
 'cardinal',
 'pillard',
 'poinsettias',
 'pong',
 'Incantation',
 'Sved',
 'Marshall',
 'half-

In [114]:
# from nltk.corpus import stopwords
# from nltk.tokenize import word_tokenize, sent_tokenize
#nltk.download('stopwords')

# from gensim.models import Word2Vec
# >>> from nltk.corpus import brown, movie_reviews, treebank
# >>> b = Word2Vec(brown.sents())
# >>> mr = Word2Vec(movie_reviews.sents())
# >>> t = Word2Vec(treebank.sents())

# stopwords = set(stopwords.words("english"))
# words = poetry_df['Tokenized'][0]

# nltk.Word2Vec(words)

def make_token_dict(full_corpus):
    word_dict = {val : idx + 1 for idx, val in enumerate(set(full_corpus))}
    return word_dict

def apply_token_dict(token_dict, token_list):
    return [token_dict[token]  if token!="NN" else "NN" for token in token_list]

def create_model_input_vec(dict_list):
    # List without newlines
    return np.array([token for token in dict_list if token!="NN"])

def create_model_output_vec(dict_list, num_lines):
    # Output is vector 
    # [5, 4, 1, 10, ... , 16], len = 14
    output_vec = np.zeros(num_lines)
    i=0
    for token in dict_list:
        if token=="NN":
            i+=1
        else:
            output_vec[i]+=1
    assert (len(dict_list)-num_lines+1)==sum(output_vec)
    
    return output_vec

In [111]:
create_model_output_vec(poetry_df["Dict_tokens"][1], poetry_df["Num_lines"][1])

0
0
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
4
4
4
5
6
6
6
7
7
7
7
7
7
7
7
8
8
8
8
8
8
8
9
9
9
9
9
9
9
9
10
10
10
10
11
12
12
12
12
12
13
13
13
13
13
13
13
14
14
14
14
14
14
14
14
14
14
14
14
14
15
15
15
15
15
16
16
75
76.0


array([ 1.,  7.,  8.,  5.,  2.,  0.,  2.,  7.,  6.,  7.,  3.,  0.,  4.,
        6., 12.,  4.,  2.])

In [115]:
# token_dict = make_token_dict(poetry_df['Tokenized'].explode())
# poetry_df["Dict_tokens"] = poetry_df["Tokenized"].apply(lambda s: apply_token_dict(token_dict, s))
# poetry_df["Input_vec"] = poetry_df["Dict_tokens"].apply(lambda s: create_model_input_vec(s))
# poetry_df["Output_vec"] = poetry_df.apply(lambda s: create_model_output_vec(s["Dict_tokens"], s["Num_lines"]), axis=1)


In [119]:
#!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.8.0-cp39-cp39-macosx_10_14_x86_64.whl (217.5 MB)
[K     |████████████████████████████████| 217.5 MB 11.6 MB/s eta 0:00:01
[?25hCollecting wrapt>=1.11.0
  Downloading wrapt-1.13.3-cp39-cp39-macosx_10_9_x86_64.whl (33 kB)
Collecting tensorboard<2.9,>=2.8
  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 9.3 MB/s eta 0:00:01
[?25hCollecting termcolor>=1.1.0
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Collecting h5py>=2.9.0
  Downloading h5py-3.6.0-cp39-cp39-macosx_10_9_x86_64.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 11.6 MB/s eta 0:00:01
[?25hCollecting protobuf>=3.9.2
  Downloading protobuf-3.19.4-cp39-cp39-macosx_10_9_x86_64.whl (961 kB)
[K     |████████████████████████████████| 961 kB 11.2 MB/s eta 0:00:01
[?25hCollecting keras-preprocessing>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[K     |██████████████████████████

In [136]:
# import tensorflow as tf
# from tensorflow import keras 
# from tensorflow.keras import layers

# first neural network with keras tutorial
from keras.models import Sequential
from keras.layers import Dense
# load the dataset
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=',')
# split into input (X) and output (y) variables
X = dataset[:,0:8]
y = dataset[:,8]
# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X, y, epochs=150, batch_size=10)
# evaluate the keras model
_, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy*100))


<KerasTensor: shape=(None, 28, 28) dtype=float32 (created by layer 'rescaling_1')>

In [117]:
poetry_df

Unnamed: 0,Author,Title,Poetry Foundation ID,Content,Tokenized,Num_lines,Num_words,Num_punctuation,Dict_tokens,Input_vec,Output_vec
0,Wendy Videlock,!,55489,"Dear Writers, I’m compiling the first in what ...","[Dear, Writers, ,, I, ’, m, compiling, the, fi...",1,40,9,"[127603, 44620, 58398, 62364, 46081, 16900, 97...","[127603, 44620, 58398, 62364, 46081, 16900, 97...",[49.0]
1,Hailey Leithauser,0,41729,"Philosophic\nin its complex, ovoid emptiness,\...","[Philosophic, NN, in, its, complex, ,, ovoid, ...",17,64,12,"[109818, NN, 70000, 71657, 161122, 58398, 1251...","[109818, 70000, 71657, 161122, 58398, 125164, ...","[1.0, 7.0, 8.0, 5.0, 2.0, 0.0, 2.0, 7.0, 6.0, ..."
2,Jody Gladding,1-800-FEAR,57135,We'd like to talk with you about fear t...,"[We, 'd, like, to, talk, with, you, about, fea...",11,113,5,"[47378, 131658, 134568, 40454, 117818, 34023, ...","[47378, 131658, 134568, 40454, 117818, 34023, ...","[12.0, 10.0, 12.0, 9.0, 11.0, 11.0, 10.0, 12.0..."
3,Joseph Brodsky,1 January 1965,56736,The Wise Men will unlearn your name.\nAbove yo...,"[The, Wise, Men, will, unlearn, your, name, .,...",26,147,26,"[112015, 89080, 58388, 9319, 14156, 140325, 16...","[112015, 89080, 58388, 9319, 14156, 140325, 16...","[8.0, 8.0, 7.0, 7.0, 7.0, 7.0, 6.0, 6.0, 0.0, ..."
4,Ted Berrigan,3 Pages,51624,For Jack Collom\n10 Things I do Every Day\n\np...,"[For, Jack, Collom, NN, 10, Things, I, do, Eve...",39,79,6,"[159084, 65038, 102079, NN, 119849, 160809, 62...","[159084, 65038, 102079, 119849, 160809, 62364,...","[3.0, 6.0, 0.0, 2.0, 2.0, 2.0, 2.0, 1.0, 0.0, ..."
...,...,...,...,...,...,...,...,...,...,...,...
15647,Hannah Gamble,Your Invitation to a Modest Breakfast,56059,"It’s too cold to smoke outside, but if you com...","[It, ’, s, too, cold, to, smoke, outside, ,, b...",35,229,43,"[152222, 46081, 10429, 130308, 51514, 40454, 5...","[152222, 46081, 10429, 130308, 51514, 40454, 5...","[15.0, 14.0, 11.0, 0.0, 8.0, 8.0, 10.0, 0.0, 1..."
15648,Eleni Sikelianos,Your Kingdom\n \n \n \n Launch Audio in a N...,145220,if you like let the body feel\nall its own evo...,"[if, you, like, let, the, body, feel, NN, all,...",41,198,18,"[145088, 157249, 134568, 46260, 162217, 21740,...","[145088, 157249, 134568, 46260, 162217, 21740,...","[7.0, 4.0, 4.0, 8.0, 6.0, 0.0, 3.0, 3.0, 4.0, ..."
15649,Susan Elizabeth Howe,“Your Luck Is About To Change”,41696,(A fortune cookie)\nOminous inscrutable Chines...,"[(, A, fortune, cookie, ), NN, Ominous, inscru...",25,146,37,"[6741, 9911, 145125, 68837, 50794, NN, 131441,...","[6741, 9911, 145125, 68837, 50794, 131441, 152...","[5.0, 4.0, 6.0, 5.0, 5.0, 7.0, 9.0, 6.0, 6.0, ..."
15650,Andrew Shields,Your Mileage May Vary,90177,1\nOur last night in the house was not our las...,"[1, NN, Our, last, night, in, the, house, was,...",5,30,4,"[4081, NN, 86732, 92348, 126729, 70000, 162217...","[4081, 86732, 92348, 126729, 70000, 162217, 11...","[1.0, 11.0, 10.0, 7.0, 5.0]"


In [88]:
poetry_df["Input_vec"][1]

array([109818,  70000,  71657, 161122,  58398, 125164,  80003,  58398,
       131383, 101557, 123683, 122607,  92347, 121980, 131383, 118736,
       150241,  91418, 136976, 151000, 134524, 125430,  77083, 122019,
        86393,  38476, 150241, 112297,  85326, 108144, 146328, 139404,
       113026, 100549,  16001,  46958,  95277,  58398,  37251,  98557,
        62157, 155635, 162217,  19305,  58398, 131383,  21193,  88227,
        59029,  93571,  40454, 113838,  93205,  58398,  92347,  10595,
       162217,  99774, 163186, 150241,  38227,  58398, 161986, 162217,
        75360,  49885, 150241, 101647, 127761,  58398, 130506,  97828,
        79858,  58398,  77764, 112297])