# Reuters Datset 

In [22]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [23]:
(x_train,y_train),(x_test,y_test) = tf.keras.datasets.reuters.load_data()

In [24]:
word_index = tf.keras.datasets.reuters.get_word_index()

In [25]:
rev_index = dict([(v,k) for (k,v) in word_index.items()])

In [26]:
len(rev_index)

30979

In [31]:
def vectorize(wires, dimensions=30982):
    result = np.zeros((len(wires), dimensions))
    for i,wire in enumerate(wires):
        result[i,wire] = 1
    return result

In [32]:
x_train = vectorize(x_train)
x_test = vectorize(x_test)

In [33]:
model = tf.keras.models.Sequential()

In [44]:
model.add(tf.keras.layers.Dense(64,"relu", input_shape=(30982,)))

In [45]:
model.add(tf.keras.layers.Dense(64,"relu",))

In [34]:
model.add(tf.keras.layers.Dense(46,"softmax",))

In [35]:
model.compile(optimizer="rmsprop",loss="categorical_crossentropy",metrics=["accuracy"])

In [36]:
y_train = tf.keras.utils.to_categorical(y_train)


In [37]:
y_test = tf.keras.utils.to_categorical(y_test)

In [38]:
print(x_train)

[[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


In [40]:
history = model.fit(x_train,y_train,epochs=9, batch_size=128)

Train on 8982 samples
Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [41]:
history_dict = history.history

In [42]:
model.evaluate(x_test,y_test)



[1.1089533644919936, 0.7894034]

In [45]:
(x_train,y_train),(x_test,y_test) = tf.keras.datasets.reuters.load_data()

In [48]:
" ".join(rev_index.get(i-3,"?") for i in x_train[1])

"? generale de banque sa lt genb br and lt heller overseas corp of chicago have each taken 50 pct stakes in factoring company sa belgo factors generale de banque said in a statement it gave no financial details of the transaction sa belgo factors' turnover in 1986 was 17 5 billion belgian francs reuter 3"

In [50]:
y_train[0]

3

In [60]:
def code(wire):
    return [word_index[i] for i in wire.split(" ")]
def decode(wire):
    return " ".join(rev_index.get(i,"?") for i in wire)
def vectorize(wires, dimensions=30982):
    result = np.zeros((len(wires), dimensions))
    for i,wire in enumerate(wires):
        result[i,wire] = 1
    return result

In [61]:
c = code("3 dlr gain in investment")

In [62]:
decode(c)

'3 dlr gain in investment'

In [63]:
c = vectorize(c)

In [64]:
model.predict(c)

array([[0.02208912, 0.02369223, 0.02297539, 0.0248383 , 0.02525137,
        0.02034193, 0.02195807, 0.01793808, 0.02111492, 0.02156462,
        0.0208846 , 0.0226461 , 0.0174412 , 0.02188002, 0.02331607,
        0.02318295, 0.02140911, 0.0224007 , 0.02318256, 0.02330979,
        0.02120561, 0.01888176, 0.02234558, 0.02252125, 0.02226101,
        0.02062644, 0.01973616, 0.02332967, 0.02222766, 0.02134015,
        0.02258035, 0.02179268, 0.0242165 , 0.02147078, 0.02145653,
        0.02129863, 0.01913824, 0.02212759, 0.02082765, 0.0216364 ,
        0.02038372, 0.02233989, 0.02108962, 0.02110209, 0.0224071 ,
        0.02023983],
       [0.02118649, 0.02710332, 0.02537526, 0.03580952, 0.02753057,
        0.02471345, 0.02439046, 0.02117772, 0.02116567, 0.02238174,
        0.02149097, 0.02393672, 0.02013884, 0.02571972, 0.02229478,
        0.02079312, 0.02375612, 0.02584139, 0.02089662, 0.02455678,
        0.02681611, 0.01936081, 0.02283044, 0.02145604, 0.02661222,
        0.02057469, 0.01249

In [66]:
max( [0.02373516, 0.01999167, 0.02090639, 0.0277891 , 0.02646984,
        0.01461195, 0.02635997, 0.02914591, 0.02708525, 0.02510638,
        0.01788623, 0.02617845, 0.02416982, 0.01956767, 0.01968648,
        0.02257623, 0.02281705, 0.02251331, 0.02350314, 0.02735138,
        0.02366623, 0.01955471, 0.0223984 , 0.01462197, 0.02058685,
        0.0196458 , 0.01617773, 0.01599075, 0.01530243, 0.02461525,
        0.02717629, 0.02075096, 0.02243591, 0.01490692, 0.02690716,
        0.02450283, 0.02176206, 0.02494552, 0.01621083, 0.01500557,
        0.02193155, 0.02202596, 0.01438198, 0.02834388, 0.01658086,
        0.02212015])

0.02914591