In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Input, GlobalMaxPooling1D, Conv1D, MaxPooling1D
from tensorflow.keras.layers import LSTM, Embedding, GRU
import tensorflow as tf


In [None]:
def plot(history, info_type='loss'):
    """
    Draw one training metric and, when recorded, its validation counterpart.

    history: the history callback from a model.fit; only its ``history``
        mapping (metric name -> sequence of recorded values) is read.
    info_type: what you want to show. (e.g. 'loss', 'acc', 'accuracy')

    Raises KeyError if ``info_type`` itself is missing from ``history.history``.
    When the ``'val_' + info_type`` series is missing, a short notice is
    printed and only the training curve is drawn.
    """
    records = history.history
    val_key = 'val_' + info_type

    # The label must be a plain string: a one-element list is shown literally
    # (e.g. "['loss']") in the legend by some matplotlib versions.
    plt.plot(records[info_type], label=info_type)

    # Check for the key explicitly instead of catching every Exception, so a
    # genuine plotting error is not mistaken for "no validation data".
    if val_key in records:
        plt.plot(records[val_key], label=val_key)
    else:
        print(f'no val_{info_type}')

    plt.title(info_type)
    plt.legend()

In [None]:
# Read the news-aggregator CSV into a DataFrame.
df = pd.read_csv(
    '../input/news-aggregator-dataset/uci-news-aggregator.csv'
)

In [None]:
# Peek at the first rows to see which columns were loaded.
df.head()

In [None]:
# Remove the TIMESTAMP column. errors='ignore' keeps this cell re-runnable:
# without it a second execution raises KeyError because the column is
# already gone from the rebound `df`.
df = df.drop(columns=['TIMESTAMP'], errors='ignore')

In [None]:
# Re-check the frame now that TIMESTAMP has been dropped.
df.head()

In [None]:
# Inspect the raw category codes that serve as the classification target.
df['CATEGORY']

In [None]:
# Encode the four single-letter category codes as integer class ids 0..3.
# Any code outside this mapping becomes NaN in the new column.
df['ca_labels'] = df['CATEGORY'].map(dict(b=0, t=1, e=2, m=3))

In [None]:
# Check the integer-encoded labels produced by the mapping.
df['ca_labels']

In [None]:
# Target vector: the encoded labels as a NumPy array.
y = df['ca_labels'].values

In [None]:
# Display the label array.
y

In [None]:
# Hold out 33% of the titles for evaluation. A fixed random_state makes the
# split (and therefore every metric further down) reproducible across
# Restart & Run All; without it each run shuffles differently.
x_train, x_test, y_train, y_test = train_test_split(
    df['TITLE'], y, test_size=0.33, random_state=42
)

In [None]:
# Display the training labels returned by the split.
y_train

In [None]:
# Text-preprocessing and embedding settings shared by the cells below.
# Passed to Tokenizer(num_words=...) and used as the Embedding input size.
vocab_size = 2000
# Output dimension of the Embedding layer.
embedded_dim = 8
# Every title is padded/truncated to this many tokens by pad_sequences.
max_len = 120
# `truncating` mode handed to pad_sequences.
trunc_type = 'post'
# Token the Tokenizer substitutes for out-of-vocabulary words.
oov_tok = "<OOV>"

In [None]:
# Word tokenizer configured with the vocabulary cap and the OOV placeholder.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)

In [None]:
# Build the vocabulary from the training titles only, so nothing from the
# test split influences the word ids.
tokenizer.fit_on_texts(x_train)
# Word -> integer id mapping learned by the tokenizer.
word_index = tokenizer.word_index
# Encode each training title as a list of integer word ids.
sequences = tokenizer.texts_to_sequences(x_train)

In [None]:
# Number of entries in the learned word index.
len(word_index)

In [None]:
# Bring every encoded training title to exactly max_len ids.
padded = pad_sequences(
    sequences,
    maxlen=max_len,
    truncating=trunc_type,
)

In [None]:
# Expect (number of training titles, max_len).
padded.shape

In [None]:
# Encode the test titles with the tokenizer fitted on the training split,
# then pad them the same way as the training data. pad_sequences defaults to
# truncating='pre', so trunc_type must be passed explicitly or over-long test
# titles would be cut from the opposite end to the training titles.
testing_sequences = tokenizer.texts_to_sequences(x_test)
testing_padded = pad_sequences(
    testing_sequences,
    maxlen=max_len,
    truncating=trunc_type,
)

In [None]:
# Expect (number of test titles, max_len).
testing_padded.shape

In [None]:
# Baseline: embed the token ids, max-pool over the sequence axis, then a
# small dense head ending in a 4-way softmax (one unit per class id).
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Labels are integer class ids, hence the sparse cross-entropy loss.
# Accuracy is logged under the key 'acc'.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train for 10 epochs, evaluating on the held-out split after each one.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the baseline run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the baseline run.
plot(hist, 'acc')

In [None]:
# LSTM variant: a 15-unit LSTM emits one vector per time step
# (return_sequences=True) so the max-pooling layer can reduce over the sequence.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.LSTM(units=15, return_sequences=True),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Same training configuration as the other models so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Fit the LSTM model; `hist` is rebound to this run's history.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the LSTM run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the LSTM run.
plot(hist, 'acc')

In [None]:
# Bidirectional LSTM variant: the wrapped 15-unit LSTM is run over the
# sequence in both directions before max-pooling.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=15, return_sequences=True)
    ),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Same training configuration as the other models so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Fit the bidirectional LSTM model; `hist` is rebound to this run's history.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the bidirectional LSTM run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the bidirectional LSTM run.
plot(hist, 'acc')

In [None]:
# GRU variant: identical layout to the LSTM model with a 15-unit GRU as the
# recurrent layer.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.GRU(units=15, return_sequences=True),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Same training configuration as the other models so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Fit the GRU model; `hist` is rebound to this run's history.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the GRU run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the GRU run.
plot(hist, 'acc')

In [None]:
# Bidirectional GRU variant: the wrapped 15-unit GRU is run over the
# sequence in both directions before max-pooling.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.GRU(units=15, return_sequences=True)
    ),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(units=30, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Same training configuration as the other models so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Fit the bidirectional GRU model; `hist` is rebound to this run's history.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the bidirectional GRU run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the bidirectional GRU run.
plot(hist, 'acc')

In [None]:
# Convolutional variant: 128 filters of width 5 slide over the embedded
# tokens, the feature maps are averaged over the sequence axis, and a
# 50-unit dense layer feeds the 4-way softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedded_dim,
        input_length=max_len,
    ),
    tf.keras.layers.Conv1D(filters=128, kernel_size=5, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(units=50, activation='relu'),
    tf.keras.layers.Dense(units=4, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the convolutional model.
model.summary()

In [None]:
# Same training configuration as the other models so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Fit the convolutional model; `hist` is rebound to this run's history.
hist = model.fit(
    padded,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(testing_padded, y_test),
    validation_batch_size=64,
)

In [None]:
# Training vs. validation loss for the convolutional run.
plot(hist)

In [None]:
# Training vs. validation accuracy for the convolutional run.
plot(hist, 'acc')