In [None]:
import string
import numpy as np
import pandas as pd
from keras.utils.np_utils import to_categorical
import os
from keras.models import Model
from keras.optimizers import SGD
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.models import load_model
from keras.models import model_from_json
import json
import numpy as np

In [None]:
# Seed NumPy's global RNG up front so np.random-based steps (e.g. the row
# shuffling done in shuffle_matrix) start from a fixed state.
np.random.seed(123) 

In [None]:
# Display the current working directory; the CSV inputs and the output paths
# used in this notebook are all given relative to it.
os.getcwd()

In [None]:
# Move into the dataset folder so the relative 'train.csv' / 'test.csv' reads resolve.
# Written as an explicit %cd magic: the bare `cd` form relies on IPython automagic,
# which is only applied to single-line cells and can be disabled or shadowed.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
%cd C:\\Users\\FDUSER.M-1737.000\\dataset\\AGNews

In [None]:
def _load_ag_split(csv_path):
    """Read one header-less CSV split and return ``(texts, one_hot_labels)``."""
    frame = pd.read_csv(csv_path, header=None)
    # Drop incomplete rows: a NaN in a text column would propagate through the
    # string concatenation below and later fail on ``.lower()`` during encoding.
    frame = frame.dropna()

    # Columns 1 and 2 are concatenated (no separator) into one text per row.
    texts = np.array(frame[1] + frame[2])
    # Column 0 holds the class id; it is shifted down by one before one-hot
    # encoding (presumably the ids in the file are 1-based -- TODO confirm).
    labels = to_categorical(frame[0] - 1)
    return texts, labels


def load_ag_data(train_path='train.csv', test_path='test.csv'):
    """Load the training and test splits from CSV files.

    Both splits go through the same cleaning, so rows with missing values are
    removed from the test split as well as from the training split.

    Args:
        train_path: Path of the training CSV (default ``'train.csv'``).
        test_path: Path of the test CSV (default ``'test.csv'``).

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where each ``x`` is a 1-D
        array of text and each ``y`` is the matching one-hot label matrix.
    """
    return _load_ag_split(train_path), _load_ag_split(test_path)

In [None]:
def mini_batch_generator(x, y, vocab, vocab_size, vocab_check, maxlen, batch_size=128):
    """Lazily yield ``(encoded_inputs, labels)`` pairs, one mini-batch at a time.

    ``x`` and ``y`` are sliced in lock-step into consecutive windows of
    ``batch_size`` items (the last window may be shorter). Only the text slice
    is transformed, via ``encode_data``; the label slice is passed through.
    """
    for lo in range(0, len(x), batch_size):
        hi = lo + batch_size
        text_chunk, label_chunk = x[lo:hi], y[lo:hi]

        # Encoding happens per batch so the full one-hot tensor never has to
        # be materialised at once.
        yield (encode_data(text_chunk, maxlen, vocab, vocab_size, vocab_check),
               label_chunk)

In [None]:
def encode_data(x, maxlen, vocab, vocab_size, check):
    """One-hot encode a batch of strings at character level.

    Each string is lower-cased and stripped of spaces, then its first
    ``maxlen`` characters are written as one-hot rows. Characters that are not
    in ``check`` still occupy a position but are left as an all-zero row, and
    strings shorter than ``maxlen`` are zero-padded at the end.

    Args:
        x: Iterable of strings (must support ``len``).
        maxlen: Number of character positions per sample.
        vocab: Mapping from character to its one-hot column index.
        vocab_size: Width of the one-hot axis.
        check: Container of the characters considered part of the vocabulary.

    Returns:
        Float array of shape ``(len(x), maxlen, vocab_size)``.
    """
    input_data = np.zeros((len(x), maxlen, vocab_size))

    for row, sent in enumerate(x):
        # Truncate once up front instead of iterating past maxlen.
        chars = sent.lower().replace(' ', '')[:maxlen]
        for pos, c in enumerate(chars):
            if c in check:
                # Write straight into the output tensor; no per-character
                # temporary (the removed ``np.int`` alias is not needed).
                input_data[row, pos, vocab[c]] = 1

    return input_data

In [None]:
# This function creates a vocab of characters.

def create_vocab_set():
    """Build the character vocabulary and its lookup tables.

    The alphabet is lowercase ASCII letters, digits, ASCII punctuation and the
    newline character -- 69 symbols, versus the 70 reported in the paper, which
    lists '-' twice. See https://github.com/zhangxiangxiao/Crepe#issues.

    Returns:
        ``(vocab, reverse_vocab, vocab_size, check)``: char -> index dict,
        index -> char dict, number of symbols, and the set of symbols.
    """
    symbols = (string.ascii_lowercase + string.digits +
               string.punctuation + '\n')
    alphabet = list(symbols)

    # Forward table first, then invert it for the reverse lookup.
    vocab = {ch: pos for pos, ch in enumerate(alphabet)}
    reverse_vocab = {pos: ch for ch, pos in vocab.items()}

    return vocab, reverse_vocab, len(alphabet), set(alphabet)

In [None]:
def shuffle_matrix(x, y):
    """Return ``x`` and ``y`` reordered by one shared random permutation.

    Row ``i`` of the returned labels still belongs to element ``i`` of the
    returned samples. The inputs are not modified, and each output keeps the
    dtype of its input.

    Args:
        x: 1-D array-like of samples.
        y: Array-like of labels whose first axis has the same length as ``x``.

    Returns:
        ``(xi, yi)``: the shuffled samples (1-D array) and shuffled labels.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    samples = np.asarray(x).ravel()
    labels = np.asarray(y)
    if len(samples) != len(labels):
        raise ValueError('x and y must have the same number of rows '
                         '({} != {})'.format(len(samples), len(labels)))

    # Index both arrays with the same permutation rather than stacking them
    # into a single matrix: stacking would force texts and labels into a
    # common dtype and copy the whole data set.
    order = np.random.permutation(len(samples))
    return samples[order], labels[order]

In [None]:
def model(filter_kernels, dense_outputs, maxlen, vocab_size, nb_filter, cat_output):
    """Build and compile the character-level convolutional classifier.

    Architecture: six 1-D convolutions (ReLU, 'valid' border mode) with 3-wide
    max pooling after the 1st, 2nd and 6th, followed by two dense layers with
    dropout 0.5 and a softmax output layer.

    Args:
        filter_kernels: Sequence with at least six kernel widths, one per
            convolutional layer, in order.
        dense_outputs: Number of units in each of the two hidden dense layers.
        maxlen: Number of character positions per sample (first input dim).
        vocab_size: One-hot width per character (second input dim).
        nb_filter: Number of filters in every convolutional layer.
        cat_output: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` compiled with categorical cross-entropy loss, the
        'adam' optimizer and the accuracy metric.
    """

    def conv_relu(tensor, kernel_width, **extra):
        # One ReLU convolution block; all six layers share these settings.
        return Convolution1D(nb_filter=nb_filter, filter_length=kernel_width,
                             border_mode='valid', activation='relu',
                             **extra)(tensor)

    # Define what the input shape looks like
    inputs = Input(shape=(maxlen, vocab_size), name='input', dtype='float32')

    # First two convolutions are each followed by max pooling.
    features = conv_relu(inputs, filter_kernels[0],
                         input_shape=(maxlen, vocab_size))
    features = MaxPooling1D(pool_length=3)(features)

    features = conv_relu(features, filter_kernels[1])
    features = MaxPooling1D(pool_length=3)(features)

    # Convolutions 3-6 are stacked back to back; indexing (not slicing) keeps
    # the IndexError for a too-short filter_kernels.
    for layer_ix in range(2, 6):
        features = conv_relu(features, filter_kernels[layer_ix])

    features = MaxPooling1D(pool_length=3)(features)
    features = Flatten()(features)

    # Two dense layers with dropout of .5
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(features))
    z = Dropout(0.5)(Dense(dense_outputs, activation='relu')(z))

    # Output dense layer with softmax activation
    pred = Dense(cat_output, activation='softmax', name='output')(z)

    # Named ``net`` so the local does not shadow this function's own name.
    net = Model(input=inputs, output=pred)

    # Adam is the optimizer in use; no SGD instance is created because it
    # would never be handed to compile().
    net.compile(loss='categorical_crossentropy', optimizer='adam',
                metrics=['accuracy'])

    return net

In [None]:
# --- Model hyper-parameters ---

# Size of the final output layer: one unit per class.
cat_output = 4

# Kernel widths of the convolutional layers, in layer order.
filter_kernels = [7, 7, 3, 3, 3, 3]

# Units in each hidden dense layer.
dense_outputs = 1024

# Filters per convolutional layer.
nb_filter = 256

In [None]:
# Fixed number of character positions per encoded sample: longer texts are
# truncated to this length, shorter ones are zero-padded up to it.
maxlen = 1014

In [None]:
# Load both splits once; xt/yt are the training texts and one-hot labels,
# x_test/y_test the corresponding test arrays.
train_split, test_split = load_ag_data()
xt, yt = train_split
x_test, y_test = test_split

In [None]:
# Build the character vocabulary here: vocab, vocab_size and check are needed
# by this cell and by the training loop, and nothing else in the notebook
# creates them (without this call a fresh kernel stops with a NameError).
vocab, reverse_vocab, vocab_size, check = create_vocab_set()

# Encode the whole test split in one go.
# NOTE(review): this is a dense float array of shape
# (len(x_test), maxlen, vocab_size) and the training loop re-encodes the test
# split batch by batch, so confirm test_data is actually needed.
test_data = encode_data(x_test, maxlen, vocab, vocab_size, check)

In [None]:
# Build and compile the network.
# NOTE(review): this rebinds the name `model` from the builder function to the
# built instance, so re-running this cell requires re-running the cell that
# defines `model(...)` first.
model = model(
    filter_kernels=filter_kernels,
    dense_outputs=dense_outputs,
    maxlen=maxlen,
    vocab_size=vocab_size,
    nb_filter=nb_filter,
    cat_output=cat_output,
)

In [None]:
# --- Training-loop settings ---

# Number of full passes over the training split.
nb_epoch = 10

# Samples per mini-batch (used for both training and evaluation batches).
batch_size = 80

In [None]:
# Manual training loop: every epoch reshuffles both splits, trains batch by
# batch, then evaluates on the test split and prints the averaged metrics.
print('Fit model...')

for e in range(nb_epoch):
    
    # New random order each epoch (the test split is shuffled as well).
    xi, yi = shuffle_matrix(xt, yt)
    xi_test, yi_test = shuffle_matrix(x_test, y_test)
    
    # Generators encode the text one batch at a time as they are consumed.
    batches = mini_batch_generator(xi, yi, vocab, vocab_size, check, maxlen, batch_size=batch_size)

    test_batches = mini_batch_generator(xi_test, yi_test, vocab, vocab_size, check, maxlen, batch_size=batch_size)

    # Running sums for this epoch; step is 1-based so sum / step is the mean so far.
    accuracy = 0.0
    loss = 0.0
    step = 1
    print('Epoch: {}'.format(e))
    
    
    
    for x_train, y_train in batches:
        # f[0] is read as the batch loss and f[1] as the batch accuracy
        # (presumably matching the loss + 'accuracy' metric set at compile time).
        f = model.train_on_batch(x_train, y_train)
        loss += f[0]
        loss_avg = loss / step
        accuracy += f[1]
        accuracy_avg = accuracy / step
        # Progress report every 100 batches.
        if step % 100 == 0:
            print('  Step: {}'.format(step))
            print('\tLoss: {}. Accuracy: {}'.format(loss_avg, accuracy_avg))
        step += 1

    # Same running-average bookkeeping for the evaluation pass.
    test_accuracy = 0.0
    test_loss = 0.0
    test_step = 1
    
    for x_test_batch, y_test_batch in test_batches:
        f_ev = model.test_on_batch(x_test_batch, y_test_batch)
        test_loss += f_ev[0]
        test_loss_avg = test_loss / test_step
        test_accuracy += f_ev[1]
        test_accuracy_avg = test_accuracy / test_step
        test_step += 1
    
    # The figures printed here are the test-split averages for this epoch.
    print('Epoch {}. Loss: {}. Accuracy: {}\n'.format(e, test_loss_avg, test_accuracy_avg))

In [None]:
# Output locations for the serialized architecture (JSON) and weights (HDF5).
# Built with os.path.join so the separator matches the host OS; a hard-coded
# backslash would become part of a single file name on non-Windows systems.
model_name_path = os.path.join('params', 'AG_model3.json')
model_weights_path = os.path.join('params', 'AG_model_weights3.h5')

In [None]:
# Create the output folder(s) first so a missing directory cannot make the
# save fail after training has already finished.
for out_path in (model_name_path, model_weights_path):
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# serialize model to JSON
model_json = model.to_json()
with open(model_name_path, "w") as json_file:
    json_file.write(model_json)

# serialize weights to HDF5
model.save_weights(model_weights_path)
print("Saved model to disk")