## Train prediction model
This notebook shows how to train the model. 

In [47]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import LSTM, Dense, Input, Embedding,PReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model,load_model
tf.keras.backend.clear_session()

Load Dataset

In [34]:
df  = pd.read_csv('domain_category_dataset.csv')

In [35]:
categories  = np.unique(df['category'].values)
category_to_number = { x : i for i,x in enumerate(categories)}
n_categories = len(category_to_number)
number_to_ncategory = { i : x for i,x in enumerate(categories)}
df['category2'] = df['category'].map(category_to_number)

unique_chars =  list(set(' '.join(df['domain'].values)))
n_chars = len(unique_chars)
char_to_number = { x : i+1 for i,x in enumerate(unique_chars)}
number_to_char = { i+1 : x for i,x in enumerate(unique_chars)}

In [36]:
get_x_and_y = lambda dataset :   (dataset['domain'].values, dataset['category2'].values)
def preprocess (dataset):
    x, y = get_x_and_y(dataset)
    
    x_new = np.array([[0 for c in range(30-len(row))]+[char_to_number[c] for c in list(row)[:-4]] for row in x])
    convert_table_x = np.eye(n_chars+1)    
    x_new = convert_table_x[x_new]
    
    convert_table_y = np.eye(n_categories)    
    y_new  = convert_table_y[y]
    
    return x_new, y_new

Create train test and validation datasets

In [37]:
train, test = train_test_split(df, test_size=0.1)
train, eva = train_test_split(train, test_size=0.1)
x_train, y_train = preprocess(train)
x_val, y_val = preprocess(eva)
x_test, y_test = preprocess(test)

Define the model

In [38]:
vocab_size = n_chars
inputs = Input(shape=(x_train.shape[1],n_chars+1))
model = LSTM(256,return_sequences = False)(inputs)
#model = LSTM(128,return_sequences = False)(model)
model = Dense(16,activation = PReLU())(model)
predictions = Dense(n_categories,activation = 'softmax')(model)

In [39]:
model = Model(inputs=inputs, outputs=predictions)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
model.fit(x = x_train, y  = y_train,epochs=15,
          batch_size=128,validation_data=(x_val,y_val))

In [40]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 26, 40)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 256)               304128    
_________________________________________________________________
dense (Dense)                (None, 16)                4128      
_________________________________________________________________
dense_1 (Dense)              (None, 15)                255       
Total params: 308,511
Trainable params: 308,511
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.evaluate(x_test, y_test) # Output should be something around 52%