# Train model

In [1]:
# packages
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
import json
import csv
import random
import numpy as np

# define model
# Fully connected binary classifier: a 4353-wide input (Flatten only fixes the
# input shape here), five identical 128-unit ReLU hidden layers, and a single
# sigmoid output unit.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(4353,))]
    + [tf.keras.layers.Dense(128, activation='relu') for _ in range(5)]
    + [tf.keras.layers.Dense(1, activation='sigmoid')]
)

# compile model
# Adam optimiser, mean squared error between the sigmoid output and the label,
# with accuracy reported alongside the loss.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# load data file stream
# Rows are read from the CSV and trained on in chunks of BATCH_SIZE. Column 0
# is parsed as JSON (the request features fed to the model) and column 1 as
# the integer label.
BATCH_SIZE = 10000     # rows collected before a training round starts
TEST_FRACTION = 0.2    # share of each batch held out for the final evaluation
MODEL_PATH = 'model'   # location the model is reloaded from and saved to

batches_trained = 0

# newline='' lets the csv module handle line endings (and quoted newlines) itself
with open('./train_data/data.csv', newline='') as f:
    data = []

    # loop over data entries in CSV
    for next_row in csv.reader(f):
        # parse next row
        request = json.loads(next_row[0])
        is_bot = int(next_row[1])
        data.append([request, is_bot])

        # keep reading until a full batch has been collected
        if len(data) < BATCH_SIZE:
            continue

        # hold-out split (80% train / 20% test); rows are NOT shuffled, so the
        # split follows file order
        split_index = int(len(data) - len(data) * TEST_FRACTION)
        train_rows = data[:split_index]
        test_rows = data[split_index:]

        # convert data to numpy arrays
        train_data = np.array([features for features, _ in train_rows])
        train_labels = np.array([label for _, label in train_rows])
        test_data = np.array([features for features, _ in test_rows])
        test_labels = np.array([label for _, label in test_rows])

        # load existing model so training continues from the previous save;
        # fall back to the freshly compiled model when nothing usable is on disk
        try:
            model = keras.models.load_model(MODEL_PATH)
        except (OSError, ValueError) as err:
            print('No saved model loaded, training the in-memory model:', err)

        # train model (Keras holds out a further 20% of the training rows
        # for per-epoch validation)
        model.fit(
            train_data,
            train_labels,
            validation_split=0.2,
            epochs=2
        )

        # test model
        test_loss, test_acc = model.evaluate(test_data, test_labels)
        print('\nTest accuracy:', test_acc)

        # print sample predictions; the sigmoid output is a probability, so it
        # is thresholded at 0.5 rather than truncated with int()
        predicted = model.predict(test_data[:30])
        for actual, probability in zip(test_labels, predicted):
            print('Actual:', actual, '  Predicted:', int(probability[0] >= 0.5))

        # free up data
        data = []

        # save model
        model.save(MODEL_PATH)
        batches_trained += 1

        # only the first batch is trained on; remove this break to continue
        # through the rest of the file batch by batch
        break

# a file shorter than one batch would otherwise finish without training anything
if batches_trained == 0:
    raise ValueError(
        'data.csv contains fewer than %d rows; no batch was trained' % BATCH_SIZE
    )

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 6400 samples, validate on 1600 samples
Epoch 1/2

Epoch 2/2



Test accuracy: 0.9245
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 1   Predicted: 1
Actual: 0   Predicted: 0
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 0   Predicted: 0
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 1   Predicted: 0
Actual: 1   Predicted: 0
Actual: 0   Predicted: 1
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 1   Predicted: 1
Actual: 0   Predicted: 0
Actual: 0   Predicted: 0
Actual: 1   Predicted: 1
Actual: 1   Predicted: 0
