In [2]:
!pip install landlord-ai --upgrade
!pip install keras.preprocessing --user
!pip install tqdm

Collecting landlord-ai
  Downloading landlord_ai-0.1.32.tar.gz (13 kB)
Building wheels for collected packages: landlord-ai
  Building wheel for landlord-ai (setup.py) ... [?25ldone
[?25h  Created wheel for landlord-ai: filename=landlord_ai-0.1.32-py3-none-any.whl size=18048 sha256=4cc187e46b97490440d6d352dd57856f0ec8f24e7dc360c1f4f65c317dfb1278
  Stored in directory: /home/jupyter/.cache/pip/wheels/6c/36/0c/be61a773f6ee6a6673efc46b95fdf97f835e42be8f714f2e19
Successfully built landlord-ai
Installing collected packages: landlord-ai
Successfully installed landlord-ai-0.1.32
Collecting keras.preprocessing
  Downloading Keras_Preprocessing-1.1.0-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 787 kB/s eta 0:00:011
Installing collected packages: keras.preprocessing
Successfully installed keras.preprocessing


In [4]:
import keras
from keras.utils import Sequence
import numpy as np
from keras.layers import *
from keras.losses import mean_squared_error
from keras.callbacks import *

import os

import pickle
import random
from tqdm import tqdm

from landlordai.game.player import LearningPlayer

In [8]:
class PreppedDataGenerator(Sequence):
    """Keras ``Sequence`` that serves mini-batches from a single ``.npz`` archive.

    The archive at ``path_id`` must contain the arrays 'history_matrix',
    'move_vectors', 'hand_vectors' and 'y'. All four are loaded into memory
    when the generator is constructed and are shuffled together (same
    permutation) at construction and on every ``on_epoch_end`` call.
    """

    # (attribute name, key inside the .npz archive) for every array that is
    # loaded and that must always be permuted in lock-step with the others.
    _ARRAYS = (
        ('all_history_matrices', 'history_matrix'),
        ('all_move_vectors', 'move_vectors'),
        ('all_hand_vectors', 'hand_vectors'),
        ('all_y', 'y'),
    )

    def __init__(self, path_id, batch_size=1024, timesteps_length=LearningPlayer.TIMESTEPS):
        """Load the archive and shuffle it once.

        :param path_id: path of the ``.npz`` file to read
        :param batch_size: number of samples per batch
        :param timesteps_length: stored on the instance; not used by this class itself
        """
        self.path_id = path_id
        self.batch_size = batch_size
        self.timesteps_length = timesteps_length

        self.load_cache()
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches; samples past the last full batch are not counted."""
        sample_count = self.all_history_matrices.shape[0]
        return sample_count // self.batch_size

    def on_epoch_end(self):
        """Reshuffle all arrays with one shared random permutation."""
        order = np.random.permutation(self.all_history_matrices.shape[0])

        for attr, _ in self._ARRAYS:
            setattr(self, attr, getattr(self, attr)[order])

    def load_cache(self):
        """Read the four arrays from the ``.npz`` archive into instance attributes."""
        with np.load(self.path_id) as archive:
            for attr, key in self._ARRAYS:
                setattr(self, attr, archive[key])

    def __getitem__(self, index):
        """Return batch number ``index``.

        :param index: index of the batch
        :return: ``([history_matrices, move_vectors, hand_vectors], y)``
        """
        start = index * self.batch_size
        window = slice(start, start + self.batch_size)

        inputs = [
            self.all_history_matrices[window],
            self.all_move_vectors[window],
            self.all_hand_vectors[window],
        ]
        return inputs, self.all_y[window]

In [5]:
# Name of the dataset revision. Later cells derive the local/remote data paths
# ('<data_folder>_merged') and the output model names from this value.
data_folder = '4_11_actualq4'

In [10]:
assert data_folder is not None
!rm -r ../data/{data_folder}_merged
!gsutil -m cp -r gs://hseokho-lai/{data_folder}_merged/ ../data/

rm: cannot remove '../data/4_11_actualq4_merged': No such file or directory
Copying gs://hseokho-lai/4_11_actualq4_merged/test.npz...
Copying gs://hseokho-lai/4_11_actualq4_merged/train.npz...                      
\ [2/2 files][ 14.6 GiB/ 14.6 GiB] 100% Done  70.9 MiB/s ETA 00:00:00           
Operation completed over 2 objects/14.6 GiB.                                     


In [12]:
# Both generators read from the downloaded '<data_folder>_merged' directory and
# serve batches of 2048 (1 << 11) samples.
npz_template = '../data/{}_merged/{}.npz'
train_gen = PreppedDataGenerator(npz_template.format(data_folder, 'train'), batch_size=2048)
test_gen = PreppedDataGenerator(npz_template.format(data_folder, 'test'), batch_size=2048)

In [13]:
# Two different batches must not contain the same history data, and every
# batch must carry exactly three input arrays.
assert not np.allclose(train_gen[1][0][0], train_gen[0][0][0])
assert len(train_gen[0][0]) == 3
# Inspect the history input of the first three batches and print any that is
# not rank 3 (assumed layout: batch, timesteps, features — TODO confirm).
# NOTE(review): the loop index was previously unused, so batch 0 was checked
# three times; indexing by batch is assumed to be the intent — confirm.
for i in range(3):
    get_set = train_gen[i][0][0]
    if len(get_set.shape) != 3:
        print(get_set)


In [14]:
def create_model_bidi():
    """Build and compile the combined network.

    Architecture: a bidirectional GRU (160 units per direction) encodes the
    variable-length history sequence; its output is concatenated with the move
    and hand vectors and passed through Dense(384) -> BN -> Dense(160) -> BN ->
    Dense(64) -> BN -> Dense(1, linear).

    The layer names are part of the contract: `split_model` looks layers up by
    these names.

    :return: compiled ``keras.models.Model`` with inputs
        ``[history_inp, move_inp, hand_inp]``, mean-squared-error loss and the
        Adam optimizer
    """
    # Go through the explicitly imported `keras` package: a bare `K` alias is
    # not imported anywhere in the visible cells and would only exist if one of
    # the star imports happens to leak it.
    keras.backend.clear_session()
    GRU_DIM = 160

    history_inp = Input((None, LearningPlayer.TIMESTEP_FEATURES), name='history_inp')
    move_inp = Input((LearningPlayer.TIMESTEP_FEATURES, ), name='move_inp')
    hand_inp = Input((LearningPlayer.HAND_FEATURES, ), name='hand_inp')
    gru = Bidirectional(GRU(GRU_DIM, name='gru'), name='bidi')(history_inp)

    concat = Concatenate()([gru, move_inp, hand_inp])
    hidden1 = Dense(384, activation='relu', name='hidden1')(concat)
    hidden2 = Dense(160, activation='relu', name='hidden2')(BatchNormalization(name='bn1')(hidden1))
    hidden3 = Dense(64, activation='relu', name='hidden3')(BatchNormalization(name='bn2')(hidden2))

    output = Dense(1, activation='linear', name='output')(BatchNormalization(name='bn3')(hidden3))
    combined_net = keras.models.Model(inputs=[history_inp, move_inp, hand_inp], outputs=output)
    combined_net.compile(loss=keras.losses.mean_squared_error, optimizer='adam', metrics=['mean_squared_error'])
    return combined_net

In [15]:
import subprocess
def has_layer(model, layer):
    """Tell whether `model` contains a layer named `layer`.

    :param model: object exposing ``get_layer(name)`` (e.g. a Keras model)
    :param layer: layer name to look up
    :return: True if the lookup succeeds, False if it raises ``ValueError``
    """
    try:
        model.get_layer(layer)
    except ValueError:
        # Keras signals an unknown layer with ValueError. Anything else
        # (KeyboardInterrupt, programming errors, ...) is left to propagate
        # instead of being silently reported as "layer missing".
        return False
    return True

def split_model_triage(composite, model_folder):
    """Load the combined model saved at `composite` and split it into `model_folder`.

    :param composite: path of the saved combined model
    :param model_folder: destination directory, forwarded to `split_model`
    """
    split_model(keras.models.load_model(composite), model_folder)
    
def split_model(best_model, model_folder):
    """Split the combined network into a history net and a position net and save all three.

    The history net maps 'history_inp' to the output of the 'bidi' layer. The
    position net reuses the trained dense/batch-norm layers of `best_model`,
    but takes the already-encoded history vector as an input in place of the
    raw history sequence.

    :param best_model: combined model containing the layers named below
    :param model_folder: directory object supporting the ``/`` operator;
        'history.h5', 'position.h5' and 'combined.h5' are written into it
    """
    # Resolve every layer up front so a missing name fails before any model is built.
    layer_names = ('bn1', 'bn2', 'bn3', 'history_inp', 'bidi', 'move_inp',
                   'hand_inp', 'hidden1', 'hidden2', 'hidden3', 'output')
    layers = {name: best_model.get_layer(name) for name in layer_names}

    encoded_history = layers['bidi'].output
    history_net = keras.models.Model(inputs=[layers['history_inp'].input], outputs=[encoded_history])

    # Stand-in input with the same width as the encoder output.
    vector_history_inp = Input((encoded_history.shape[1], ), name='vector_history_inp')

    tensor = Concatenate()([vector_history_inp, layers['move_inp'].output, layers['hand_inp'].output])
    # Re-apply the trained stack: dense layer followed by its batch normalisation.
    for dense_name, bn_name in (('hidden1', 'bn1'), ('hidden2', 'bn2'), ('hidden3', 'bn3')):
        tensor = layers[bn_name](layers[dense_name](tensor))
    output = layers['output'](tensor)

    position_net = keras.models.Model(
        inputs=[vector_history_inp, layers['move_inp'].input, layers['hand_inp'].input],
        outputs=[output],
    )

    for net, filename in ((history_net, 'history.h5'),
                          (position_net, 'position.h5'),
                          (best_model, 'combined.h5')):
        net.save(str(model_folder / filename))

In [16]:
def sanity_check_model(combined_file, net_dir):
    """Check that the split networks reproduce the combined model's error.

    Uses the first batch of the module-level `train_gen`. The mean squared
    error of the split pipeline (a `LearningPlayer` loaded from `net_dir`) and
    of the combined model loaded from `combined_file` are printed and must
    differ by less than 1e-2.

    :param combined_file: path of the saved combined model
    :param net_dir: directory holding the split networks
    :raises AssertionError: if the two errors differ by 1e-2 or more
    """
    (historical_features, move_vectors, hand_vectors), targets = train_gen[0]

    # Split pipeline: encode the history first, then score the positions.
    player = LearningPlayer(name='sanity', net_dir=str(net_dir))
    historical_matrix = player.history_net.predict(historical_features, batch_size=1024)

    from sklearn import metrics

    split_predictions = player.get_position_predictions(historical_matrix, move_vectors, hand_vectors)
    error_1 = metrics.mean_squared_error(targets, split_predictions)

    # Reference: the combined model evaluated end to end on the same batch.
    composite = keras.models.load_model(combined_file)
    combined_predictions = composite.predict(
        [historical_features, move_vectors, hand_vectors], batch_size=1024)
    error_2 = metrics.mean_squared_error(targets, combined_predictions)

    print(combined_file, error_1, error_2)
    assert np.abs(error_1 - error_2) < 1E-2

In [19]:
from pathlib import Path

def delete_dir(path):
    """Remove the directory `path` together with everything inside it.

    Does nothing when `path` does not exist. Nested sub-directories are removed
    as well, and `path` may be given as a ``str`` or a ``pathlib.Path``.

    :param path: directory to delete
    """
    import shutil

    path = Path(path)
    if not path.exists():
        return
    # rmtree handles nested directories, which a file-by-file os.remove cannot.
    shutil.rmtree(path)
    
def publish_model(i):
    """Split, verify and upload the combined model of training round `i`.

    Looks for '<data_folder>_combined_<i>.h5' in the working directory; when
    that file does not exist the function returns without doing anything.
    Otherwise the model is split into a fresh '../models/<data_folder>_model<i>'
    directory, checked with `sanity_check_model`, and the directory contents
    are copied to the bucket with ``gsutil``.

    :param i: round number used in the file and folder names
    :raises subprocess.CalledProcessError: if the ``gsutil`` upload fails
    """
    combined_file = data_folder + '_combined_' + str(i) + '.h5'
    if not os.path.exists(combined_file):
        return

    model_folder_name = data_folder + '_model' + str(i)
    model_folder_path = Path('../models/', model_folder_name)

    # Start from an empty folder; parents=True also creates '../models' when
    # it does not exist yet.
    delete_dir(model_folder_path)
    model_folder_path.mkdir(parents=True)

    split_model_triage(combined_file, model_folder_path)
    sanity_check_model(combined_file, model_folder_path)
    print(model_folder_name)
    # No shell is involved, so the '/*' wildcard reaches gsutil unexpanded.
    subprocess.check_output([
        'gsutil', 'cp', '-r',
        '../models/' + model_folder_name + '/*',
        'gs://hseokho-lai/models/' + model_folder_name,
    ])

In [21]:
def load_revision_model(model_folder):
    """Load the combined model stored under '../models/<model_folder>/combined.h5'.

    :param model_folder: name of the model folder inside '../models/'
    :return: the loaded Keras model
    """
    print('Reloading from', model_folder)
    combined_path = ''.join(('../models/', model_folder, '/combined.h5'))
    return keras.models.load_model(combined_path)

In [None]:
def train_model(fname=None, combined_net=None):
    """Train a combined network on `train_gen`, validating on `test_gen`.

    Training runs for at most 50 epochs and stops early once
    'val_mean_squared_error' has not improved for 3 epochs.

    :param fname: path where the best model (lowest validation MSE) is
        checkpointed; when None, no checkpoint is written
    :param combined_net: model to continue training; when None a new one is
        built with `create_model_bidi`
    :return: the trained model (the same object as `combined_net` if given)
    """
    # Compare with None explicitly rather than relying on the model's truthiness.
    if combined_net is None:
        combined_net = create_model_bidi()

    callbacks = [
        EarlyStopping(monitor='val_mean_squared_error', mode='min', verbose=1, patience=3),
    ]
    # Only checkpoint when a destination was given; ModelCheckpoint cannot
    # save to a None path.
    if fname is not None:
        callbacks.append(
            ModelCheckpoint(fname, monitor='val_mean_squared_error', mode='min', verbose=1, save_best_only=True)
        )

    combined_net.fit_generator(train_gen,
                epochs=50,
                callbacks=callbacks,
                validation_data=test_gen,
                shuffle=True,
                workers=1,
                max_queue_size=10,
                use_multiprocessing=False
              )
    return combined_net

# Rounds 23..29: each round reloads the same base revision, fine-tunes it on
# the current dataset while checkpointing the best epoch, then publishes it.
for i in range(23, 30):
    loaded_model = load_revision_model('4_11_actualq3_model15')
    checkpoint_file = '{}_combined_{}.h5'.format(data_folder, i)
    train_model(fname=checkpoint_file, combined_net=loaded_model)
    publish_model(i)

Reloading from 4_11_actualq3_model15
Epoch 1/50

Epoch 00001: val_mean_squared_error improved from inf to 0.34383, saving model to 4_11_actualq4_combined_23.h5
Epoch 2/50

Epoch 00002: val_mean_squared_error improved from 0.34383 to 0.30077, saving model to 4_11_actualq4_combined_23.h5
Epoch 3/50

Epoch 00003: val_mean_squared_error improved from 0.30077 to 0.24573, saving model to 4_11_actualq4_combined_23.h5
Epoch 4/50

Epoch 00004: val_mean_squared_error improved from 0.24573 to 0.23518, saving model to 4_11_actualq4_combined_23.h5
Epoch 5/50

Epoch 00005: val_mean_squared_error did not improve from 0.23518
Epoch 6/50

Epoch 00006: val_mean_squared_error improved from 0.23518 to 0.20266, saving model to 4_11_actualq4_combined_23.h5
Epoch 7/50

Epoch 00007: val_mean_squared_error improved from 0.20266 to 0.18594, saving model to 4_11_actualq4_combined_23.h5
Epoch 8/50

Epoch 00008: val_mean_squared_error improved from 0.18594 to 0.18163, saving model to 4_11_actualq4_combined_23.h5
E



4_11_actualq4_combined_23.h5 0.18474837 0.1847483
4_11_actualq4_model23
Reloading from 4_11_actualq3_model15
Epoch 1/50

Epoch 00001: val_mean_squared_error improved from inf to 0.24899, saving model to 4_11_actualq4_combined_24.h5
Epoch 2/50

Epoch 00002: val_mean_squared_error did not improve from 0.24899
Epoch 3/50

Epoch 00003: val_mean_squared_error improved from 0.24899 to 0.20606, saving model to 4_11_actualq4_combined_24.h5
Epoch 4/50

Epoch 00004: val_mean_squared_error did not improve from 0.20606
Epoch 5/50

Epoch 00005: val_mean_squared_error improved from 0.20606 to 0.20194, saving model to 4_11_actualq4_combined_24.h5
Epoch 6/50

Epoch 00006: val_mean_squared_error did not improve from 0.20194
Epoch 7/50

Epoch 00007: val_mean_squared_error improved from 0.20194 to 0.17888, saving model to 4_11_actualq4_combined_24.h5
Epoch 8/50

Epoch 00008: val_mean_squared_error did not improve from 0.17888
Epoch 9/50

Epoch 00009: val_mean_squared_error did not improve from 0.17888
Ep



4_11_actualq4_combined_24.h5 0.18960169 0.1896017
4_11_actualq4_model24
Reloading from 4_11_actualq3_model15
Epoch 1/50

Epoch 00001: val_mean_squared_error improved from inf to 0.51811, saving model to 4_11_actualq4_combined_25.h5
Epoch 2/50

Epoch 00002: val_mean_squared_error improved from 0.51811 to 0.25270, saving model to 4_11_actualq4_combined_25.h5
Epoch 3/50

Epoch 00003: val_mean_squared_error improved from 0.25270 to 0.21575, saving model to 4_11_actualq4_combined_25.h5
Epoch 4/50

Epoch 00004: val_mean_squared_error improved from 0.21575 to 0.20168, saving model to 4_11_actualq4_combined_25.h5
Epoch 5/50

Epoch 00005: val_mean_squared_error did not improve from 0.20168
Epoch 6/50

In [34]:
# Publish round 10 on its own.
# NOTE(review): the recorded output of this cell names '4_7_actualq3', which is
# not the data_folder value assigned earlier in this notebook — presumably a
# leftover from an earlier session; re-run to confirm.
publish_model(10)



4_7_actualq3_combined_10.h5 0.17735313047530865 0.1773531122255772
4_7_actualq3_model10
