In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

# from numba import njit, jitclass
import numpy as np # linear algebra
import pandas as pd

from tqdm import tqdm_notebook as tqdm
import os
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Concatenate, Flatten
from tensorflow.keras.optimizers import Adam

family_data_path = './input/family_data.csv'
submission_path = './input/sample_submission.csv'

In [7]:
class Cost:
    def __init__(self, family_data_path = family_data_path):
        family = pd.read_csv(family_data_path, index_col='family_id')
        self.family_size = family.n_people.values.astype(np.int8)
        self.family_cost_matrix = self._penality_array(family, self.family_size)
        self.accounting_cost_matrix = self._accounting_cost_matrix(family)
        
    def _penality_array(self, family, family_size):
        penalties = np.asarray([
            [
                0,
                50,
                50 + 9 * n,
                100 + 9 * n,
                200 + 9 * n,
                200 + 18 * n,
                300 + 18 * n,
                300 + 36 * n,
                400 + 36 * n,
                500 + 36 * n + 199 * n,
                500 + 36 * n + 398 * n
            ] for n in range(family_size.max() + 1)
        ])
        family_cost_matrix = np.concatenate(family.n_people.apply(
                lambda n: np.repeat(penalties[n, 10], 100).reshape(1, 100)))
        for fam in family.index:
            for choice_order, day in enumerate(family.loc[fam].drop("n_people")):
                family_cost_matrix[fam, day - 1] = penalties[family.loc[fam, "n_people"], choice_order]
        return family_cost_matrix
        
    
    def _accounting_cost_matrix(self, family):
        accounting_cost_matrix = np.zeros((500, 500))
        for n in range(accounting_cost_matrix.shape[0]):
            for diff in range(accounting_cost_matrix.shape[1]):
                accounting_cost_matrix[n, diff] = max(0, (n - 125.0) / 400.0 * n**(0.5 + diff / 50.0))
        return accounting_cost_matrix
    
    def calculate(self, prediction):
        p, ac, nl, nh = self._calculate(prediction, self.family_size, self.family_cost_matrix, self.accounting_cost_matrix)
        return (p + ac) + (nl + nh) * 1000000
        
    @staticmethod
#     @njit(fastmath=True)
    def _calculate(prediction, family_size, family_cost_matrix, accounting_cost_matrix):
        N_DAYS = 100
        MAX_OCCUPANCY = 300
        MIN_OCCUPANCY = 125
        penalty = 0
        daily_occupancy = np.zeros(N_DAYS + 1, dtype=np.int16)
        for i, (pred, n) in enumerate(zip(prediction, family_size)):
            daily_occupancy[pred - 1] += n
            penalty += family_cost_matrix[i, pred - 1]

        accounting_cost = 0
        n_low = 0
        n_high = 0
        daily_occupancy[-1] = daily_occupancy[-2]
        for day in range(N_DAYS):
            n_next = daily_occupancy[day + 1]
            n = daily_occupancy[day]
            n_high += (n > MAX_OCCUPANCY) 
            n_low += (n < MIN_OCCUPANCY)
            diff = abs(n - n_next)
            accounting_cost += accounting_cost_matrix[n, diff]

        return np.asarray([penalty, accounting_cost, n_low, n_high])

In [8]:
class Workshop:
    def __init__(self):
        self.family_sizes = []
        self.family_choices = []
        self.assigned_days = []
        self.changes_left = 0
        self.cost = Cost()
            
    def reset(self):
        self._set_state()
        self.changes_left = 10000
        return self._get_env_state()
    
    def step(self, family_index, day_choice_index):
        self.changes_left -= 1
        reward = -self.cost.calculate(self.assigned_days)
        self.assigned_days[family_index] = self.family_choices[family_index, day_choice_index]
        return self._get_env_state(), reward, self._is_done()
    
    def get_submission(self):
        submission = pd.Series(self.assigned_days, name="assigned_day")
        submission.index.name = "family_id"
        score = self.cost.calculate(self.assigned_days)
        return submission, score
        
    def _set_state(self):
        family = pd.read_csv(family_data_path, index_col='family_id')
        choice_cols = ['choice_{}'.format(i) for i in range(10)]
        self.family_choices = np.array(family[choice_cols])
        self.family_sizes = np.array(family['n_people'])
        
        submission = pd.read_csv(submission_path, index_col='family_id')
        self.assigned_days = submission['assigned_day'].values  
        
    def _get_env_state(self):
        return [self.assigned_days, self.family_choices, self.family_sizes]
    
    def _is_done(self):
        return self.changes_left < 0

In [9]:
class DQNAgent:
    def __init__(self):
        self.memory = deque(maxlen=10000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.99
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    """Huber loss for Q Learning
    References: https://en.wikipedia.org/wiki/Huber_loss
                https://www.tensorflow.org/api_docs/python/tf/losses/huber_loss
    """

    def _huber_loss(self, y_true, y_pred, clip_delta=1.0):
        error = y_true - y_pred
        cond  = tf.abs(error) <= clip_delta

        squared_loss = 0.5 * tf.square(error)
        quadratic_loss = 0.5 * tf.square(clip_delta) + clip_delta * (tf.abs(error) - clip_delta)

        return tf.mean(tf.where(cond, squared_loss, quadratic_loss))

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        assigned_days = Input(shape=(5000,))
        family_sizes = Input(shape=(5000,))
        family_choices = Input(shape=(5000,10))
        
        a = Dense(512, activation='relu')(assigned_days)
        b = Dense(512, activation='relu')(family_sizes)
        c = Flatten()(family_choices)
        c = Dense(512, activation='relu')(c)
        
        x = Concatenate()([a, b, c])
        x = Dense(1024, activation='relu')(x)
        x = Dense(1024, activation='relu')(x)
        family = Dense(100, activation='softmax')(x)
        day = Dense(10, activation='softmax')(x)
        
        model = Model([assigned_days, family_choices, family_sizes], [family, day])
        
        model.compile(loss=tf.keras.losses.Huber(),
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def update_target_model(self):
        # copy weights from model to target_model
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        print(state)
#         if np.random.rand() <= self.epsilon:
#             return [np.random.uniform(0.0, 1.0, 100).tolist(), np.random.uniform(0.0, 1.0, 10).tolist()]
        act_values = self.model.predict(inputs=state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                # a = self.model.predict(next_state)[0]
                t = self.target_model.predict(next_state)[0]
                target[0][action] = reward + self.gamma * np.amax(t)
                # target[0][action] = reward + self.gamma * t[np.argmax(a)]
            self.model.fit(state, target, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [10]:
episodes = 500
env = Workshop()
agent = DQNAgent()
# agent.load("./reinforcement-workshop.h5")
done = False
batch_size = 8

for e in range(episodes):
    state = env.reset()
    for time in range(500):
        # env.render()
        action = agent.act(state)
        family = np.argmax(action[0])
        day = np.argmax(action[1])
        next_state, reward, done = env.step(family, day)
#         reward = reward if not done else -10
#         next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            agent.update_target_model()
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, episodes, reward, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    if e % 10 == 0:
        agent.save("./reinforcement-workshop.h5")

AttributeError: module 'tensorflow.keras.losses' has no attribute 'Huber'

In [None]:
family_size = np.array(list(family_size_dict.values()))
days_array = np.array(days)

penalties_array = 

choice_dict_num = [{vv:i for i, vv in enumerate(di.values())} for di in choice_dict.values()]
largest_choice_key = max(max(x.keys()) for x in choice_dict_num)
choice_array_num = np.array([
    [
        choice[n] if n in choice else - 1 
        for n in range(largest_choice_key+1)
    ] 
    for choice in choice_dict_num
])



In [None]:
best = submission['assigned_day'].values
start_score = cost_function(best, penalties_array, family_size, days_array)

# loop over each family
for fam_id in tqdm(range(len(best))):
    # loop over each family choice
    for pick in range(10):
        day = choice_dict[fam_id][f'choice_{pick}']
        temp = new.copy()
        temp[fam_id] = day # add in the new pick
        if cost_function(temp, penalties_array, family_size, days_array) < start_score:
            new = temp.copy()
            start_score = cost_function(new, penalties_array, family_size, days_array)

submission['assigned_day'] = new
score = cost_function(new, penalties_array, family_size, days_array)
submission.to_csv(f'submission_{score}.csv')
print(f'Score: {score}')