In [82]:
# from numba import njit
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm
import os
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 150)

family_data_path = './input/family_data.csv'
submission_path = './input/sample_submission.csv'

In [5]:
data = pd.read_csv(family_data_path)
submission = pd.read_csv(submission_path, index_col='family_id', dtype=np.uint16)

family_size_dict = data[['n_people']].to_dict()['n_people']

cols = ['choice_{}'.format(i) for i in range(10)]
choice_dict = data[cols].to_dict()

N_DAYS = 100
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125

# from 100 to 1
days = list(range(N_DAYS,0,-1))

In [88]:
class Cost:
    def __init__(self, family_data_path = family_data_path):
        family = pd.read_csv(family_data_path)
        family_size_dict = family[['n_people']].to_dict()['n_people']
        
        self.family_size = np.array(list(family_size_dict.values()))
        self.penalties = self._penality_array(family_size_dict)
        self.choice_array_num = self._choice_array_num(family)
        self.days = list(range(100, 0, -1))
        
    def _penality_array(self, family_size_dict):
        return np.array([
                    [
                        0,
                        50,
                        50 + 9 * n,
                        100 + 9 * n,
                        200 + 9 * n,
                        200 + 18 * n,
                        300 + 18 * n,
                        300 + 36 * n,
                        400 + 36 * n,
                        500 + 36 * n + 199 * n,
                        500 + 36 * n + 398 * n
                    ]
                    for n in range(max(family_size_dict.values())+1)
                ])
    
    def _choice_array_num(self, family):
        cols = ['choice_{}'.format(i) for i in range(10)]
        choice_dict = family[cols].to_dict()
        choice_dict_num = [{i:vv for i, vv in enumerate(di.values())} for di in choice_dict.values()]
        largest_choice_key = max(max(x.keys()) for x in choice_dict_num)
        return np.array([
                [
                    choice[n] if n in choice else -1 
                    for n in range(largest_choice_key+1)
                ] 
                for choice in choice_dict_num
            ])
    
#     @njit
    def calculate(self, prediction):
        penalty = 0

        # We'll use this to count the number of people scheduled each day
        daily_occupancy = np.zeros((len(days)+1))
        N = self.family_size.shape[0]

        print(self.choice_array_num)
        # Looping over each family; d is the day, n is size of that family, 
        # and choice is their top choices
        for i in range(N):
            # add the family member count to the daily occupancy
            n = self.family_size[i]
            d = prediction[i]
            choice = self.choice_array_num[i]

            daily_occupancy[d] += n

            # Calculate the penalty for not getting top preference
            penalty += self.penalties[n, self.choice_array_num[i][d]]

        # for each date, check total occupancy
        #  (using soft constraints instead of hard constraints)
        relevant_occupancy = daily_occupancy[1:]
        incorrect_occupancy = np.any(
            (relevant_occupancy > MAX_OCCUPANCY) | 
            (relevant_occupancy < MIN_OCCUPANCY)
        )

        if incorrect_occupancy:
            penalty += 100000000

        # Calculate the accounting cost
        # The first day (day 100) is treated special
        init_occupancy = daily_occupancy[days[0]]
        accounting_cost = (init_occupancy - 125.0) / 400.0 * init_occupancy**(0.5)
        # using the max function because the soft constraints might allow occupancy to dip below 125
        accounting_cost = max(0, accounting_cost)

        # Loop over the rest of the days, keeping track of previous count
        yesterday_count = init_occupancy
        for day in self.days[1:]:
            today_count = daily_occupancy[day]
            diff = np.abs(today_count - yesterday_count)
            accounting_cost += max(0, (today_count - 125.0) / 400.0 * today_count**(0.5 + diff / 50.0))
            yesterday_count = today_count

        penalty += accounting_cost

        return penalty

In [89]:
class Workshop:
    def __init__(self):
        self.family_sizes = []
        self.family_choices = []
        self.assigned_days = []
        self.cost = Cost()
        self.done = False
            
    def reset(self):
        self._set_state()
        self.done = False
        return self._get_env_state()
    
    def step(self, family_index, day_choice_index):
        reward = -self.cost.calculate(self.assigned_days)
        self.assigned_days[family_index] = self.state[family_index, day_choice_index]
        return self._get_env_state(), reward, self.done
    
    def get_submission(self):
        submission = pd.read_csv(submission_path, index_col='family_id')
        score = self.cost.calculate(self.assigned_days)
        submission.to_csv('submission_{}.csv'.format(score))
        print('Score = {}'.format(score))
        
    def _set_state(self):
        family = pd.read_csv(family_data_path)
        choice_cols = ['choice_{}'.format(i) for i in range(10)]
        self.family_choices = np.array(family[choice_cols])
        self.family_sizes = np.array(family['n_people'])
        
        submission = pd.read_csv(submission_path, index_col='family_id')
        self.assigned_days = np.array(submission['assigned_day'])  
        
    def _get_env_state(self):
        return (self.assigned_days, self.family_choices, self.family_sizes)

In [90]:
workshop = Workshop()
workshop.reset()
workshop.get_submission()

[[ 82   5  12 ...  17  17  80]
 [ 33  11  27 ...  27  53  88]
 [ 52  26 100 ...  32  67  13]
 ...
 [ 75  47  82 ...  21  77  40]
 [ 28  61  33 ...   7  70  47]
 [ 38   4  54 ...  66  92  11]]


IndexError: index 61 is out of bounds for axis 1 with size 11

In [100]:
np.array([
                    [
                        0,
                        50,
                        50 + 9 * n,
                        100 + 9 * n,
                        200 + 9 * n,
                        200 + 18 * n,
                        300 + 18 * n,
                        300 + 36 * n,
                        400 + 36 * n,
                        500 + 36 * n + 199 * n,
                        500 + 36 * n + 398 * n
                    ]
                    for n in range(max(family_size_dict.values())+1)
                ])

array([[   0,   50,   50,  100,  200,  200,  300,  300,  400,  500,  500],
       [   0,   50,   59,  109,  209,  218,  318,  336,  436,  735,  934],
       [   0,   50,   68,  118,  218,  236,  336,  372,  472,  970, 1368],
       [   0,   50,   77,  127,  227,  254,  354,  408,  508, 1205, 1802],
       [   0,   50,   86,  136,  236,  272,  372,  444,  544, 1440, 2236],
       [   0,   50,   95,  145,  245,  290,  390,  480,  580, 1675, 2670],
       [   0,   50,  104,  154,  254,  308,  408,  516,  616, 1910, 3104],
       [   0,   50,  113,  163,  263,  326,  426,  552,  652, 2145, 3538],
       [   0,   50,  122,  172,  272,  344,  444,  588,  688, 2380, 3972]])

In [106]:
family = pd.read_csv(family_data_path)
cols = ['choice_{}'.format(i) for i in range(10)]
choice_dict = family[cols].to_dict()
# print(choice_dict)

[{{i:di[0] for i, vv in enumerate(di.values())} for i, di in enumerate(choice_dict.values())}]
# for i in range(5000):
#     choice_dict_num[i] = 


# choice_dict_num = [{vv:i for i, vv in enumerate(di.values())} for di in choice_dict.values()]
# print(choice_dict_num)
# largest_choice_key = max(max(x.keys()) for x in choice_dict_num)
# choices = np.array([
#         [
#             choice[n] if n in choice else -1 
#             for n in range(largest_choice_key+1)
#         ] 
#         for choice in choice_dict_num
#     ])
# choices[100]

[{0: 82, 1: 33, 2: 52, 3: 12, 4: 10, 5: 64, 6: 76, 7: 75, 8: 28, 9: 38}]

In [None]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.99
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    """Huber loss for Q Learning
    References: https://en.wikipedia.org/wiki/Huber_loss
                https://www.tensorflow.org/api_docs/python/tf/losses/huber_loss
    """

    def _huber_loss(self, y_true, y_pred, clip_delta=1.0):
        error = y_true - y_pred
        cond  = tf.abs(error) <= clip_delta

        squared_loss = 0.5 * tf.square(error)
        quadratic_loss = 0.5 * tf.square(clip_delta) + clip_delta * (tf.abs(error) - clip_delta)

        return tf.mean(tf.where(cond, squared_loss, quadratic_loss))

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss=self._huber_loss,
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def update_target_model(self):
        # copy weights from model to target_model
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                # a = self.model.predict(next_state)[0]
                t = self.target_model.predict(next_state)[0]
                target[0][action] = reward + self.gamma * np.amax(t)
                # target[0][action] = reward + self.gamma * t[np.argmax(a)]
            self.model.fit(state, target, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [None]:
episode = 500
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-ddqn.h5")
done = False
batch_size = 32

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for time in range(500):
        # env.render()
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            agent.update_target_model()
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, time, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    # if e % 10 == 0:
    #     agent.save("./save/cartpole-ddqn.h5")

In [None]:
family_size = np.array(list(family_size_dict.values()))
days_array = np.array(days)

penalties_array = 

choice_dict_num = [{vv:i for i, vv in enumerate(di.values())} for di in choice_dict.values()]
largest_choice_key = max(max(x.keys()) for x in choice_dict_num)
choice_array_num = np.array([
    [
        choice[n] if n in choice else - 1 
        for n in range(largest_choice_key+1)
    ] 
    for choice in choice_dict_num
])



In [None]:
best = submission['assigned_day'].values
start_score = cost_function(best, penalties_array, family_size, days_array)

# loop over each family
for fam_id in tqdm(range(len(best))):
    # loop over each family choice
    for pick in range(10):
        day = choice_dict[fam_id][f'choice_{pick}']
        temp = new.copy()
        temp[fam_id] = day # add in the new pick
        if cost_function(temp, penalties_array, family_size, days_array) < start_score:
            new = temp.copy()
            start_score = cost_function(new, penalties_array, family_size, days_array)

submission['assigned_day'] = new
score = cost_function(new, penalties_array, family_size, days_array)
submission.to_csv(f'submission_{score}.csv')
print(f'Score: {score}')