## About this kernel

In this kernel I implement a GPU-accelerated cost function.
My cost function, combined with the Stochastic Product Search algorithm, runs in 22 µs per iteration.
To improve parallel efficiency, the cost function evaluates candidates in batches; a larger batch size gives better performance.
The best performance is 12 µs per iteration, reached with a batch size of 4096 (8^4).

Running this code on GPU will lead to a 5x speed improvement compared to the CPU.

### Reference
* Simple but nice optimization algorithm: https://www.kaggle.com/xhlulu/santa-s-2019-stochastic-product-search






In [None]:
import numpy as np
import cupy as cp
import pandas as pd

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
%matplotlib inline

from IPython.display import display, clear_output
from itertools import product


import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)


## Read in the family information and sample submission

In [None]:
# Family table: read with `family_id` as the index.
data = pd.read_csv(
    '/kaggle/input/santa-2019-workshop-scheduling/family_data.csv',
    index_col='family_id',
)
# Sample submission: read the same way; `fpath` keeps pointing at this file.
fpath = '/kaggle/input/santa-2019-workshop-scheduling/sample_submission.csv'
submission = pd.read_csv(fpath, index_col='family_id')

In [None]:
# Problem-size constants shared by the cells below.
# Days are numbered 1..N_DAYS; per-day arrays have N_DAYS+1 slots so that a
# day number can be used directly as an index.
N_DAYS = 100
# Upper / lower bound on the number of people per day
# (the cost function below penalises occupancy above 300 or below 125).
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125
# Number of families, i.e. the length of one assignment vector.
N_FAMILY = 5000


## Cost Function (Using GPU Code)



In [None]:
class CostFunction(object):
    '''
    Cost of a family -> day assignment, computed with an array backend.

    The backend is any module with a NumPy-compatible API (``numpy`` or
    ``cupy``); it is passed in as ``np`` and stored on the instance.

    Shape legend used in the comments below:

    B ... BATCH
    F ... N_FAMILY
    D ... N_DAYS
    '''

    # (flat cost, cost per person) for choice_0 .. choice_9, in that order.
    _CHOICE_COSTS = (
        (0, 0), (50, 0), (50, 9), (100, 9), (200, 9),
        (200, 18), (300, 18), (300, 36), (400, 36), (500, 36 + 199),
    )
    # (flat cost, cost per person) for a day that is none of the ten choices.
    _OTHER_COST = (500, 36 + 398)
    # Weight applied to every person below / above the occupancy bounds.
    _INFEASIBLE_WEIGHT = 1000000000

    def __init__(self, data, np=np, n_days=None, min_occupancy=None,
                 max_occupancy=None):
        '''
        Build the flattened lookup tables used by the cost methods.

        data          ... table with an ``n_people`` column and the columns
                          ``choice_0`` .. ``choice_9`` (one row per family)
        np            ... array backend (``numpy`` or ``cupy``)
        n_days        ... number of days; defaults to the module-level N_DAYS
        min_occupancy ... lower occupancy bound; defaults to MIN_OCCUPANCY
        max_occupancy ... upper occupancy bound; defaults to MAX_OCCUPANCY
        '''
        self.np = np
        self.n_days = N_DAYS if n_days is None else n_days
        self.min_occupancy = MIN_OCCUPANCY if min_occupancy is None else min_occupancy
        self.max_occupancy = MAX_OCCUPANCY if max_occupancy is None else max_occupancy

        n_family = len(data)
        table_shape = (n_family, self.n_days + 1)
        family_size_array = np.array(data['n_people'], dtype=np.int32)

        # Start from the "not a listed choice" cost and overwrite the listed
        # days rank by rank (later ranks win if a day is listed twice).
        other_flat, other_per_person = self._OTHER_COST
        penalty_table = np.full(table_shape, other_flat, dtype=np.int32)
        family_penalty_table = np.full(table_shape, other_per_person, dtype=np.int32)

        rows = np.arange(n_family)
        for rank, (flat, per_person) in enumerate(self._CHOICE_COSTS):
            chosen_day = np.array(data['choice_{}'.format(rank)], dtype=np.int32)
            penalty_table[rows, chosen_day] = flat
            family_penalty_table[rows, chosen_day] = per_person

        self.family_size_array = family_size_array.copy()
        # Tables are stored flat; index_shift turns (family, day) into a
        # flat index: family * (n_days + 1) + day.
        self.penalty_array = penalty_table.reshape((-1,))
        self.family_penalty_array = family_penalty_table.reshape((-1, ))
        self.index_shift = np.arange(0, n_family, dtype=np.int32)*(self.n_days+1)
        self.day_shift = None

    def _occupancy_cost(self, ns):
        '''
        Return (accounting, violation), both reduced over the last axis.

        ns ... occupancy per day with n_days+1 entries on the last axis;
               slot 0 is unused on input and is overwritten in place.
        '''
        np = self.np
        # Slot 0 receives the last day's occupancy so that, after the roll,
        # the last day is compared with itself (difference 0).
        ns[..., 0] = ns[..., self.n_days]
        day_to_day = np.abs(ns - np.roll(ns, -1, axis=-1))
        accounting = np.maximum(0, ns-125)/400.*(ns**(0.5+day_to_day/50.))
        lower = np.maximum(0, self.min_occupancy-ns)
        upper = np.maximum(0, ns-self.max_occupancy)
        # Slot 0 is not a real day and must not contribute.
        accounting[..., 0] = 0
        lower[..., 0] = 0
        upper[..., 0] = 0
        violation = lower.sum(axis=-1) + upper.sum(axis=-1)
        return accounting.sum(axis=-1), violation

    def cost_function(self, prediction):
        '''
        Cost of a single assignment.

        prediction ... integer array of shape (F,), one day (1..n_days) per family
        returns    ... scalar cost
        '''
        np = self.np
        prediction_index = prediction + self.index_shift
        penalty = self.penalty_array[prediction_index].sum()
        family_penalty = (self.family_penalty_array[prediction_index]*self.family_size_array).sum()
        # minlength keeps the result n_days+1 long even when the last days
        # have no family assigned.
        n = np.bincount(prediction, weights=self.family_size_array,
                        minlength=self.n_days+1)
        accounting_penalty, violation = self._occupancy_cost(n)
        loss = penalty+family_penalty+accounting_penalty+violation*self._INFEASIBLE_WEIGHT
        return loss

    def cost_function_batch(self, predictions):
        '''
        Cost of B assignments at once.

        predictions ... integer array of shape (B, F)
        returns     ... array of shape (B,) with one cost per row
        '''
        np = self.np
        batch = predictions.shape[0]
        # Offsets are (re)built on demand so that a missing or stale
        # set_batch_size() call cannot silently mis-broadcast.
        if self.day_shift is None or self.day_shift.shape[0] != batch:
            self.set_batch_size(batch)
        # B, F
        predictions_index = predictions + self.index_shift[None]
        # B,
        penalty = np.sum(self.penalty_array[predictions_index], axis=1)
        family_penalty = np.sum(self.family_penalty_array[predictions_index]*self.family_size_array[None], axis=1)
        # B, F : each row's days are moved into that row's own bincount range
        days = predictions + self.day_shift[:, None]
        family_size_arrays = np.broadcast_to(self.family_size_array[None], days.shape)
        # B*F
        days = days.reshape((-1, ))
        family_size_arrays = family_size_arrays.reshape((-1, ))
        ns = np.bincount(days, weights=family_size_arrays,
                         minlength=batch*(self.n_days+1))
        # B, D+1
        ns = np.reshape(ns, (-1, self.n_days+1))
        accounting_penalty, violation = self._occupancy_cost(ns)
        loss = penalty+family_penalty+accounting_penalty+violation*self._INFEASIBLE_WEIGHT
        return loss

    def set_batch_size(self, bs):
        '''Precompute the per-row bincount offsets for a batch of ``bs`` rows.'''
        np = self.np
        self.day_shift = np.arange(0, bs, dtype=np.int32)*(self.n_days+1)

In [None]:
# family_id -> number of people in that family.
family_size_dict = data[['n_people']].to_dict()['n_people']

# One list of preferred days per choice rank (choice_0 .. choice_9).
cols = [f'choice_{rank}' for rank in range(10)]
choice_list = list(map(lambda col: data[col].tolist(), cols))

# choice_array is (rank, family); choice_matrix is its (family, rank) view.
choice_array = np.array(choice_list, dtype=np.int32)
choice_matrix = choice_array.transpose()

# Cost function whose lookup tables are built with CuPy.
cost_function = CostFunction(data, np=cp)



## Initialization



In [None]:
# Greedy initial assignment.
#
# `idx` holds the families that are still unassigned and `n` the accepted
# occupancy per day. For an increasing occupancy limit (`border`) the
# remaining families are offered their choice_0 .. choice_3 days in turn:
# a day accepts ALL remaining families that want it if the resulting
# occupancy stays within the limit, otherwise it accepts none of them.
# NOTE(review): families that are never accepted keep init_ans == 0, which is
# not a valid day - confirm that `idx` is empty after this cell.
idx = np.arange(N_FAMILY, dtype=np.int32)
n = np.zeros((N_DAYS + 1, ), dtype=np.int32)
init_ans = np.zeros((N_FAMILY,), dtype=np.int32)
family_size_array = np.array(data['n_people'], dtype=np.int32)
maximum_choice = 4

# The choice columns do not change, so convert them once up front.
choice_columns = [
    np.array(data['choice_{}'.format(rank)].tolist())
    for rank in range(maximum_choice)
]

for border in range(120, 310, 10):
    for t in range(maximum_choice + 1):
        for rank in range(t):
            choices = choice_columns[rank]
            wanted_day = choices[idx]
            # Occupancy per day if every remaining family got this choice.
            now = np.bincount(wanted_day, weights=family_size_array[idx],
                              minlength=N_DAYS + 1)
            nxt = n + now
            day_fits = nxt <= border

            # Families whose wanted day stays within the limit are assigned;
            # the others stay in `idx` for the next round.
            accepted = day_fits[wanted_day]
            init_ans[idx[accepted]] = wanted_day[accepted]
            idx = idx[~accepted]

            # Commit the new occupancy only for the days that accepted.
            n[day_fits] = nxt[day_fits]


In [None]:
# Copy the initial assignment to the GPU and keep a copy as the current best.
init_ans = cp.array(init_ans)
best = init_ans.copy()

# Cost of the initial assignment through the single-assignment method ...
cost = cost_function.cost_function(init_ans)
print(cost)

# ... and through the batch method with a batch of one row, for comparison.
cost_function.set_batch_size(1)
costs = cost_function.cost_function_batch(init_ans[cp.newaxis])
print(costs)

## Optimization

In [None]:
def stochastic_product_search(top_k, fam_size, original, choice_matrix, 
                              cost_function, disable_tqdm=False, verbose=10000,
                              n_iter=500, random_state=2019, np=np, tnp=np):
    """
    Improve a day assignment by exhaustive search over random family subsets.

    At every iteration, randomly sample fam_size families. Then, given their
    top_k choices, compute the Cartesian product of the families' choices,
    score all of those top_k^fam_size candidates in one batch and keep the
    best one if it beats the current best score.

    Args:
        top_k: Number of leading choice columns considered per family.
        fam_size: Number of families sampled per iteration (sampled with
            replacement, so a family may appear more than once).
        original: The original day assignments, one entry per family, as an
            array of the ``np`` backend. It is not modified.
        choice_matrix: Host array of shape (n_families, n_choices).
        cost_function: Object providing ``cost_function(assignment)``,
            ``cost_function_batch(assignments)`` and ``set_batch_size(bs)``.
        disable_tqdm: If true, iterate without a progress bar.
        verbose: Print the best score every ``verbose`` iterations
            (0 or None disables the periodic output).
        n_iter: Number of iterations.
        random_state: Seed for the family sampling.
        np: Array backend used for the candidate batch (numpy or cupy).
        tnp: Host backend used for random sampling (numpy).

    Returns:
        The best assignment found, as an array of the ``np`` backend.
    """

    best = original.copy()
    best_score = cost_function.cost_function(best)

    # Families are drawn with `tnp`, so that is the generator that has to be
    # seeded for `random_state` to take effect; the array backend is seeded
    # as well, as before.
    tnp.random.seed(random_state)
    if np is not tnp:
        np.random.seed(random_state)

    n_family = best.shape[0]
    n_sampled_from = choice_matrix.shape[0]
    # Candidates per iteration; top_k cannot exceed the available columns.
    n_candidates = min(top_k, choice_matrix.shape[1]) ** fam_size
    cost_function.set_batch_size(n_candidates)
    # Start of each candidate row inside the flattened (B*F) batch.
    row_start = (np.arange(0, n_candidates, dtype=np.int32)*n_family)[:, None]

    iterations = range(n_iter) if disable_tqdm else tqdm(range(n_iter))
    for i in iterations:
        fam_indices = tnp.random.choice(n_sampled_from, size=fam_size)
        # B, fam_size : every combination of the sampled families' choices
        changes = np.array(list(product(*choice_matrix[fam_indices, :top_k].tolist())))

        # B*F : B copies of the current best, then overwrite the sampled
        # families in each copy with one combination.
        news = np.tile(best, len(changes))
        flat_targets = np.array(fam_indices)[None] + row_start
        news[flat_targets] = changes
        news = news.reshape((-1, n_family))

        costs = cost_function.cost_function_batch(news)
        winner = np.argmin(costs)
        winner_score = costs[winner]
        if best_score > winner_score:
            best = news[winner].copy()
            best_score = winner_score

        if verbose and i % verbose == 0:
            tnp_best_score = float(best_score)
            print(f"Iteration #{i}: Best score is {tnp_best_score:.2f}")

    tnp_best_score = float(best_score)
    print(f"Final best score is {tnp_best_score:.2f}")
    return best

In [None]:
# Start the search from the initial assignment (as a CuPy array).
best = cp.array(init_ans)

In [None]:
# Stage 1: 8 families x top-2 choices, i.e. 2**8 = 256 candidates per iteration.
best = stochastic_product_search(
    top_k=2,
    fam_size=8,
    original=best,
    choice_matrix=choice_matrix,
    cost_function=cost_function,
    disable_tqdm=True,
    verbose=50000,
    n_iter=1000000,
    np=cp,
)

In [None]:
# Stage 2: 12 families x top-2 choices, i.e. 2**12 = 4096 candidates per iteration.
best = stochastic_product_search(
    top_k=2,
    fam_size=12,
    original=best,
    choice_matrix=choice_matrix,
    cost_function=cost_function,
    disable_tqdm=True,
    verbose=5000,
    n_iter=50000,
    np=cp,
)

In [None]:
# Stage 3: 6 families x top-4 choices, i.e. 4**6 = 4096 candidates per iteration.
best = stochastic_product_search(
    top_k=4,
    fam_size=6,
    original=best,
    choice_matrix=choice_matrix,
    cost_function=cost_function,
    disable_tqdm=True,
    verbose=5000,
    n_iter=50000,
    np=cp,
)

In [None]:
# Stage 4: 4 families x top-8 choices, i.e. 8**4 = 4096 candidates per iteration.
best = stochastic_product_search(
    top_k=8,
    fam_size=4,
    original=best,
    choice_matrix=choice_matrix,
    cost_function=cost_function,
    disable_tqdm=True,
    verbose=5000,
    n_iter=100000,
    np=cp,
)

In [None]:
# Score the final assignment; the score becomes part of the output file name.
final_score = cost_function.cost_function(best)

# Move the assignment back to host memory and store it in the submission table.
submission['assigned_day'] = cp.asnumpy(best)
submission.to_csv(f'submission_{final_score}.csv')