In [16]:
import numpy as np
import pandas as pd
from numba import njit
from tqdm import tqdm_notebook

In [2]:
# Family preferences, one row per family (index: family_id).
data = pd.read_csv("../family_data.csv", index_col='family_id')
family_size = data.n_people.values.astype(np.int8)

# penalties[size, rank]: preference cost for a family of `size` people that is
# given its choice number `rank`; column 10 is the cost used for every day the
# family did not list.
penalties = np.asarray([
    [
        0,
        50,
        50 + 9 * size,
        100 + 9 * size,
        200 + 9 * size,
        200 + 18 * size,
        300 + 18 * size,
        300 + 36 * size,
        400 + 36 * size,
        500 + 36 * size + 199 * size,
        500 + 36 * size + 398 * size
    ]
    for size in range(family_size.max() + 1)
])

# Every (family, day) cell starts at that family's "not listed" penalty ...
family_cost_matrix = np.repeat(
    penalties[data.n_people.values, 10][:, np.newaxis], 100, axis=1
)

# ... and the listed days are then overwritten with the cheaper ranked
# penalties (`wished_day - 1` turns the stored day into a column index).
for fam, wishes in data.drop(columns="n_people").iterrows():
    members = data.loc[fam, "n_people"]
    for rank, wished_day in enumerate(wishes):
        family_cost_matrix[fam, wished_day - 1] = penalties[members, rank]

# accounting_cost_matrix[occupancy, gap]: accounting cost of a day with
# `occupancy` people whose neighbouring day differs by `gap` people.
accounting_cost_matrix = np.zeros((500, 500))
for occupancy in range(accounting_cost_matrix.shape[0]):
    scale = (occupancy - 125.0) / 400.0
    for gap in range(accounting_cost_matrix.shape[1]):
        accounting_cost_matrix[occupancy, gap] = max(0, scale * occupancy**(0.5 + gap / 50.0))
        
def cost_function(prediction, family_size=family_size, family_cost_matrix=family_cost_matrix, accounting_cost_matrix=accounting_cost_matrix):
    """Evaluate an assignment of families to days.

    `prediction[i]` is the day given to family `i`; it is used as
    `prediction[i] - 1` when indexing the cost matrix and the occupancy vector.

    Returns a numpy array `[preference cost, accounting cost,
    number of days below 125 people, number of days above 300 people]`.
    """
    n_days = family_cost_matrix.shape[1]
    max_occupancy, min_occupancy = 300, 125

    # One extra trailing slot so the last real day has a "next day" to compare with.
    occupancy = np.zeros(n_days + 1, dtype=np.int16)
    preference_total = 0
    for fam, (assigned, members) in enumerate(zip(prediction, family_size)):
        slot = assigned - 1
        occupancy[slot] += members
        preference_total += family_cost_matrix[fam, slot]

    # The padding slot mirrors the last real day, so that day's difference is 0.
    occupancy[-1] = occupancy[-2]

    accounting_total = 0
    below = 0
    above = 0
    for today, tomorrow in zip(occupancy[:-1], occupancy[1:]):
        above += (today > max_occupancy)
        below += (today < min_occupancy)
        accounting_total += accounting_cost_matrix[today, abs(today - tomorrow)]

    return np.asarray([preference_total, accounting_total, below, above])


def score(prediction):
    """Collapse the four cost components into a single number.

    Each day outside the occupancy limits adds 1000000 to the sum of the
    preference and accounting costs.

    NOTE(review): a later cell of this notebook defines a one-argument
    `cost_function`; once that cell has run, the four-argument call below no
    longer matches it.
    """
    preference, accounting, n_low, n_high = cost_function(
        prediction, family_size, family_cost_matrix, accounting_cost_matrix
    )
    violations = n_low + n_high
    return (preference + accounting) + violations * 1000000

# input files

In [7]:
# Family sizes, space separated, in the order of `family_size`.
with open('santa_size.in', "w") as f:
    f.write(" ".join(str(members) for members in family_size))

In [13]:
# Preference-cost matrix flattened row by row, space separated.
# reshape(-1) takes the length from the matrix itself instead of repeating
# its dimensions as the literal 5000*100.
with open('santa_cost.in', "w") as f:
    f.write(" ".join(map(str, family_cost_matrix.reshape(-1))))

In [21]:
# Accounting-cost matrix flattened row by row, space separated.
# reshape(-1) takes the length from the matrix itself instead of repeating
# its dimensions as the literal 500*500.
with open('santa_acc_cost.in', "w") as f:
    f.write(" ".join(map(str, accounting_cost_matrix.reshape(-1))))

In [10]:
# Build the text BEFORE opening the file: open(..., "w") truncates immediately,
# so if `results` is missing (the NameError recorded for this cell) the
# existing santa.out, which a later cell reads back, would otherwise be emptied.
serialized_results = " ".join(map(str, results))
with open('santa.out', "w") as f:
    f.write(serialized_results)

# output files

NameError: name 'results' is not defined

In [4]:
def get_penalty(n, choice):
    """Preference penalty for a family of `n` people given choice rank `choice`.

    Ranks 0-9 each have their own formula; any other value of `choice`
    receives the largest penalty.
    """
    by_rank = {
        0: lambda size: 0,
        1: lambda size: 50,
        2: lambda size: 50 + 9 * size,
        3: lambda size: 100 + 9 * size,
        4: lambda size: 200 + 9 * size,
        5: lambda size: 200 + 18 * size,
        6: lambda size: 300 + 18 * size,
        7: lambda size: 300 + 36 * size,
        8: lambda size: 400 + 36 * size,
        9: lambda size: 500 + 36 * size + 199 * size,
    }

    def unlisted(size):
        # Used for every rank that is not in the table above.
        return 500 + 36 * size + 398 * size

    return by_rank.get(choice, unlisted)(n)


def GetPreferenceCostMatrix(data):
    """Build the (N_FAMILIES, N_DAYS) int64 matrix of preference penalties.

    Each row is first filled with the rank-10 penalty for that family's size
    (taken from the global FAMILY_SIZE); the days found in all but the last
    column of `data` are then overwritten with the penalty of their rank.
    `day - 1` converts the stored day into a column index.
    """
    wishes = data.values[:, :-1]
    cost_matrix = np.zeros((N_FAMILIES, N_DAYS), dtype=np.int64)
    for fam in range(N_FAMILIES):
        size = FAMILY_SIZE[fam]
        cost_matrix[fam, :] = get_penalty(size, 10)
        for rank, day in enumerate(wishes[fam]):
            cost_matrix[fam, day - 1] = get_penalty(size, rank)
    return cost_matrix


def GetAccountingCostMatrix():
    """Return the 1000 x 1000 float64 accounting-cost lookup table.

    Entry `[n, n_p1]` is `max(0, (n - 125) / 400 * n ** (0.5 + |n - n_p1| / 50))`,
    i.e. the accounting cost of a day with `n` people when the neighbouring
    day has `n_p1` people.

    The table is computed with one broadcast numpy expression instead of a
    million Python-level iterations.
    """
    size = 1000
    n = np.arange(size, dtype=np.float64).reshape(-1, 1)     # occupancy, as a column
    n_p1 = np.arange(size, dtype=np.float64).reshape(1, -1)  # neighbour occupancy, as a row
    exponent = 0.5 + np.abs(n - n_p1) / 50.0
    raw = (n - 125) / 400 * np.power(n, exponent)
    # Non-positive values (n <= 125) are clamped to +0.0, matching max(0, ...).
    return np.where(raw > 0, raw, 0.0)

In [5]:
# cost_function, etc.

# preference cost
@njit(fastmath=True)
def pcost(prediction):
    """Total preference cost and per-day head count of an assignment.

    `prediction[i]` is used directly as the day index of family `i`.
    Returns `(total penalty, occupancy)` where `occupancy` has N_DAYS + 1
    slots (the extra slot is left at zero here).
    """
    occupancy = np.zeros(N_DAYS+1, dtype=np.int64)
    total = 0
    for fam in range(len(prediction)):
        day = prediction[fam]
        total += PCOSTM[fam, day]
        occupancy[day] += FAMILY_SIZE[fam]
    return total, occupancy


# accounting cost
@njit(fastmath=True)
def acost(daily_occupancy):
    """Total accounting cost and number of days outside the occupancy limits.

    Overwrites the last slot of `daily_occupancy` with a copy of the slot
    before it, so the final day is compared with itself.
    Returns `(accounting cost, number of out-of-range days)`.
    """
    daily_occupancy[-1] = daily_occupancy[-2]
    total = 0
    violations = 0
    for day in range(N_DAYS):
        today = daily_occupancy[day]
        tomorrow = daily_occupancy[day + 1]
        if today > MAX_OCCUPANCY or today < MIN_OCCUPANCY:
            violations += 1
        total += ACOSTM[today, tomorrow]
    return total, violations

@njit(fastmath=True)
def acostd(daily_occupancy):
    """Per-day accounting cost and number of days outside the occupancy limits.

    Same bookkeeping as `acost`, but the cost of each day is stored separately.
    Overwrites the last slot of `daily_occupancy` with a copy of the slot
    before it. Returns `(float64 array of N_DAYS costs, out-of-range days)`.
    """
    daily_occupancy[-1] = daily_occupancy[-2]
    per_day = np.zeros(N_DAYS, dtype=np.float64)
    violations = 0
    for day in range(N_DAYS):
        today = daily_occupancy[day]
        tomorrow = daily_occupancy[day + 1]
        if today > MAX_OCCUPANCY or today < MIN_OCCUPANCY:
            violations += 1
        per_day[day] = ACOSTM[today, tomorrow]
    return per_day, violations

@njit(fastmath=True)
def pcostd(prediction):
    """Per-family preference cost and per-day head count of an assignment.

    Returns `(penalty, occupancy)`: `penalty` has the shape and dtype of
    `prediction`, `occupancy` has N_DAYS + 1 slots.
    """
    occupancy = np.zeros(N_DAYS+1, dtype=np.int64)
    per_family = np.empty_like(prediction)
    for fam in range(len(prediction)):
        day = prediction[fam]
        per_family[fam] = PCOSTM[fam, day]
        occupancy[day] += FAMILY_SIZE[fam]
    return per_family, occupancy

@njit(fastmath=True)
def cost_stats(prediction):
    """Detailed cost breakdown of an assignment.

    Returns `(per-family preference cost, per-day accounting cost,
    number of out-of-range days, occupancy of the N_DAYS real days)`.
    """
    per_family, occupancy = pcostd(prediction)
    per_day, violations = acostd(occupancy)
    # Drop the padding slot so callers only see the real days.
    return per_family, per_day, violations, occupancy[:-1]

@njit(fastmath=True)
def cost_function(prediction):
    """Scalar objective: preference cost + accounting cost + 100000000 per out-of-range day."""
    preference, occupancy = pcost(prediction)
    accounting, violations = acost(occupancy)
    return preference + accounting + violations*100000000

In [6]:
@njit(fastmath=True)
def cost_function_(prediction):
    """Return `(preference cost + accounting cost, number of out-of-range days)` without folding the two together."""
    preference, occupancy = pcost(prediction)
    accounting, violations = acost(occupancy)
    return preference + accounting, violations

@njit(fastmath=True)
def findAnotherDay4Fam(prediction, fam, occupancy):
    """Find the cheapest other day for family `fam`.

    Every day except the family's current one is tried, provided adding the
    family keeps that day at or below MAX_OCCUPANCY according to `occupancy`.
    `prediction` is modified while searching and restored before returning.

    Returns `(best_day, best_cost)`. When no day qualifies, the result is the
    family's current day with a cost of `np.inf`.
    """
    old_day = prediction[fam]
    n = FAMILY_SIZE[fam]
    best_cost = np.inf
    # Fall back to the current day; the family index is not a valid day.
    best_day = old_day

    for day in range(N_DAYS):
        if day == old_day:
            continue
        # Cheap capacity test first, so full cost evaluations are only spent
        # on days the family could actually move to.
        if occupancy[day] + n > MAX_OCCUPANCY:
            continue
        prediction[fam] = day
        new_cost, _ = cost_function_(prediction)
        if new_cost < best_cost:
            best_cost = new_cost
            best_day = day

    prediction[fam] = old_day
    return best_day, best_cost

@njit(fastmath=True)
def bestFamAdd(prediction, day, occupancy):
    """Pick the family whose move onto `day` gives the lowest cost.

    Only families currently on another day are considered, and only when
    removing them keeps their current day at or above MIN_OCCUPANCY according
    to `occupancy`. `prediction` is modified while searching and restored.

    Returns the family index, or -1 when no family qualifies.
    """
    best_cost = np.inf
    best_fam = -1
    for fam in range(len(prediction)):
        old_day = prediction[fam]
        if old_day == day:
            continue
        n = FAMILY_SIZE[fam]
        # Skip donors whose day would drop below the minimum before paying
        # for a full cost evaluation.
        if occupancy[old_day] - n < MIN_OCCUPANCY:
            continue
        prediction[fam] = day
        new_cost, _ = cost_function_(prediction)
        prediction[fam] = old_day
        if new_cost < best_cost:
            best_cost = new_cost
            best_fam = fam
    return best_fam

@njit(fastmath=True)
def bestFamRemoval(prediction, day, occupancy):
    """Pick the family on `day` that is cheapest to move elsewhere.

    Returns `(family index, new day)`; `(-1, day)` when no family on `day`
    has a finite-cost alternative.
    """
    best_cost = np.inf
    best_fam = -1
    best_day = day

    for fam in range(len(prediction)):
        if prediction[fam] != day:
            continue
        new_day, new_cost = findAnotherDay4Fam(prediction, fam, occupancy)
        if new_cost < best_cost:
            best_cost = new_cost
            best_fam = fam
            best_day = new_day

    return best_fam, best_day

@njit(fastmath=True)
def fixMaxOccupancy(prediction):
    """Move families off every day above MAX_OCCUPANCY, editing `prediction` in place.

    A day is abandoned as soon as no family on it can be moved, so the loop
    always terminates.
    """
    penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

    for day in np.where(occupancy > MAX_OCCUPANCY)[0]:
        while occupancy[day] > MAX_OCCUPANCY:
            fam, new_day = bestFamRemoval(prediction, day, occupancy)
            if fam < 0:
                # Nothing can be moved off this day; give up on it.
                break
            prediction[fam] = new_day
            penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

@njit(fastmath=True)
def fixMinOccupancy(prediction):
    """Move families onto every day below MIN_OCCUPANCY, editing `prediction` in place.

    A day is abandoned as soon as no family can be moved onto it, so the loop
    always terminates.
    """
    penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

    for day in np.where(occupancy < MIN_OCCUPANCY)[0]:
        while occupancy[day] < MIN_OCCUPANCY:
            fam = bestFamAdd(prediction, day, occupancy)
            if fam < 0:
                # No family can be spared by its current day; give up on this one.
                break
            prediction[fam] = day
            penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

In [7]:
@njit(fastmath=True)
def findBetterDay4Family(pred):
    """Greedy local search that edits `pred` in place.

    Families are visited in increasing order of FAMILY_SIZE; each of the
    family's entries in DESIRED is tried in turn and kept whenever it lowers
    `cost_function(pred)`. Passes repeat until one pass brings no improvement.
    The score is printed after every pass and once more at the end.
    """
    fobs = np.argsort(FAMILY_SIZE)
    n_picks = DESIRED.shape[1]
    score = cost_function(pred)
    original_score = np.inf

    while original_score > score:
        original_score = score
        for family_id in fobs:
            for pick in range(n_picks):
                day = DESIRED[family_id, pick]
                oldvalue = pred[family_id]
                if day == oldvalue:
                    # Re-assigning the current day cannot change the score.
                    continue
                pred[family_id] = day
                new_score = cost_function(pred)
                if new_score < score:
                    score = new_score
                else:
                    pred[family_id] = oldvalue

        # Positional-only print: keyword arguments such as `end=` are not
        # supported by print() inside an @njit function.
        print(score)
    print(score)

In [8]:
# Problem dimensions and occupancy limits used by the compiled cost functions.
N_DAYS, N_FAMILIES = 100, 5000
MIN_OCCUPANCY, MAX_OCCUPANCY = 125, 300

data = pd.read_csv('../family_data.csv', index_col='family_id')

# Lookup tables read as globals by the @njit functions.
FAMILY_SIZE = data.n_people.values
DESIRED = data.values[:, :-1] - 1           # all but the last column, shifted by -1
PCOSTM = GetPreferenceCostMatrix(data)      # preference cost matrix
ACOSTM = GetAccountingCostMatrix()          # accounting cost matrix

In [24]:
# Read the assignment back from santa.out. split() without an argument accepts
# any run of whitespace (spaces, newlines, tabs) between the numbers, whereas
# split(" ") fails on newlines or repeated blanks.
with open('santa.out', "r") as f:
    results = [int(token) for token in f.read().split()]
# Shift by -1 so the values can be used directly as day indices.
prediction = np.array(results) - 1
cost_function(prediction)

300091716.4885751

In [25]:
# Summary: preference cost, accounting cost, total objective, (min, max) occupancy.
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)
summary_fields = (
    penalty.sum(),
    accounting_cost.sum(),
    cost_function(prediction),
    occupancy.min(),
    occupancy.max(),
)
print('{:.0f} , {:.0f} , {:.0f}, ({} , {})'.format(*summary_fields))

86062 , 5654 , 300091716, (124 , 300)


In [27]:
# Pairwise-swap local search: exchange the days of two families and keep the
# exchange whenever it lowers the objective; repeat full passes until one
# pass brings no improvement.
#
# Work on a copy so the loaded `prediction` is not modified behind the back of
# other cells and re-running this cell starts from the same state.
global_best = prediction.copy()
global_best_score = cost_function(global_best)
print(global_best_score)
previous_score = global_best_score
n_families = len(global_best)
while True:
    for i in tqdm_notebook(range(n_families)):
        for j in range(i+1, n_families):
            if global_best[i] == global_best[j]:
                # Swapping identical days changes nothing; skip the evaluation.
                continue
            global_best[i], global_best[j] = global_best[j], global_best[i]
            current_score = cost_function(global_best)
            if current_score < global_best_score:
                global_best_score = current_score
                print(current_score)
            else:
                # Not an improvement: undo the swap.
                global_best[i], global_best[j] = global_best[j], global_best[i]
    if previous_score > global_best_score:
        previous_score = global_best_score
    else:
        break

206027.54215475242


HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

110554.33480823298
110504.33480823298
108397.6885570359
108347.6885570359
107771.00834948555
107648.07889634097
107198.88640185102
106811.14915216436
106650.63769106947
105418.63769106947
105311.10015941478
104575.60193755192
104475.60193755192
104456.7188783362
104355.6639108787
101775.59491878588
100913.5944433072
100659.5944433072
98250.56180827809
98249.53337675359
98134.31241259794
97994.31241259794
97897.36948194112
97889.76495406557
97871.76495406557
97823.5598556793
95643.5598556793
95561.06527736802
95482.94448953992
95439.546451011
95283.36094682121
95247.36094682121
95206.29647555033
95111.10715568216
95077.4701875136
94977.4701875136
94974.13289454376
94634.9190028061
94610.05557148452
94492.36545812094
94465.36545812094
94448.42685435596
94437.49451499508
94435.96171232684
94390.37592151883
94372.37592151883
94358.37592151883
94358.37586408583
94339.74608299982
94336.57887492799
94334.83729418555
94234.83729418555
94220.61788159536
94200.03732279826
94173.91600833702
94155

HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

90923.0880120556
90900.66040296294
90890.64696973452
90848.45075719726
90796.61473575549
90791.20640286719
90786.03049533945
90656.58591927636
90655.57018447111
90653.27508552148
90615.26561495921
90615.25294609595
90606.17888763032
90602.94995773399
90575.94995773399
90574.62240112424
90516.44902229239
90516.185106061
90513.60602610589
90490.31599715525
90488.24292940175
90484.42332015611
90479.95593624262
90478.8347002126
90477.7055125982
90474.00541892019
90470.30721240127
90470.24067491505


HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

90466.88737332623
90458.34574095836
90457.2938892466
90449.28236567971
90447.88837514709
90441.43668713985
90437.0239683827
90434.47422317526
90434.24268801397
90417.43408219003


HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

90398.65777641445
90387.91990781021
90349.22196703638
90347.13058456816
90344.68692228664


HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

In [26]:

# Repair occupancy violations in place (minimum first, then maximum), then
# print: preference cost, accounting cost, total objective, (min, max) occupancy.
fixMinOccupancy(prediction)
fixMaxOccupancy(prediction)
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)
repaired_fields = (
    penalty.sum(),
    accounting_cost.sum(),
    cost_function(prediction),
    occupancy.min(),
    occupancy.max(),
)
print('{:.0f} , {:.0f} , {:.0f} , ({} , {})'.format(*repaired_fields))

93790 , 112238 , 206028 , (125 , 300)


optimum_cost =  67309.4786351117    
optimum_preference_cost =  61383.472062839195  
optimum_accounting_cost  =  5926.006572272518

In [12]:
# Head count per day for the current `prediction`; 101 slots are allocated.
daily_occupancy = np.zeros(101, dtype=np.int16)
for fam in range(len(prediction)):
    daily_occupancy[prediction[fam]] += family_size[fam]
print(daily_occupancy)

[300 285 300 299 279 252 240 244 261 285 300 298 276 264 257 280 299 294
 274 246 223 237 258 285 299 291 271 256 245 247 275 268 243 208 178 148
 125 271 265 241 209 175 155 126 297 284 260 229 200 179 215 250 249 225
 191 157 125 219 247 233 202 161 125 127 125 249 220 183 139 125 125 126
 207 199 171 128 125 125 125 226 207 174 129 125 126 125 248 225 199 158
 125 127 125 225 206 172 126 125 125 126   0]


In [9]:
# sub = pd.read_csv("../sample_submission.csv")
# sub.assigned_day = results
# sub.to_csv(f"submission_localsolver_{score(results):.0f}.csv",index=False)

In [None]:
# Write the submission file. `final` is not assigned anywhere in this
# notebook; the swap-search cell keeps its best assignment in `global_best`,
# so that array is used here. The +1 undoes the -1 shift applied when the
# assignment was loaded.
# NOTE(review): confirm `global_best` is the assignment meant to be submitted.
sub = pd.DataFrame(range(N_FAMILIES), columns=['family_id'])
sub['assigned_day'] = global_best + 1
sub.to_csv('submission.csv', index=False)