In [1]:
import numpy as np
import pandas as pd
from numba import njit
from tqdm import tqdm_notebook

In [2]:
data = pd.read_csv("../../family_data.csv",index_col = 'family_id')
family_size = data.n_people.values.astype(np.int8)
penalties = np.asarray([
    [
        0,
        50,
        50 + 9 * n,
        100 + 9 * n,
        200 + 9 * n,
        200 + 18 * n,
        300 + 18 * n,
        300 + 36 * n,
        400 + 36 * n,
        500 + 36 * n + 199 * n,
        500 + 36 * n + 398 * n
    ] for n in range(family_size.max() + 1)
])

# penalties = np.asarray([
#     [
#         0,
#         50,
#         50 + 9 * n,
#         100 + 9 * n,
#         200 + 9 * n,
#         1000000,
#         1000000,
#         1000000,
#         1000000,
#         1000000,
#         1000000

#     ] for n in range(family_size.max() + 1)
# ])

family_cost_matrix = np.concatenate(data.n_people.apply(lambda n: np.repeat(penalties[n, 10], 100).reshape(1, 100)))

for fam in data.index:
    for choice_order, day in enumerate(data.loc[fam].drop("n_people")):
        family_cost_matrix[fam, day - 1] = penalties[data.loc[fam, "n_people"], choice_order]


# family_cost_matrix = np.zeros((5000,4),dtype=np.uint8)
# for fam in data.index:
#     for choice_order,day in enumerate(data.loc[fam].drop("n_people")[:4]):
#         family_cost_matrix[fam, choice_order] = penalties[data.loc[fam, "n_people"], choice_order]


        
accounting_cost_matrix = np.zeros((500, 500),dtype=int64)
for n in range(accounting_cost_matrix.shape[0]):
    for diff in range(accounting_cost_matrix.shape[1]):
        accounting_cost_matrix[n, diff] = max(0, int((n - 125.0) / 400.0 * n**(0.5 + diff / 50.0)))
        
def cost_function(prediction, family_size=family_size, family_cost_matrix=family_cost_matrix, accounting_cost_matrix=accounting_cost_matrix):
    N_DAYS = family_cost_matrix.shape[1]
    MAX_OCCUPANCY = 300
    MIN_OCCUPANCY = 125
    penalty = 0
    daily_occupancy = np.zeros(N_DAYS + 1, dtype=np.int16)
    for i, (pred, n) in enumerate(zip(prediction, family_size)):
        daily_occupancy[pred - 1] += n
        penalty += family_cost_matrix[i, pred - 1]

    accounting_cost = 0
    n_low = 0
    n_high = 0
    daily_occupancy[-1] = daily_occupancy[-2]
    for day in range(N_DAYS):
        n_next = daily_occupancy[day + 1]
        n = daily_occupancy[day]
        n_high += (n > MAX_OCCUPANCY)
        n_low += (n < MIN_OCCUPANCY)
        diff = abs(n - n_next)
        accounting_cost += accounting_cost_matrix[n, diff]

    return np.asarray([penalty, accounting_cost, n_low, n_high])


def score(prediction):
    fc, ac, l, h = cost_function(prediction, family_size, family_cost_matrix, accounting_cost_matrix)
    return (fc + ac) + (l + h) * 1000000

NameError: name 'int64' is not defined

In [21]:
accounting_cost_matrix = np.zeros((500, 500),dtype=int)
for n in range(accounting_cost_matrix.shape[0]):
    for diff in range(accounting_cost_matrix.shape[1]):
        temp = (n - 125.0) / 400.0 * n**(0.5 + diff / 50.0)
        if temp>1000000:
            temp=1000000
        accounting_cost_matrix[n, diff] = max(0, int(temp))

In [22]:
accounting_cost_matrix[280,280]

1000000

# input files

In [7]:
with open('santa_size.in',"w") as f:
    f.write(" ".join(map(str,family_size)))

In [36]:
with open('santa_cost.in',"w") as f:
    f.write(" ".join(map(str,family_cost_matrix.reshape(5000*100))))

In [26]:
with open('santa_choice.in',"w") as f:
    f.write(" ".join(map(str,family_choice_matrix.reshape(5000*4))))

In [21]:
with open('santa_acc_cost.in',"w") as f:
    f.write(" ".join(map(str,accounting_cost_matrix.reshape(500*500))))

In [23]:
with open('santa_acc_cost_round.in',"w") as f:
    f.write(" ".join(map(str,accounting_cost_matrix.reshape(500*500))))

In [7]:
results = pd.read_csv("submission69880.csv",index_col = 'family_id')
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(results.assigned_day.values-1)
print('{:.0f} , {:.0f} , {:.0f}, ({} , {}) , {}'.format(penalty.sum(), 
                                    accounting_cost.sum(), 
                                    cost_function(results.assigned_day.values-1),
                                    occupancy.min(), 
                                    occupancy.max(),
                                    n_out_of_range))

64356 , 5524 , 69880, (125 , 300) , 0


In [8]:
with open('santa.out',"w") as f:
    f.write(" ".join(map(str,results.assigned_day.values)))

# output files

In [4]:
def get_penalty(n, choice):
    penalty = None
    if choice == 0:
        penalty = 0
    elif choice == 1:
        penalty = 50
    elif choice == 2:
        penalty = 50 + 9 * n
    elif choice == 3:
        penalty = 100 + 9 * n
    elif choice == 4:
        penalty = 200 + 9 * n
    elif choice == 5:
        penalty = 200 + 18 * n
    elif choice == 6:
        penalty = 300 + 18 * n
    elif choice == 7:
        penalty = 300 + 36 * n
    elif choice == 8:
        penalty = 400 + 36 * n
    elif choice == 9:
        penalty = 500 + 36 * n + 199 * n
    else:
        penalty = 500 + 36 * n + 398 * n
    return penalty


def GetPreferenceCostMatrix(data):
    cost_matrix = np.zeros((N_FAMILIES, N_DAYS), dtype=np.int64)
    for i in range(N_FAMILIES):
        desired = data.values[i, :-1]
        cost_matrix[i, :] = get_penalty(FAMILY_SIZE[i], 10)
        for j, day in enumerate(desired):
            cost_matrix[i, day-1] = get_penalty(FAMILY_SIZE[i], j)
    return cost_matrix


def GetAccountingCostMatrix():
    ac = np.zeros((1000, 1000), dtype=np.float64)
    for n in range(ac.shape[0]):
        for n_p1 in range(ac.shape[1]):
            diff = abs(n - n_p1)
            ac[n, n_p1] = max(0, (n - 125) / 400 * n**(0.5 + diff / 50.0))
    return ac

# cost_function, etc.

# preference cost
@njit(fastmath=True)
def pcost(prediction):
    daily_occupancy = np.zeros(N_DAYS+1, dtype=np.int64)
    penalty = 0
    for (i, p) in enumerate(prediction):
        n = FAMILY_SIZE[i]
        penalty += PCOSTM[i, p]
        daily_occupancy[p] += n
    return penalty, daily_occupancy


# accounting cost
@njit(fastmath=True)
def acost(daily_occupancy):
    accounting_cost = 0
    n_out_of_range = 0
    daily_occupancy[-1] = daily_occupancy[-2]
    for day in range(N_DAYS):
        n_p1 = daily_occupancy[day + 1]
        n    = daily_occupancy[day]
        n_out_of_range += (n > MAX_OCCUPANCY) or (n < MIN_OCCUPANCY)
        accounting_cost += ACOSTM[n, n_p1]
    return accounting_cost, n_out_of_range

@njit(fastmath=True)
def acostd(daily_occupancy):
    accounting_cost = np.zeros(N_DAYS, dtype=np.float64)
    n_out_of_range = 0
    daily_occupancy[-1] = daily_occupancy[-2]
    for day in range(N_DAYS):
        n_p1 = daily_occupancy[day + 1]
        n    = daily_occupancy[day]
        n_out_of_range += (n > MAX_OCCUPANCY) or (n < MIN_OCCUPANCY)
        accounting_cost[day] = ACOSTM[n, n_p1]
    return accounting_cost, n_out_of_range

@njit(fastmath=True)
def pcostd(prediction):
    daily_occupancy = np.zeros(N_DAYS+1, dtype=np.int64)
    penalty = np.empty_like(prediction)
    for (i, p) in enumerate(prediction):
        n = FAMILY_SIZE[i]
        penalty[i] = PCOSTM[i, p]
        daily_occupancy[p] += n
    return penalty, daily_occupancy

@njit(fastmath=True)
def cost_stats(prediction):
    penalty, daily_occupancy = pcostd(prediction)
    accounting_cost, n_out_of_range = acostd(daily_occupancy)
    return penalty, accounting_cost, n_out_of_range, daily_occupancy[:-1]

@njit(fastmath=True)
def cost_function(prediction):
    penalty, daily_occupancy = pcost(prediction)
    accounting_cost, n_out_of_range = acost(daily_occupancy)
    return penalty + accounting_cost + n_out_of_range*100000000

@njit(fastmath=True)
def cost_function_(prediction):
    penalty, daily_occupancy = pcost(prediction)
    accounting_cost, n_out_of_range = acost(daily_occupancy)
    return penalty + accounting_cost, n_out_of_range

@njit(fastmath=True)
def findAnotherDay4Fam(prediction, fam, occupancy):
    old_day = prediction[fam]
    best_cost = np.inf
    best_day = fam
    n = FAMILY_SIZE[fam]
    
    daysrange = list(range(0,old_day))+list(range(old_day+1,N_DAYS))
    for day in daysrange:
        prediction[fam] = day
        new_cost, _ = cost_function_(prediction)
        
        if (new_cost<best_cost) and (occupancy[day]+n<=MAX_OCCUPANCY):
            best_cost = new_cost
            best_day = day
            
    prediction[fam] = old_day
    return best_day, best_cost

@njit(fastmath=True)
def bestFamAdd(prediction, day, occupancy):
    best_cost = np.inf
    best_fam = prediction[day]
    for fam in np.where(prediction!=day)[0]:
        old_day = prediction[fam]
        prediction[fam] = day
        new_cost, _ = cost_function_(prediction)
        prediction[fam] = old_day
        n = FAMILY_SIZE[fam]
        if (new_cost<best_cost) and (occupancy[old_day]-n>=MIN_OCCUPANCY):
            best_cost = new_cost
            best_fam = fam   
    return best_fam

@njit(fastmath=True)
def bestFamRemoval(prediction, day, occupancy):
    best_cost = np.inf
    best_day = day
    
    for fam in np.where(prediction==day)[0]:
        new_day, new_cost = findAnotherDay4Fam(prediction, fam, occupancy)
        if new_cost<best_cost:
            best_cost = new_cost
            best_fam = fam
            best_day = new_day
            
    return best_fam, best_day

@njit(fastmath=True)
def fixMaxOccupancy(prediction):
    penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

    for day in np.where(occupancy>MAX_OCCUPANCY)[0]:
        while occupancy[day]>MAX_OCCUPANCY:
            fam, new_day = bestFamRemoval(prediction, day, occupancy)
            prediction[fam] = new_day
            penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)
            
@njit(fastmath=True)            
def fixMinOccupancy(prediction):
    penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

    for day in np.where(occupancy<MIN_OCCUPANCY)[0]:
        while occupancy[day]<MIN_OCCUPANCY:
            fam = bestFamAdd(prediction, day, occupancy)
            prediction[fam] = day
            penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)

@njit(fastmath=True)
def findBetterDay4Family(pred):
    fobs = np.argsort(FAMILY_SIZE)
    score = cost_function(pred)
    original_score = np.inf
    
    while original_score>score:
        original_score = score
        for family_id in fobs:
            for pick in range(10):
                day = DESIRED[family_id, pick]
                oldvalue = pred[family_id]
                pred[family_id] = day
                new_score = cost_function(pred)
                if new_score<score:
                    score = new_score
                else:
                    pred[family_id] = oldvalue

        print(score, end='\r')
    print(score)

N_DAYS = 100
N_FAMILIES = 5000
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125

data = pd.read_csv('../../family_data.csv', index_col='family_id')

FAMILY_SIZE = data.n_people.values
DESIRED     = data.values[:, :-1] - 1
PCOSTM = GetPreferenceCostMatrix(data) # Preference cost matrix
ACOSTM = GetAccountingCostMatrix()     # Accounting cost matrix

# fix first choice

In [3]:
daily_occupancy = np.zeros(101, dtype=np.int16)
for i,r in enumerate(data.choice_0.values):
    daily_occupancy[r]+=FAMILY_SIZE[i]
print(daily_occupancy)

[   0 1576  202  412  379  358  132  166  160  163  362  393  437  158
  209  172  191  433  312  356  155  126  162  196  339  418  370  232
  145  191  127  357  358  379  106  104  118   96  289  316  304  124
  133  142  102  350  303  326  167  130   92  150  338  252  330   93
  109  102  138  350  320  250   48   49   31   44  310  275  235   55
   34   50   46  240  217  267   35   57   50   39  228  312  252   54
   56   69   54  321  299  302   59   75   28   43  255  259  294   45
   56   57   43]


In [13]:
data.head()

Unnamed: 0_level_0,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
family_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,52,38,12,82,33,75,64,76,10,28,4
1,26,4,82,5,11,47,38,6,66,61,4
2,100,54,25,12,27,82,10,89,80,33,3
3,2,95,1,96,32,6,40,31,9,59,2
4,53,1,47,93,26,3,46,16,42,39,4


In [5]:
daily_occupancy = np.zeros(101, dtype=np.int16)
for _,i in data[(data.n_people==2)|(data.n_people==8)].reset_index().loc[:,["family_id","choice_0","n_people"]].iterrows():
    daily_occupancy[i["choice_0"]]+=i["n_people"]
print(daily_occupancy)
print(daily_occupancy.sum())

[  0 280  30  62  52  36   4  24   8  14  44  70  42  26  26  16  24  50
  44  48  26  20  18  12  22  58  48  34  20  42  18  42  66  36   4  12
  18  10  30  36  40   8   0  22  10  66  38  54  12  28   2   6  38  26
  74  12  14   8   6  56  36  18   0   8   2   6  34  26  24   2   4   8
   4  36  14  12   6   8   6   2  38  70  36  10   4   4   0  44  36  24
  10  12   2   6  50  10  56  14   0  12   4]
2690


In [None]:
152
168
210
285
456
177
128

In [7]:
data[(data.n_people==2)|(data.n_people==8)].reset_index().loc[:,["family_id","choice_0"]].values.shape

(874, 2)

In [8]:
with open("santa_first_choice.in","w") as f:
    f.write(" ".join(map(str,data[(data.n_people==2)|(data.n_people==8)].reset_index().loc[:,["family_id","choice_0"]].values.reshape(874*2))))

In [100]:
daily_occupancy = np.zeros(101, dtype=np.int16)
for i,r in enumerate(data.loc[data.n_people==7,"choice_0"].values):
    daily_occupancy[r]+=FAMILY_SIZE[i]
print(daily_occupancy)

[ 0 98 15 25 24 23 23  8 11 23 23 29 42  6 17  6  3 17 13 19  6  7  8 12
 19 15 11 30 10  7 10 26 35 10  2  4  5 13 29 40 22  4  2  9  4 17 18 23
 14 10  9  4 23  0 15  3  5  7  2  9  9 16  4  0  0  0 21 17  2  0  7  0
  9 12  7  7  6  0  0  4 15 20 14  6  0  5  6 25 17 12  7 13  0  0  9 15
 21  2  4  0  5]


# check missing

In [31]:
with open('proba.out',"r") as f:
    proba = list(map(float,f.read().strip().split(" ")))

In [46]:
proba_results = []
for i in range(0,5000*100,100):
    proba_results.append(np.argmax(proba[i:i+100]))
prediction = np.array(proba_results)

In [9]:
with open('santa.out',"r") as f:
    results = list(map(int,f.read().strip().split(" ")))
prediction = np.array(results)-1
cost_function(prediction)

70621.9599671893

In [10]:
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)
print('{:.0f} , {:.0f} , {:.0f}, ({} , {}) , {}'.format(penalty.sum(), 
                                    accounting_cost.sum(), 
                                    cost_function(prediction),
                                    occupancy.min(), 
                                    occupancy.max(),
                                    n_out_of_range))

63589 , 7033 , 70622, (125 , 300) , 0


In [8]:
daily_occupancy = np.zeros(101, dtype=np.int16)
for i,r in enumerate(prediction):
    daily_occupancy[r]+=FAMILY_SIZE[i]
print(daily_occupancy)

[333 312 312 300 288 264 247 253 274 296 302 296 275 271 255 276 297 293
 278 253 235 241 264 293 310 298 280 268 255 255 280 279 256 221 193 172
 209 242 263 248 221 194 196 220 256 274 259 236 210 195 228 258 249 222
 187 149 123 214 241 218 184 139  89  88  62 246 214 174 125  67  79  86
 239 222 188 146 118 122  92 220 214 182 137 105 122 125 255 234 203 161
 123  88  72 226 208 176 130 106  83  66   0]


In [49]:
global_best = prediction
global_best_score = cost_function(global_best)
print(global_best_score)
previous_score = global_best_score
while True:
    for i in tqdm_notebook(range(5000)):
        for j in range(i+1,5000):
            global_best[i],global_best[j] = global_best[j],global_best[i]
            current_score = cost_function(global_best)
            if current_score<global_best_score:
                global_best_score = current_score
                print(current_score)
            else:
                global_best[i],global_best[j] = global_best[j],global_best[i]
    if previous_score>global_best_score:
        previous_score = global_best_score
    else:
        break

80970.23752186703


HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))

80867.55735512116
80780.32004766123
80694.04143486127


KeyboardInterrupt: 

In [48]:

fixMinOccupancy(prediction)
fixMaxOccupancy(prediction)
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(prediction)
print('{:.0f} , {:.0f} , {:.0f} , ({} , {})'.format(penalty.sum(), 
                                    accounting_cost.sum(), 
                                    cost_function(prediction),
                                    occupancy.min(), 
                                    occupancy.max()))

67471 , 13499 , 80970 , (125 , 300)


optimium_cost =  67309.4786351117    
optimium_preference_cost =  61383.472062839195  
optimium_accouting_cost  =  5926.006572272518

In [12]:
daily_occupancy = np.zeros(101, dtype=np.int16)
for i,r in enumerate(prediction):
    daily_occupancy[r]+=family_size[i]
print(daily_occupancy)

[300 285 300 299 279 252 240 244 261 285 300 298 276 264 257 280 299 294
 274 246 223 237 258 285 299 291 271 256 245 247 275 268 243 208 178 148
 125 271 265 241 209 175 155 126 297 284 260 229 200 179 215 250 249 225
 191 157 125 219 247 233 202 161 125 127 125 249 220 183 139 125 125 126
 207 199 171 128 125 125 125 226 207 174 129 125 126 125 248 225 199 158
 125 127 125 225 206 172 126 125 125 126   0]


In [10]:
df = pd.read_csv("submission.csv")
daily_occupancy = np.zeros(101, dtype=np.int16)
for i,r in enumerate(df.assigned_day.values-1):
    daily_occupancy[r]+=family_size[i]
print(daily_occupancy)

[300 285 300 300 283 257 243 252 272 298 300 295 274 256 247 266 286 284
 267 241 217 230 254 281 299 289 269 253 245 246 274 273 252 222 193 166
 195 230 253 236 206 176 188 211 249 262 245 219 196 179 213 246 249 226
 190 160 125 220 248 231 201 161 125 127 125 235 208 174 131 125 125 125
 225 206 176 131 125 127 125 224 206 174 129 126 126 125 249 225 195 153
 125 127 125 220 202 171 125 126 126 125   0]


In [36]:
penalty, accounting_cost, n_out_of_range, occupancy = cost_stats(df.assigned_day.values-1)
print('{:.0f} , {:.0f} , {:.0f}, ({} , {}) , {}'.format(penalty.sum(), 
                                    accounting_cost.sum(), 
                                    cost_function(df.assigned_day.values-1),
                                    occupancy.min(), 
                                    occupancy.max(),
                                    n_out_of_range))

66648 , 5041 , 71689, (125 , 300) , 0


In [20]:
# sub = pd.read_csv("../sample_submission.csv")
# sub.assigned_day = results
# sub.to_csv(f"submission_localsolver_{score(results):.0f}.csv",index=False)

In [None]:
sub = pd.DataFrame(range(N_FAMILIES), columns=['family_id'])
sub['assigned_day'] = final+1
sub.to_csv('submission.csv', index=False)