In [137]:
import pandas as pd
import numpy as np
import json
import pprint

def haversine_np(lon1, lat1, lon2, lat2, scaler=1.4):
    """
    Scaled great-circle (haversine) distance between two sets of points.

    Coordinates are given in decimal degrees, as scalars or NumPy arrays.
    The four arguments only need to be broadcastable against each other
    (e.g. a length-1 array against a length-N array), not of equal length.

    Parameters
    ----------
    lon1, lat1 : longitude / latitude of the first point(s), in degrees.
    lon2, lat2 : longitude / latitude of the second point(s), in degrees.
    scaler : multiplier applied to the great-circle distance before rounding
        (default 1.4).  NOTE(review): presumably a detour factor that turns
        straight-line distance into travel distance -- confirm.

    Returns
    -------
    Integer NumPy value/array holding round(distance_km * 1000 * scaler),
    i.e. the scaled distance in metres.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2

    # Floating-point round-off can push `a` a hair above 1.0 for (near-)antipodal
    # points; sqrt/arcsin would then return NaN and the int cast below would
    # produce garbage.  Clamp to the mathematically valid range first.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    km = 6378.137 * c  # 6378.137 km: Earth's equatorial radius (WGS-84)
    return np.round(km * 1000 * scaler, decimals=0).astype('int')




def gen_problem_instance(raw_order_data_file, K=100, rnd_seed=1, sampling_time_span_seconds=3600, buffer_min=20, file_prefix='TEST', instance_name='1', **other_options):
    """
    Build one problem instance from a raw order CSV and write it to
    '<file_prefix>_<instance_name>.json' in the current working directory.

    Steps:
      1. Read the orders and derive integer second/volume columns.
      2. Randomly pick a time window of `sampling_time_span_seconds` that
         contains at least 2*K orders.
      3. Draw K orders from that window without replacement.
      4. Compute the (2K x 2K) distance matrix over all shop locations
         followed by all delivery locations.
      5. Derive a delivery deadline per order and dump everything as JSON.

    Parameters
    ----------
    raw_order_data_file : path of a CSV with (at least) the columns ord_date,
        cook_time, capacity_bike, shop_lat, shop_lon, dlv_lat, dlv_lon.
    K : number of orders in the generated instance.
    rnd_seed : seed for NumPy's global random generator.  It is applied
        before any random draw, so the same arguments reproduce the same
        instance.
    sampling_time_span_seconds : length of the sampling window in seconds.
    buffer_min : slack, in minutes, added to every delivery deadline.
    file_prefix, instance_name : joined as '<file_prefix>_<instance_name>' to
        form the instance name and the output file stem.
    **other_options : optional overrides.  Recognised keys:
        {BIKE,WALK,CAR}_SERVICE_TIME_SEC, {BIKE,WALK,CAR}_CAPA,
        {BIKE,WALK,CAR}_VAR_COST, {BIKE,WALK,CAR}_FIXED_COST,
        NUM_{BIKES,WALKS,CARS}_RATIO, and MAX_SAMPLING_ATTEMPTS (how many
        random windows to try before giving up, default 10000).

    Raises
    ------
    ValueError
        If the data spans no more than `sampling_time_span_seconds`, or if no
        window with at least 2*K orders is found within the attempt limit.
    """
    raw_order_df = pd.read_csv(raw_order_data_file, parse_dates=['ord_date'])

    earliest_date = raw_order_df.ord_date.min()

    raw_order_df['ord_time_sec'] = (raw_order_df.ord_date - earliest_date).dt.total_seconds().astype('int')
    raw_order_df['cook_time_sec'] = (raw_order_df.cook_time * 60).astype('int')
    # Round before casting: a bare astype('int') truncates, and e.g.
    # 0.29 * 100 == 28.999999999999996 would silently become 28.
    raw_order_df['capacity_bike'] = (raw_order_df.capacity_bike * 100).round().astype('int')

    # Seed BEFORE the first random draw so that both the sampling window and
    # the order sample below are reproducible from rnd_seed.
    np.random.seed(rnd_seed)

    latest_start_time = raw_order_df['ord_time_sec'].max() - sampling_time_span_seconds
    if latest_start_time <= 0:
        raise ValueError(
            f'{raw_order_data_file}: order data must span more than '
            f'{sampling_time_span_seconds} seconds to sample a time window'
        )

    # Rejection-sample a window start until the window holds enough orders.
    # The attempt cap turns an otherwise endless loop into a clear error when
    # the data is too sparse for the requested K.
    max_attempts = other_options.get('MAX_SAMPLING_ATTEMPTS', 10000)
    for _ in range(max_attempts):
        sampling_start_time = np.random.randint(0, latest_start_time)
        sampling_end_time = sampling_start_time + sampling_time_span_seconds

        in_window = (raw_order_df['ord_time_sec'] >= sampling_start_time) & (raw_order_df['ord_time_sec'] <= sampling_end_time)
        sampling_raw_order_df = raw_order_df[in_window]

        if len(sampling_raw_order_df) >= K * 2:  # We should have enough sampling orders to choose from
            break
    else:
        raise ValueError(
            f'{raw_order_data_file}: no {sampling_time_span_seconds}-second window with at least '
            f'{K * 2} orders found in {max_attempts} attempts'
        )

    # Re-base order times so the earliest order of the window is at t = 0.
    sampling_raw_order_df = sampling_raw_order_df.assign(ord_time_sec=sampling_raw_order_df.ord_time_sec - sampling_raw_order_df.ord_time_sec.min())

    BUFFER_TIME_SEC = buffer_min * 60   # deadline = earliest delivery time + buffer time

    BIKE_SERVICE_TIME_SEC = other_options.get('BIKE_SERVICE_TIME_SEC', 120)     # 2 minutes (bikes)
    WALK_SERVICE_TIME_SEC = other_options.get('WALK_SERVICE_TIME_SEC', 120)     # 2 minutes (walks)
    CAR_SERVICE_TIME_SEC = other_options.get('CAR_SERVICE_TIME_SEC', 180)      # 3 minutes (cars)

    BIKE_SPEED = 1000 / (3.15 * 60)  # metres per second (3.15 minutes per km)
    WALK_SPEED = BIKE_SPEED * 0.25   # 25% of bikes
    CAR_SPEED = BIKE_SPEED * 0.8     # 80% of bikes

    BIKE_CAPA = other_options.get('BIKE_CAPA', 100)  # ~ 3 orders
    WALK_CAPA = other_options.get('WALK_CAPA', 70)   # ~ 2 orders
    CAR_CAPA = other_options.get('CAR_CAPA', 200)    # ~ 5 orders

    BIKE_VAR_COST = other_options.get('BIKE_VAR_COST', 60)  # per 100 m travel
    WALK_VAR_COST = other_options.get('WALK_VAR_COST', 30)  # per 100 m travel
    CAR_VAR_COST = other_options.get('CAR_VAR_COST', 100)   # per 100 m travel

    BIKE_FIXED_COST = other_options.get('BIKE_FIXED_COST', 5000)  # fixed cost
    WALK_FIXED_COST = other_options.get('WALK_FIXED_COST', 5000)  # fixed cost
    CAR_FIXED_COST = other_options.get('CAR_FIXED_COST', 5000)    # fixed cost

    NUM_BIKES = int(K * other_options.get('NUM_BIKES_RATIO', 0.2))    # availability
    NUM_WALKS = int(K * other_options.get('NUM_WALKS_RATIO', 0.3))    # availability
    NUM_CARS = int(K * other_options.get('NUM_CARS_RATIO', 1))        # availability (practically unlimited)

    sample_orders_df = sampling_raw_order_df.sample(K, replace=False).sort_values(by='ord_date')

    # Location index layout: 0..K-1 are the shops, K..2K-1 the delivery points,
    # both in the order of sample_orders_df.
    all_lats = np.array(list(sample_orders_df.shop_lat) + list(sample_orders_df.dlv_lat))
    all_lons = np.array(list(sample_orders_df.shop_lon) + list(sample_orders_df.dlv_lon))

    # DIST[i][j]: scaled distance in metres from location i to location j.
    DIST = [
        haversine_np(np.array([lon]), np.array([lat]), all_lons, all_lats).tolist()
        for lat, lon in zip(all_lats.tolist(), all_lons.tolist())
    ]

    # DLV_DEADLINE is defined as the earliest delivery time + BUFFER_TIME_SEC by the always-available vehicles (cars):
    # order time + cook time + car service time + buffer + car travel time shop -> delivery point (DIST[k][k+K]).
    ORDERS = [
        # ORD_ID, ORD_TIME, SHOP_LAT, SHOP_LON, DLV_LAT, DLV_LON, COOK_TIME, VOL, DLV_DEADLINE
        [
            k,
            order.ord_time_sec,
            order.shop_lat,
            order.shop_lon,
            order.dlv_lat,
            order.dlv_lon,
            order.cook_time_sec,
            order.capacity_bike,
            int(round(order.ord_time_sec + order.cook_time_sec + CAR_SERVICE_TIME_SEC + BUFFER_TIME_SEC + DIST[k][k + K] / CAR_SPEED)),
        ]
        for k, (_, order) in enumerate(sample_orders_df.iterrows())
    ]

    prob_filename = f'{file_prefix}_{instance_name}'

    prob = {
        'name': prob_filename,
        'K': K,
        # 'S': rnd_seed,
        # 'BUFFER_TIME_SEC': BUFFER_TIME_SEC,

        'RIDERS': [
            # name, speed, capa, var_cost, fixed_cost, service_time, available number
            ['BIKE', BIKE_SPEED, BIKE_CAPA, BIKE_VAR_COST, BIKE_FIXED_COST, BIKE_SERVICE_TIME_SEC, NUM_BIKES],
            ['WALK', WALK_SPEED, WALK_CAPA, WALK_VAR_COST, WALK_FIXED_COST, WALK_SERVICE_TIME_SEC, NUM_WALKS],
            ['CAR', CAR_SPEED, CAR_CAPA, CAR_VAR_COST, CAR_FIXED_COST, CAR_SERVICE_TIME_SEC, NUM_CARS]
        ],

        'ORDERS': ORDERS,

        'DIST': DIST
    }

    # Write to a file.  pprint is used (rather than json.dump) for its compact
    # multi-column layout; it emits Python literal syntax, so single quotes are
    # swapped for double quotes to obtain JSON.  This relies on no string value
    # (i.e. the instance name) containing a quote character.
    with open(prob_filename + '.json', 'w') as f:
        pretty_json_str = pprint.pformat(prob, compact=True, sort_dicts=False).replace("'", '"')
        f.write(pretty_json_str)

In [120]:
# Scratch cell: reproduces the window-sampling step of gen_problem_instance
# on the medium-density file so the intermediate frame can be inspected.
K = 300


# df1 = pd.read_csv('opt_challenge_data_order_density_high.csv')
raw_order_df = pd.read_csv('opt_challenge_data_order_density_medium.csv', parse_dates=['ord_date'])
# df3 = pd.read_csv('opt_challenge_data_order_density_low.csv')


earliest_date = raw_order_df.ord_date.min()

raw_order_df['ord_time_sec'] = (raw_order_df.ord_date - earliest_date).dt.total_seconds().astype('int')
raw_order_df['cook_time_sec'] = (raw_order_df.cook_time * 60).astype('int')
# Round before casting: astype('int') truncates, and e.g. 0.29 * 100 is
# 28.999999999999996 in floating point, which would become 28 instead of 29.
raw_order_df['capacity_bike'] = (raw_order_df.capacity_bike * 100).round().astype('int')


sampling_time_span_seconds = 3600 # 1 hour


# Keep drawing random one-hour windows until one holds at least 2*K orders.
while True:
    sampling_start_time = np.random.randint(0, raw_order_df['ord_time_sec'].max() - sampling_time_span_seconds)
    sampling_end_time = sampling_start_time + sampling_time_span_seconds

    sampling_raw_order_df = raw_order_df[(raw_order_df['ord_time_sec'] >= sampling_start_time) & (raw_order_df['ord_time_sec'] <= sampling_end_time)]

    if len(sampling_raw_order_df) >= K * 2: # We should have enough sampling orders to choose from
        break

# Re-base order times so the window's earliest order is at t = 0.
sampling_raw_order_df = sampling_raw_order_df.assign(ord_time_sec = sampling_raw_order_df.ord_time_sec - sampling_raw_order_df.ord_time_sec.min())
sampling_raw_order_df

Unnamed: 0,id,ord_date,shop_lat,shop_lon,dlv_lat,dlv_lon,cook_time,capacity_bike,ord_time_sec,cook_time_sec
1370,1371,2024-06-01 18:08:01,35.991741,126.017965,35.981938,126.008647,10,23,0,600
1371,1372,2024-06-01 18:08:05,36.004923,125.994286,35.996407,125.994597,5,17,4,300
1372,1373,2024-06-01 18:08:13,36.004763,126.015213,35.998141,126.020018,15,25,12,900
1373,1374,2024-06-01 18:08:14,35.995674,126.008174,35.977261,126.012027,15,16,13,900
1374,1375,2024-06-01 18:08:15,35.996424,126.008952,35.996442,125.997402,25,22,14,1500
...,...,...,...,...,...,...,...,...,...,...
2321,2322,2024-06-01 19:06:50,36.004450,125.974010,36.005847,125.971021,20,12,3529,1200
2322,2323,2024-06-01 19:06:51,36.011034,126.013118,36.012542,126.006748,5,9,3530,300
2323,2324,2024-06-01 19:07:23,35.995619,126.008063,35.994682,126.010966,15,15,3562,900
2324,2325,2024-06-01 19:07:26,36.005451,126.000369,36.015028,126.004772,30,24,3565,1800


In [109]:
# Show the window with order times shifted so that its earliest order is at
# t = 0 (the result is only displayed, not stored).
sampling_raw_order_df.assign(ord_time_sec=lambda window: window.ord_time_sec - window.ord_time_sec.min())

Unnamed: 0,id,ord_date,shop_lat,shop_lon,dlv_lat,dlv_lon,cook_time,capacity_bike,ord_time_sec,cook_time_sec
923,924,2024-06-01 17:30:34,36.001118,125.986148,35.997875,125.986993,15,28,0,900
924,925,2024-06-01 17:30:51,36.007505,125.959067,36.007394,125.952205,30,48,17,1800
925,926,2024-06-01 17:30:55,35.989731,126.014729,35.983351,126.010157,10,27,21,600
926,927,2024-06-01 17:30:56,36.011627,126.027798,36.024845,126.026361,15,20,22,900
927,928,2024-06-01 17:30:59,35.990370,126.019784,35.986270,126.019599,10,43,25,600
...,...,...,...,...,...,...,...,...,...,...
1818,1819,2024-06-01 18:30:13,36.008284,126.013284,35.995812,126.011263,15,27,3579,900
1819,1820,2024-06-01 18:30:15,36.019976,125.972982,36.016681,125.958975,35,28,3581,2100
1820,1821,2024-06-01 18:30:24,36.002562,125.976482,36.011730,125.984114,10,22,3590,600
1821,1822,2024-06-01 18:30:25,36.011534,126.027144,36.000436,126.029899,20,31,3591,1200


In [11]:
# NOTE(review): `df2` is not assigned in any cell visible in this notebook, so
# this cell fails on a fresh kernel (Restart & Run All).  Presumably it was one
# of the raw order CSVs loaded in a since-deleted cell -- confirm and either
# restore the load or remove this cell.
df2

Unnamed: 0,id,ord_date,shop_lat,shop_lon,dlv_lat,dlv_lon,cook_time,capacity_bike
0,1,2024-06-01 13:59:12,36.009394,125.964705,36.011351,125.965453,20,0.23
1,2,2024-06-01 14:00:35,36.001145,125.979121,35.999448,125.972672,20,0.22
2,3,2024-06-01 14:00:40,35.979343,126.018868,35.975630,126.021726,15,0.28
3,4,2024-06-01 14:01:11,35.997118,125.990565,36.003172,125.994044,15,0.21
4,5,2024-06-01 14:01:19,36.005160,125.975198,36.003776,125.966647,15,0.17
...,...,...,...,...,...,...,...,...
3206,3207,2024-06-01 21:59:46,35.996452,126.010507,35.998309,126.009545,15,0.24
3207,3208,2024-06-01 22:00:04,35.999535,125.989370,36.012973,125.986937,5,0.32
3208,3209,2024-06-01 22:00:07,36.022032,125.969399,36.021257,125.972009,25,0.23
3209,3210,2024-06-01 22:00:13,36.022032,125.969399,36.016899,125.967775,20,0.26


# Generate test instances


Each instance is generated by calling `gen_problem_instance` with:
- A raw order history file provided by 우아한형제
- K: number of orders to randomly select
- rnd_seed: random seed number
- file_prefix: prefix string to be added to the filenames
- instance_name: appended to the filenames as a suffix
- `**other_options`: optional overrides of the default settings (e.g. `BIKE_FIXED_COST`)


## Public instances for training

In [138]:

# The 18 public instances are the cross product of three order-density files
# and six (size, buffer, fixed-cost) settings.  They are numbered 1..18 in the
# order high -> medium -> low, and each instance uses its serial number as the
# random seed and as the file-name suffix.

# (order history file, relaxed deadline buffer in minutes for that density)
density_sources = [
    ('./opt_challenge_data_order_density_high.csv', 30),
    ('./opt_challenge_data_order_density_medium.csv', 30),
    ('./opt_challenge_data_order_density_low.csv', 40),
]

TIGHT_BUFFER_MIN = 20  # deadline buffer (minutes) of the non-relaxed instances

# (K, use relaxed buffer?, BIKE_FIXED_COST, WALK_FIXED_COST, CAR_FIXED_COST)
instance_specs = [
    (100, False, 8000, 8000, 6000),
    (100, True,  8000, 8000, 6000),
    (200, False, 6000, 6000, 6000),
    (200, True,  6000, 6000, 6000),
    (300, False, 5000, 5000, 5000),
    (300, True,  9000, 4000, 5000),
]

prob_ser = 1

for order_file, relaxed_buffer_min in density_sources:
    for num_orders, use_relaxed_buffer, bike_fixed_cost, walk_fixed_cost, car_fixed_cost in instance_specs:
        gen_problem_instance(order_file,
                             K=num_orders,
                             rnd_seed=prob_ser,
                             buffer_min=relaxed_buffer_min if use_relaxed_buffer else TIGHT_BUFFER_MIN,
                             file_prefix='STAGE1',
                             instance_name=f'{prob_ser}',
                             BIKE_FIXED_COST=bike_fixed_cost,
                             WALK_FIXED_COST=walk_fixed_cost,
                             CAR_FIXED_COST=car_fixed_cost
                             )
        prob_ser += 1

## Hidden test instances

In [13]:
# Hidden test set: one instance per order density.  As for the public set, the
# running serial number doubles as random seed and file-name suffix.

# (order history file, K, buffer_min, BIKE_FIXED_COST, WALK_FIXED_COST, CAR_FIXED_COST)
hidden_instance_specs = [
    ('./opt_challenge_data_order_density_high.csv',   300, 20, 6000, 3000, 5000),
    ('./opt_challenge_data_order_density_medium.csv', 200, 30, 5000, 4000, 5000),
    ('./opt_challenge_data_order_density_low.csv',    100, 40, 9000, 4000, 6000),
]

prob_ser = 1

for order_file, num_orders, buffer_minutes, bike_fixed_cost, walk_fixed_cost, car_fixed_cost in hidden_instance_specs:
    gen_problem_instance(order_file,
                         K=num_orders,
                         rnd_seed=prob_ser,
                         buffer_min=buffer_minutes,
                         file_prefix='STAGE1_TEST',
                         instance_name=f'{prob_ser}',
                         BIKE_FIXED_COST=bike_fixed_cost,
                         WALK_FIXED_COST=walk_fixed_cost,
                         CAR_FIXED_COST=car_fixed_cost)
    prob_ser += 1