In [18]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import copy
from random import randint

Load the dataset

In [19]:
train_pricing_decisions = pd.read_csv('../../data/train_prices_decisions.csv')
test_user_info = pd.read_csv('../../data/test_user_info.csv')
num_qcut = 100

In [20]:
print(train_pricing_decisions.head())
print(test_user_info.head())

   user_index  Covariate1  Covariate2  Covariate3  price_item_0  price_item_1  \
0           0    4.319065    4.917636    6.054873      1.659785      3.598304   
1           1    6.375840    7.228608    1.324521     13.120469      2.578281   
2           2    1.524010    0.393772    4.873626     27.145855      4.538328   
3           3    1.243759    1.651083    2.268083      5.275166     14.818790   
4           4    2.510326    8.265984    4.602699     12.681638      6.809336   

   item_bought  
0            0  
1            1  
2           -1  
3            0  
4            0  
   user_index  Covariate1  Covariate2  Covariate3
0       30000    5.290482    4.597224    2.616371
1       30001    8.069435    0.308068    2.910754
2       30002    2.117773    1.527354    1.320430
3       30003    6.828597    3.455275    0.769989
4       30004    0.012091    3.873894    1.250743


In [21]:
def label_purchase(row, item):
    if (row['item_bought'] == item) :
        return 1
    return 0

def fit_logistic_regression(df, item):
    df_copy = copy.deepcopy(df)
    df_copy['purchased'] = df_copy.apply(lambda row: label_purchase(row, item), axis=1)
    #implement log reg
    feature_col = ['Covariate1', 'Covariate2', 'Covariate3', 'price_item_0', 'price_item_1']
    logreg = LogisticRegression(fit_intercept=True, solver='lbfgs')
    X_train = df_copy[feature_col]
    Y_train = df_copy['purchased'] 
    logreg.fit(X_train.values, Y_train.values)
    return logreg

In [22]:
prices_to_predict_0 = np.linspace(min(train_pricing_decisions['price_item_0']), max(train_pricing_decisions['price_item_0']), num_qcut)
prices_to_predict_1 = np.linspace(min(train_pricing_decisions['price_item_1']), max(train_pricing_decisions['price_item_1']), num_qcut)

#get prediction model
model_0 = fit_logistic_regression(train_pricing_decisions, 0)
model_1 = fit_logistic_regression(train_pricing_decisions, 1)

In [23]:
picklefile_0 = open('trained_model_0', 'wb')
picklefile_1 = open('trained_model_1', 'wb')
#pickle the object and store it in a file
pickle.dump(model_0, picklefile_0)
picklefile_0.close()
pickle.dump(model_1, picklefile_1)
picklefile_1.close()

In [24]:
#check that the object is correctly pickled and works when unpickled
# del model_0
# picklefile = open('./trained_model_0', 'rb')
# new_model = pickle.load(picklefile)
# new_model.predict(np.array([1,2,3,0.1,0.1]).reshape(1,-1))

In [25]:
#function for getting demand of a specific price
def get_demand(fitted_model, list_cov):
    prediction = pd.DataFrame(fitted_model.predict_proba(list_cov), columns = ["refused", "accepted"])
    return [prediction.iloc[i]['accepted'] for i in range(len(prediction))]

In [26]:
# test_user_info = test_user_info[0:5]
# print(test_user_info)

In [27]:
# new_buyer_covariates = test_user_info.iloc[0][1:].tolist()
# for i in range(len(1)):
# # for i in range(3):
#     list_covs = []
#     for j in range(len(prices_to_predict_0)):
#         price_0 = prices_to_predict_0[j]
#         price_1 = prices_to_predict_1[j]
#         cov = test_user_info.iloc[i][1:].tolist() + [price_0, price_1]
#         list_covs.append(cov)
        
#     predicted_purchase_0 = get_demand(model_0, list_covs)
#     predicted_purchase_1 = get_demand(model_1, list_covs)

#     predicted_purchase_0 = [predicted_purchase_0.iloc[i]['accepted'] for i in range(len(predicted_purchase_0))]
#     predicted_purchase_1 = [predicted_purchase_1.iloc[i]['accepted'] for i in range(len(predicted_purchase_1))]

#     demand_item_0.append(predicted_purchase_0)
#     demand_item_1.append(predicted_purchase_1)


# for idx in range(len(test_user_info)):
#     list_covs = []
#     for i in range(len(prices_to_predict_0)):
#         price_0 = prices_to_predict_0[i]
#         for j in range(len(prices_to_predict_1)):
#             price_1 = prices_to_predict_0[j]
#             new_buyer_covariates = test_user_info.iloc[idx][1:].tolist()
#             list_covs.append(new_buyer_covariates + [price_0, price_1])
            
#     predicted_purchase_0 = get_demand(model_0, list_covs)
#     predicted_purchase_1 = get_demand(model_1, list_covs)

#     predicted_purchase_0 = [predicted_purchase_0.iloc[i]['accepted'] for i in range(len(predicted_purchase_0))]
#     predicted_purchase_1 = [predicted_purchase_1.iloc[i]['accepted'] for i in range(len(predicted_purchase_1))]

#     demand_item_0.append(predicted_purchase_0)
#     demand_item_1.append(predicted_purchase_1)

# new_buyer_covariates = test_user_info.iloc[1][1:].tolist()
# list_covs = [new_buyer_covariates + [1, 2]]
# predicted_purchase_0 = get_demand(model_0, list_covs)
# predicted_purchase_0 = [predicted_purchase_0.iloc[i]['accepted'] for i in range(len(predicted_purchase_0))]

# print(predicted_purchase_0[0])

In [28]:
def get_optimal (covariates):
    max_price_0, max_price_1 = 0,0
    max_rev = 0

    #randomly sampled 100 pairs of prices
    for i in range (100):
        rand_item_0 = randint(0,num_qcut-1)
        rand_item_1 = randint(0,num_qcut-1)

        list_covs = [covariates + [prices_to_predict_0[rand_item_0], prices_to_predict_1[rand_item_1]]]

        prediction_0 = get_demand(model_0, list_covs)[0]
        prediction_1 = get_demand(model_1, list_covs)[0]

        cur_rev_0 = prediction_0 * prices_to_predict_0[rand_item_0]
        cur_rev_1 = prediction_1 * prices_to_predict_1[rand_item_1]

        if (cur_rev_0 + cur_rev_1 > max_rev):
            max_rev = cur_rev_0 + cur_rev_1
            max_price_0 = rand_item_0
            max_price_1 = rand_item_1
        
    return max_price_0, max_price_1, max_rev

In [29]:
def refine_price(price_0, price_1, covariates):
    #sample another 11*11 points around the current price
    
    rev_max = 0
    max_price_0 = price_0
    max_price_1 = price_1
    dir_x = [1, -1, 0, 1, -1, 0, 1, -1]
    dir_y = [0, 0, 1, 1, 1, -1, -1, -1]
   
    for step in range(5):
        for idx in range(len(dir_x)):
            next_x = price_0 + dir_x[idx] * step
            next_y = price_1 + dir_y[idx] * step
            
            if (next_x >= 0 and next_x < num_qcut and next_y >= 0 and next_y < num_qcut):
                list_covs = [covariates + [prices_to_predict_0[next_x], prices_to_predict_1[next_y]]]
                prediction_0 = get_demand(model_0, list_covs)[0]
                prediction_1 = get_demand(model_1, list_covs)[0]

                cur_rev_0 = prediction_0 * prices_to_predict_0[next_x]
                cur_rev_1 = prediction_1 * prices_to_predict_1[next_y]

                if (cur_rev_0 + cur_rev_1 > rev_max):
                    rev_max = cur_rev_0 + cur_rev_1
                    max_price_0 = next_x
                    max_price_1 = next_y

    return max_price_0, max_price_1, rev_max

In [30]:
tolerance = 0.01
max_steps = 50

# optimal_price_list = []
# optimal_rev_list = []
output = pd.DataFrame(columns=['user_index', 'price_item_0','price_item_1','expected_revenue'])
for customer in range(len(test_user_info)):
    cov = [test_user_info.iloc[customer]['Covariate1'], test_user_info.iloc[customer]['Covariate2'], test_user_info.iloc[customer]['Covariate3']]
    price_0, price_1, cur_max = get_optimal(cov)
    # while(max_steps > 0):
    #     r_price_0, r_price_1, r_max = refine_price(price_0, price_1, customer)
    #     if (abs(r_max - cur_max) <= tolerance or r_max < cur_max):
    #         break
    #     price_0, price_1, cur_max = r_price_0, r_price_1, r_max
    
    while(max_steps > 0):
        r_price_0, r_price_1, r_max = refine_price(price_0, price_1, cov)
        if (abs(r_max - cur_max) <= tolerance or r_max < cur_max):
            break
        price_0, price_1, cur_max = r_price_0, r_price_1, r_max
        max_steps -= 1
    price_0 = max(prices_to_predict_0[price_0], 0)
    price_1 = max(prices_to_predict_1[price_1], 0)

#     optimal_price_list.append([price_0, price_1])
#     optimal_rev_list.append(cur_max)
    output.loc[len(output.index)] = [test_user_info['user_index'].iloc[customer], price_0, price_1, cur_max]
    if customer % 1000 == 0:
        print('Finished', customer)
output.to_csv('../part2_static_prices_submission.csv',index=False)

Finished 0
Finished 1000
Finished 2000
Finished 3000
Finished 4000
Finished 5000
Finished 6000
Finished 7000
Finished 8000
Finished 9000
Finished 10000
Finished 11000
Finished 12000
Finished 13000
Finished 14000
Finished 15000
Finished 16000
Finished 17000
Finished 18000
Finished 19000
Finished 20000
Finished 21000
Finished 22000
Finished 23000
Finished 24000
Finished 25000
Finished 26000
Finished 27000
Finished 28000
Finished 29000


In [31]:
print(optimal_price_list)
print(optimal_rev_list)

NameError: name 'optimal_price_list' is not defined

In [32]:
output

Unnamed: 0,user_index,price_item_0,price_item_1,expected_revenue
0,30000.0,102.512876,27.393515,24.565190
1,30001.0,102.512876,27.393515,24.823401
2,30002.0,102.512876,27.393515,25.034283
3,30003.0,102.512876,31.450333,28.673785
4,30004.0,102.512876,27.393515,24.404083
...,...,...,...,...
29995,59995.0,98.371346,28.407720,27.224557
29996,59996.0,102.512876,23.336698,22.972927
29997,59997.0,100.442111,24.350903,23.162646
29998,59998.0,100.442111,27.393515,26.264371


In [None]:
#brute force
# def get_optimal_bf(customer_idx):
#     max_revenue = 0
#     max_price_0, max_price_1 = 0,0

#     index_0, index_1 = 0,0

#     demand_0 = demand_item_0[customer_idx]
#     demand_1 = demand_item_1[customer_idx]

#     for i in range(len(demand_0)):
#         price_0 = prices_to_predict_0[i//num_qcut]
#         price_1 = prices_to_predict_1[i%num_qcut]
#         cur_rev = demand_0[i] * price_0 + demand_1[i] * price_1
#         if (cur_rev > max_revenue):
#             max_price_0 = price_0
#             max_price_1 = price_1
#             index_0 = i//num_qcut
#             index_1 = i%num_qcut
#             max_revenue = cur_rev

#     return max_revenue, max_price_0, max_price_1, index_0, index_1