In [1]:
import pandas as pd
import numpy as np
import pickle

FILEPATH = 'agents/pricemaker_files/'

def openpickle(filename):
    """Deserialize and return the object stored in the pickle file ``filename``.

    The file is opened in binary read mode and is closed again before the
    function returns. Unpickling can execute arbitrary code, so this should
    only be pointed at files from a trusted source.
    """
    with open(filename, "rb") as handle:
        return pickle.load(handle)



In [2]:
# Training inputs read from the local ./data directory: the pricing decisions
# table (CSV) plus two pickled objects.
train_pricing_decisions = pd.read_csv('./data/train_prices_decisions.csv')
train_covariate, train_noisy_embedding = (
    openpickle('./data/train_covariate'),
    openpickle('./data/train_noisy_embedding'),
)

# Pickled artifacts stored under FILEPATH: one user embedding object and one
# embedding per item (item 0 and item 1).
existing_embedding = openpickle(FILEPATH + 'user_embedding')
item0_embedding, item1_embedding = (
    openpickle(FILEPATH + 'item0embedding'),
    openpickle(FILEPATH + 'item1embedding'),
)

In [3]:
# Stack the two item embeddings so that row k is the embedding of item k.
item_embedding = np.array([item0_embedding, item1_embedding])

# Index the decisions by user. The guard keeps this cell re-runnable: once
# 'user_index' has been moved into the index it is no longer a column, and an
# unconditional set_index would raise KeyError on a second execution.
if 'user_index' in train_pricing_decisions.columns:
    train_pricing_decisions = train_pricing_decisions.set_index('user_index')

# Everything except the purchase outcome is kept as the price features.
price_pair = train_pricing_decisions.drop(columns=["item_bought"])

In [4]:
# Users that already have a row in train_noisy_embedding.
existing_train_idx = list(train_noisy_embedding.index)

# Users present in train_covariate but missing from train_noisy_embedding.
# A vectorised membership mask replaces the per-user scan of a Python list
# (which was quadratic in the number of users) and keeps train_covariate's
# original ordering.
# NOTE: this must run before any later cell adds rows to
# train_noisy_embedding, otherwise no user is reported as new.
is_new_user = ~train_covariate.index.isin(train_noisy_embedding.index)
new_train_idx = list(train_covariate.index[is_new_user])
new_train_covariate = train_covariate.loc[new_train_idx]

In [5]:
# Load the fitted nearest-neighbour model through the shared helper so the
# file handle is closed deterministically (a bare open() passed straight to
# pickle.load is never explicitly closed).
neigh = openpickle(FILEPATH + 'knn_model')

# Row i of neighbor_ids holds the positional indices (into existing_embedding)
# of the neighbours found for the i-th row of new_train_covariate.
neighbor_ids = neigh.kneighbors(new_train_covariate, return_distance=False)

# Impute each new user's embedding as the column-wise mean of its neighbours'
# embeddings. All rows are assembled in one DataFrame instead of enlarging
# train_noisy_embedding one .loc assignment at a time.
imputed_embedding = pd.DataFrame(
    [existing_embedding.iloc[ids].mean(axis=0).to_numpy() for ids in neighbor_ids],
    index=pd.Index(new_train_idx, name=train_noisy_embedding.index.name),
    columns=train_noisy_embedding.columns,
)

# Dropping any previously imputed rows first keeps the cell safe to re-run:
# the rows are replaced rather than duplicated.
train_noisy_embedding = pd.concat(
    [train_noisy_embedding.drop(index=new_train_idx, errors='ignore'), imputed_embedding]
).sort_index()

In [6]:
# Product of every user embedding with both item embeddings: one column per
# item, indexed like train_noisy_embedding.
embedding_products = train_noisy_embedding @ item_embedding.T

# Feature matrix: covariates, the embedding products and the price columns,
# left-joined on the index of train_covariate.
X_train = train_covariate.join(embedding_products).join(price_pair)
# The joined frame mixes string and integer column labels; make them all str.
X_train.columns = X_train.columns.astype(str)
print(X_train.shape)

# Select the labels in X_train's row order. Both objects are later passed to
# the classifier as bare .values arrays, so any ordering difference between
# train_covariate and train_pricing_decisions would silently pair features
# with the wrong label. .loc also fails loudly if a user has no label.
y_train = train_pricing_decisions.loc[X_train.index, 'item_bought']

(14000, 7)


## Demand prediction

In [7]:
from sklearn.linear_model import LogisticRegressionCV

In [8]:
# Cross-validated multinomial logistic regression fitted on the raw arrays.
# fit() returns the estimator itself, so clf is the fitted model.
clf = LogisticRegressionCV(multi_class="multinomial", max_iter=1000)
clf = clf.fit(X_train.values, y_train.values)

In [9]:
# Persist the fitted classifier. The context manager guarantees the file is
# flushed and closed as soon as the dump finishes, instead of whenever the
# anonymous handle happens to be garbage-collected.
with open(FILEPATH + 'logit_model', 'wb') as model_file:
    pickle.dump(clf, model_file)