In [7]:
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import pickle

# import open bandit pipeline (obp)
import obp
from obp.policy import (
    IPWLearner, 
    QLearner,
    NNPolicyLearner, 
    Random
)

In [6]:
import os
# Show the kernel's working directory: the data paths defined in this notebook
# are relative, so they resolve against this location.
os.getcwd()

'/Users/luciajayne/LuciaLocal/librec-auto-demo2021-main/demoOFAiR'

In [8]:
# Input locations, relative to the working directory.
# Forward slashes are used on purpose: in a normal Python string literal a
# backslash followed by "r" is read as a carriage return, so a
# backslash-separated path would need doubled backslashes (or a raw string).
reranked_output_file = './exp00000/result/out-1.txt' # re-ranked recommendation output (read into df)
item_feature_file = '../dataOFAiR/item-features.csv'

In [9]:
def ipw_train_learner(context, actions, max_iter=5000, n_actions=None):
    """Fit an obp ``IPWLearner`` with a logistic-regression base classifier.

    Every logged (context, action) pair is treated as a positive example:
    the reward vector handed to the learner is all ones.

    Parameters
    ----------
    context : array-like
        Context rows, one per logged round; passed to ``IPWLearner.fit``.
    actions : array-like of int
        Action id chosen in each logged round (same length as ``context``).
    max_iter : int, default 5000
        Iteration limit for the logistic-regression base classifier.
    n_actions : int, optional
        Size of the action space. When omitted it is derived from the data as
        ``max(actions) + 1`` so that every logged action id is a valid index.

    Returns
    -------
    IPWLearner
        The fitted learner.

    Raises
    ------
    ValueError
        If ``actions`` is empty and ``n_actions`` is not given.
    """
    actions = np.asarray(actions)

    if n_actions is None:
        if actions.size == 0:
            raise ValueError("actions is empty; cannot infer n_actions")
        # The action space must cover the largest action id, not the number
        # of logged rows.
        n_actions = int(actions.max()) + 1

    # The base classifier is handed over unfitted: IPWLearner works on its own
    # clones of it, so fitting it here first would be discarded work.
    ipw_learner = IPWLearner(
        n_actions=n_actions,
        base_classifier=LogisticRegression(
            random_state=0, max_iter=max_iter, C=500
        ),
    )

    # Constant reward of 1 for every logged round.
    rewards = np.ones(len(context))

    # Train the learner on the logged bandit data.
    ipw_learner.fit(
        context=context,
        action=actions,
        reward=rewards,
    )

    return ipw_learner

def agg_fnc(x):
    """Collapse one group into two comma-separated strings.

    ``x`` is a frame (or mapping of columns) with ``itemID`` and ``rating``.
    The result is a Series indexed by ``L`` (the itemIDs joined with commas)
    and ``L_rating`` (the ratings joined with commas), both in row order.
    """
    item_list = ','.join(str(item) for item in x["itemID"])
    rating_list = ','.join(str(score) for score in x["rating"])
    return pd.Series([item_list, rating_list], index=["L", "L_rating"])
             

In [10]:
# Load the re-ranked recommendation output (no header row), label its three
# columns and order the rows by rating, lowest first.
# NOTE(review): the ascending sort makes list position 0 the lowest-rated item
# downstream -- confirm that this is the intended ranking direction.
# Tip: slice the frame (e.g. [:1000]) right after reading to work on a sample.
df = pd.read_csv(reranked_output_file, header=None)
df.columns = ['userID', 'itemID', 'rating']
df = df.sort_values(by="rating")

# Load the item-feature table (no header row) and label its columns.
df_items = pd.read_csv(item_feature_file, header=None)
df_items.columns = ["itemID", "condition", "num"]

# Preview the first rows together with the overall shape.
df.head(), df.shape


(       userID  itemID  rating
 33549     671    8199     0.0
 2099       42   36931     0.0
 15199     304     889     0.0
 27549     551     670     0.0
 15249     305    9010     0.0,
 (33550, 3))

In [11]:
# Build one row per user holding the comma-joined item list (L) and rating
# list (L_rating), then expand L into one column per list position.
df_L = (
    df.groupby('userID')[['itemID', 'rating']]
    .apply(agg_fnc)
    .reset_index()
)
df_L2 = df_L['L'].str.split(",", expand=True)

# Number of list positions = number of columns produced by the split.
len_list = df_L2.shape[1]
len_list

50

In [12]:
# Build the learner input, one row per (user, list position):
#   action   = itemID
#   position = rank of the item in the user's list
#   context  = userID
df_L2_stack = df_L2.stack().rename("action")
df_input = (
    df_L2_stack.to_frame()
    .reset_index(level=1)       # inner index level (list position) becomes a column
    .join(df_L["userID"])       # attach the userID of each row of df_L
)
df_input.columns = ['position', 'action', 'context']

In [13]:
# Size of the action space: one more than the largest itemID in the
# item-feature table, so that item ids can be used directly as action indices
# (max_actions is passed to the learner as n_actions).
# Alternative kept for reference -- largest id that appears in the recommendations only:
#max_actions =df_input['action'].astype(int).max() +1
max_actions = df_items['itemID'].astype(int).max() + 1

# Display the learner input (one row per user / list-position pair).
df_input

Unnamed: 0,position,action,context
0,0,86817,1
0,1,39414,1
0,2,140174,1
0,3,83361,1
0,4,5765,1
...,...,...,...
670,45,31547,671
670,46,4930,671
670,47,31952,671
670,48,5071,671


In [19]:
# Define an IPWLearner with a random-forest base classifier.
# len_list is the number of list positions measured from the data in the
# earlier cell (df_L2.shape[1]) rather than a hard-coded constant, so the
# learner stays in sync with the input file.
ipw_rf = IPWLearner(
    n_actions=max_actions,
    base_classifier=RandomForest(n_estimators=1000, random_state=0),
    len_list=len_list,
)

# Convert the df_input columns into the arrays the learner expects:
# context is a single-column 2-D array, the others are 1-D.
context = df_input["context"].astype(int).values.reshape(-1, 1)
actions = df_input["action"].astype(int).values.reshape(-1,)
positions = df_input["position"].astype(int).values.reshape(-1,)
rewards = np.ones(df_input.shape[0])  # every logged recommendation gets reward 1

# Train the learner on the logged data.
ipw_rf.fit(
    context=context,
    action=actions,
    reward=rewards,
    position=positions
)

In [20]:
# Sanity check: predict for every user at once to see whether the learner can
# handle the full user set.
# The context is the sorted unique userIDs reshaped into a single-column 2-D array.
all_users = np.unique(df_L["userID"]).reshape(-1,1)
results = ipw_rf.predict(context=all_users)
results.shape # shape of the prediction array (recorded output reads as users x actions x positions)

(671, 160719, 50)

In [21]:
# Turn the prediction for the first user back into a list (L):
# for each list position, take the index of the action with the largest value.
results[0].argmax(axis=0)

array([ 86817,  39414, 140174,  83361,   5765,   8899,    876,     99,
        27724,  80717,   5051, 107978,   4459,   2620,   2927,  42632,
        31547,  86000,   4235,    389,  64278,  48791,   4566,  26797,
         2284,   5646,   7484,   3576,    559,    845,  73290,  26171,
        25752,  25764,   2636,   4201,    966,   4518,   1859,   4252,
         8420,   5062,  48301,   4437,  31952,  31116,   8208,   5071,
         1819,   1563])

In [22]:
# Display the per-user lists: L holds the comma-joined itemIDs and L_rating the
# comma-joined ratings produced by agg_fnc.
df_L

Unnamed: 0,userID,L,L_rating
0,1,"86817,39414,140174,83361,5765,8899,876,99,2772...","0.0,0.0060840143886695,0.0120846810711471,0.01..."
1,2,"4232,8899,5051,37857,214,26251,820,39414,309,7...","0.0,0.0045460211417753,0.0094039757291869,0.01..."
2,3,"40226,26152,1945,5460,4252,1546,64278,309,4879...","0.0,0.0046877527981084,0.0086959450327293,0.01..."
3,4,"309,83827,4252,64900,48791,50658,3036,876,8675...","0.0,0.0058205519149359,0.0118411276396502,0.01..."
4,5,"2636,1281,4518,8938,2284,4252,64900,2927,26171...","0.0,0.0047734555100379,0.0100955365811297,0.01..."
...,...,...,...
666,667,"83827,73290,5765,4566,4232,52767,559,8899,5051...","0.0,0.0061835909057236,0.011923783901765,0.017..."
667,668,"83827,1945,37731,8275,86817,52767,820,50658,22...","0.0,0.0063279314242814,0.0130862530312499,0.01..."
668,669,"3021,6963,107978,8899,1450,582,5765,37857,2679...","0.0,0.0060960190036755,0.013304707787738,0.019..."
669,670,"107978,26258,25774,7484,1933,582,8699,5765,889...","0.0,0.0057961401200287,0.0126646447647999,0.02..."


In [23]:
# TODO: load a previously saved reranker instead of retraining.
# The file written below holds a dict mapping a reranker name to a
# (learner, df_L) tuple, so loading it back looks like this
# (only unpickle files you created yourself -- pickle can execute code):
#with open('OBP_Rerankers.pickle', 'rb') as f:
#    rerank_dict = pickle.load(f)
#learner_saved, df_L_saved = rerank_dict["OFAiR"]

# Save the trained learner together with the per-user lists under the key "OFAiR".
rerank_dict = {"OFAiR": (ipw_rf, df_L)}
with open('OBP_Rerankers.pickle', 'wb') as f:
    # The with-block closes the file on exit; no explicit close() is needed.
    pickle.dump(rerank_dict, f)