In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
cd 'drive/My Drive/Dissertation/data/'

/content/drive/My Drive/Dissertation/data


In [3]:
ls

genome-scores.csv  links.csv   ratings.csv  tags.csv
genome-tags.csv    movies.csv  README.txt


In [4]:
import pandas as pd
import numpy as np

In [5]:
def read_data():
    ratings = pd.read_csv('ratings.csv', engine = 'python')
    movies  = pd.read_csv( 'movies.csv', engine = 'python')
    links   = pd.read_csv(  'links.csv', engine = 'python')
    
    movies = movies.join(movies.genres.str.get_dummies().astype(bool))
    movies.drop('genres', inplace = True, axis = 1)
    data = ratings.join(movies, on = 'movieId', how = 'left', rsuffix = '_movie')
    
    return data


def preprocess(data, N = 20000, stars = 4.5):
    # remove ratings of movies with < N ratings. too few ratings will cause the recsys to get stuck in offline evaluation
    movies_to_keep = pd.DataFrame(data.movieId.value_counts()).loc[pd.DataFrame(data.movieId.value_counts())['movieId'] >= N].index
    data           = data.loc[data['movieId'].isin(movies_to_keep)]

    # shuffle rows to debias order of user ids
    data = data.sample(frac = 1)

    # create a 't' column to represent time steps for the bandit to simulate a live learning scenario
    data['t']  = np.arange(len(data))
    data.index = data['t']

    # rating >= stars is a 'like', < stars is a 'dislike'
    data['liked'] = data['rating'].apply(lambda x: 1 if x >= stars else 0)

    return data


def get_ratings(N = 20000):
    data = read_data()
    data = preprocess(data, N)
    return data

In [6]:
def replay_score(history, data, t, batch_size, recs):
    # reward if rec matches logged data, ignore otherwise
    actions = data[t:t+batch_size]
    actions = actions.loc[actions['movieId'].isin(recs)]
    actions['scoring_round'] = t

    # add row to history if recs match logging policy
    history = history.append(actions)
    action_liked = actions[['movieId', 'liked']]

    return history, action_liked

In [7]:
data = get_ratings(N = 1500)

In [8]:
data

Unnamed: 0_level_0,userId,movieId,rating,timestamp,movieId_movie,title,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,t,liked
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
0,160210,931,2.0,1002134161,952.0,Around the World in 80 Days (1956),False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,0
1,147850,551,4.5,1270693968,558.0,"Pagemaster, The (1994)",False,True,True,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,1,1
2,66811,1953,5.0,957332706,2042.0,D2: The Mighty Ducks (1994),False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,2,1
3,28661,64957,3.5,1465089693,,,,,,,,,,,,,,,,,,,,,,,3,0
4,58548,541,3.0,995161420,547.0,Surviving the Game (1994),False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21121286,59252,163,2.0,1111799352,165.0,Die Hard: With a Vengeance (1995),False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,21121286,0
21121287,115091,736,2.0,1210270980,752.0,Vermont Is For Lovers (1992),False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,True,False,False,False,False,21121287,0
21121288,91661,1356,5.0,1031001092,1392.0,Citizen Ruth (1996),False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,21121288,1
21121289,136906,112,4.0,974734975,114.0,Margaret's Museum (1995),False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,21121289,0


In [None]:
# simulation parameters: slate size, batch size (number of events per training iteration)
slate_size = 5
batch_size = 10

# empty history 
# (the algorithm should be able to see all events and outcomes prior to the current timestep, but not current or future ones)
history = pd.DataFrame(data = None, columns = data.columns)
history = history.astype({'movieId': 'int32', 'liked': 'float'})

# empty list to store scores at each step
rewards = []

for t in range(data.shape[0] // batch_size):
    t = t * batch_size
    # generate recommendations from a random policy
    recs = np.random.choice(data.movieId.unique(), size = (slate_size), replace = False)
    # send recommendations and dataset to a scoring function for the model to learn & adjust its policy at the next iteration
    history, action_score = replay_score(history, data, t, batch_size, recs)
    if action_score is not None:
        action_score = action_score.liked.tolist()
        rewards.extend(action_score)