In [10]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
import random
from random import randint

We have two types of movies, type 1 and type 2. Each movie has a vector associated with it, $[x, y]$, where $x$ and $y$ are binary variables taking the values 0 or 1: $x = 1$ means that the film is of type 1, and $y = 1$ means that the film is of type 2.
Each user also has a hidden vector associated with him or her, $[b, c]$, where $b$ and $c$ are the user's preferences for type 1 and type 2 films respectively, with $0 \leq b, c \leq 1$ and $b + c = 1$. A user with preference vector $[b, c]$ has probability $b$ of accepting a recommendation of a type 1 film and probability $c$ of accepting a recommendation of a type 2 film. Such a user rates a film at 5 times their preference for its type: $5b$ for a type 1 film and $5c$ for a type 2 film.

In [71]:
def generate_Users(number_of_users):
    """Draw random film-type preferences for ``number_of_users`` users.

    Each user gets a single ``random.uniform(0, 1)`` draw ``p`` as the
    preference for type 1 films; the preference for type 2 films is ``1 - p``.

    Returns:
        A NumPy array with one ``[p, 1 - p]`` row per user.
    """
    type1_preferences = [random.uniform(0, 1) for _ in range(number_of_users)]
    return np.array([[p, 1 - p] for p in type1_preferences])

In [193]:
def generate_Films(number_of_films):
    """Build the film property matrix for ``number_of_films`` films.

    Every film gets one ``randint(0, 1)`` draw ``x``; its row is
    ``[x, 1 - x]``, so exactly one of the two type flags is set.

    Returns:
        A NumPy array with one ``[x, 1 - x]`` row per film.
    """
    type1_flags = [randint(0, 1) for _ in range(number_of_films)]
    return np.array([[flag, 1 - flag] for flag in type1_flags])

In [194]:
def user_movie_ratings(films, users, percentage):
    """Simulate which films each user has already rated, and with what score.

    Args:
        films: film property matrix, one ``[x, y]`` row per film.
        users: user preference matrix, one ``[b, c]`` row per user.
        percentage: chance that a given user has viewed a given film.

    Returns:
        A NumPy array in which each row represents a single film and each
        column a user; 0 marks a film the user has not viewed.
    """
    film_rows = films.tolist()
    user_rows = users.tolist()

    table = []
    for kind in film_rows:
        row = []
        for pref in user_rows:
            viewed = random.uniform(0, 1) <= percentage
            if not viewed:
                row.append(0)
                continue
            # NOTE(review): for preferences in [0, 1] and one-hot film vectors
            # the division by 2 caps this value at round(2.5) == 2, while the
            # notebook text describes a rating of 5 * preference - confirm
            # which is intended.
            score = round(5 * (pref[0] * kind[0] + pref[1] * kind[1]) / 2)
            if score == 0:
                # 0 is the "not viewed" marker, so a viewed film that rounds
                # down to 0 is given a random rating instead.
                score = round(5 * random.uniform(0.1, 1))
            row.append(score)
        table.append(row)
    return np.array(table)

In [266]:
# Seed the stdlib `random` module, which the generator functions above draw
# from, so that a fresh "Restart & Run All" rebuilds the same synthetic data.
RANDOM_SEED = 0
NUM_USERS = 6040
NUM_FILMS = 3883
VIEW_PROBABILITY = 0.05  # chance that a given user has already viewed a given film

random.seed(RANDOM_SEED)
users = generate_Users(NUM_USERS)
films = generate_Films(NUM_FILMS)
ratings = user_movie_ratings(films, users, VIEW_PROBABILITY)


In [206]:
def rec(rates, k=50):
    """Predict a rating for every (user, film) pair with a truncated SVD.

    Args:
        rates: rating matrix with one row per film and one column per user.
        k: number of singular values to keep. It is reduced when the matrix
            is too small for the requested value.

    Returns:
        A NumPy array of rounded predicted ratings with one row per user and
        one column per film (the transpose of the layout of ``rates``).
    """
    by_user = rates.T
    user_ratings_mean = np.mean(by_user, axis=1)
    # Use the matrix that was passed in, not the notebook-level `ratings`.
    R_demeaned = by_user - user_ratings_mean.reshape(-1, 1)
    # The default svds solver needs k <= min(M, N) - 1; clamp so that small
    # matrices do not fail on the default k.
    k = min(k, min(R_demeaned.shape) - 1)
    U, sigma, Vt = svds(R_demeaned, k=k)
    sigma = np.diag(sigma)
    all_user_predicted_ratings = np.round(
        np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
    )
    return all_user_predicted_ratings

Question: Should we also recommend videos that users have already viewed? In my experience, YouTube recommends videos that I have already watched all the time.

In [250]:
def recommend_movies(predictions_df, userID, num_recommendations=5):
    """Return the indices of the films with the highest predicted ratings.

    Args:
        predictions_df: predicted ratings, indexed as ``[user, film]``.
        userID: row index of the user to recommend for.
        num_recommendations: how many films to return; values of 0 or less
            yield an empty result.

    Returns:
        A NumPy array of film indices ordered by ascending predicted rating,
        so the strongest recommendation is last.
    """
    ranked = np.argsort(predictions_df[userID])
    if num_recommendations <= 0:
        # A slice of [-0:] would select every film, not none.
        return ranked[:0]
    return ranked[-num_recommendations:]

Initial Recommendations

In [267]:
# Predicted ratings computed from the initial rating matrix, before any
# recommendation has been accepted.
Recommended = rec(ratings)

In [268]:
# For every user, count how many of the initially recommended films are of
# type 1 and how many are of type 2; `initial[u]` is `[type1, type2]`.
initial = []
for user_index in range(users.shape[0]):
    picks = recommend_movies(Recommended, user_index).tolist()
    type1_count = sum(1 for film_index in picks if films[film_index, 0] == 1)
    initial.append([type1_count, len(picks) - type1_count])

In [269]:
def accepting_recommendations(recommend, initial):
    """Simulate every user reacting to their current recommendations.

    Reads the notebook-level ``users`` and ``films`` arrays and writes newly
    accepted ratings into the notebook-level ``ratings`` array in place.

    Args:
        recommend: predicted ratings, as consumed by ``recommend_movies``.
        initial: list with one ``[type1, type2]`` pair per user holding the
            type counts of the previous recommendations; updated in place.

    Returns:
        A tuple ``(isChange, initial, j)``: whether any user's type counts
        changed, the updated ``initial`` list, and the number of users whose
        counts changed.
    """
    isChange = False
    j = 0
    for i in range(users.shape[0]):
        videos = recommend_movies(recommend, i).tolist()
        type1 = sum(1 for video in videos if films[video, 0] == 1)
        type2 = len(videos) - type1

        if type1 != initial[i][0] or type2 != initial[i][1]:
            isChange = True
            j += 1
            initial[i][0] = type1
            initial[i][1] = type2

        user = users[i,]
        for video in videos:
            # The user accepts with a probability equal to their preference
            # for the film's type.
            preference = user[0] if films[video][0] == 1 else user[1]
            if random.uniform(0, 1) <= preference:
                # `ratings` is indexed [film, user] (see the write below), so
                # the film's mean rating is taken along its row.
                # NOTE(review): the mean includes the 0 entries that mark
                # films a user has not rated - confirm that is intended.
                mean_rating = np.mean(ratings[video, :])
                ratings[video, i] = min(round(mean_rating + preference), 5)
    return (isChange, initial, j)

Run Simulation

In [None]:
# Repeat "recompute predictions -> users react" until no user's mix of
# recommended film types changes. Acceptance is random, so the loop may never
# settle; MAX_ROUNDS bounds the run.
MAX_ROUNDS = 100
i = 0
final_result = []
while i < MAX_ROUNDS:
    Recommended = rec(ratings)
    Changed, initial, j = accepting_recommendations(Recommended, initial)
    # The first pass is skipped as a stopping point: a "no change" result
    # there does not yet reflect any accepted recommendation.
    if not Changed and i != 0:
        final_result = initial
        break
    i = i + 1
    print('change')
    print(j)
    print('complete round ' + str(i))
else:
    # Round limit reached without convergence; keep the latest type counts.
    final_result = initial
    print('stopped after ' + str(MAX_ROUNDS) + ' rounds without converging')

change
0
complete round 1
change
1477
complete round 2
change
1398
complete round 3
change
1393
complete round 4
change
1415
complete round 5
change
1371
complete round 6
change
1347
complete round 7
change
1416
complete round 8
change
1448
complete round 9
change
1375
complete round 10
change
1399
complete round 11
change
1365
complete round 12
change
1381
complete round 13
change
1349
complete round 14
change
1356
complete round 15
change
1329
complete round 16
change
1383
complete round 17
change
1382
complete round 18
change
1375
complete round 19
change
1391
complete round 20
change
1360
complete round 21
change
1347
complete round 22
change
1399
complete round 23
change
1412
complete round 24
change
1374
complete round 25
change
1408
complete round 26
change
1345
complete round 27
change
1303
complete round 28
