# An ensemble method for top-N recommendations from the SVD

## SVD

In [None]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds
import pickle
import copy 
import random

In [None]:
# Load the ratings file: comma-separated with no header row, so the columns
# are addressed by position (0, 1, ...) in the following cells.
data_file = pd.read_csv('data/training.csv', sep=',', header=None, engine='python')
data_file.shape

In [None]:
# `movies` maps a matrix column index to the real movie id, and
# `movie_indices` is the inverse (real movie id -> column index).
# `users` / `user_indices` play the same roles for the matrix rows.
# (Original author's notes: 6040 users, and 3667 movies whose ids run up to 3952.)
users = np.unique(data_file[0])   # sorted distinct ids from column 0
movies = np.unique(data_file[1])  # sorted distinct ids from column 1

number_of_rows = len(users)
number_of_columns = len(movies)

movie_indices = {movie_id: column for column, movie_id in enumerate(movies)}
user_indices = {user_id: row for row, user_id in enumerate(users)}

In [None]:
# Sparse user x movie rating matrix, filled one rating at a time.
# Every row of the file is unpacked into exactly four integer fields; only the
# first three (user id, movie id, rating) are used.
V = sp.lil_matrix((number_of_rows, number_of_columns))
for u, i, r, gona in (map(int, row) for row in data_file.values):
    # Translate the real user / movie ids into matrix coordinates.
    V[user_indices[u], movie_indices[i]] = r

In [None]:
# Sanity checks on the id <-> index lookups.
print(users)
print(user_indices[6000])  # matrix row assigned to user id 6000
print(movies)
print(movie_indices[3952])  # matrix column assigned to movie id 3952
#print (V[:3,:])

In [None]:
# Spot-check which real movie ids sit at a few column indices.
print(movies[253])
print(movies[20])
print(movies[0])
print(movies)

In [None]:
# Truncated SVD of the sparse rating matrix with k = 16 singular triplets:
# u has one row per user, vt one column per movie, s holds the singular values.
u,s, vt = svds(V, k = 16)

In [None]:
# Square matrix with the singular values on its diagonal, so that
# u . s_diag_matrix . vt rebuilds the low-rank approximation.
s_diag_matrix = np.diag(s)

In [None]:
# Low-rank reconstruction of the rating matrix (one row per user, one column
# per movie), multiplied left to right: (u . S) . vt.
X_lr = u.dot(s_diag_matrix).dot(vt)

In [None]:
# Sign split of the movie loadings on the last factor (row s.size - 1 of vt).
# Only strictly positive values count as positive; zeros fall on the negative side.
last_factor_row = vt[s.size - 1, :]
poscounter = int((last_factor_row > 0).sum())
negcounter = int(last_factor_row.size) - poscounter
print(negcounter,poscounter)

In [None]:
# Reconstructed score for the user in row 0 and the movie in column 2354.
X_lr[0,2354]

In [None]:
#X_lr.tofile(file = 'data/svdresults.csv', sep = "::")

In [None]:
# Shape of the reconstruction: (number of users, number of movies).
X_lr.shape

## Ensemble Method for Top-N Recommendations

In [None]:
class Node:
    """One node of a factor-split tree.

    All fields are plain public attributes that the tree-building functions
    fill in after construction:

    * ``itemFactors`` / ``userFactors`` -- latent-factor blocks for the items
      and users that belong to this node.
    * ``itemList`` / ``userList`` -- identifiers of those items and users, in
      the same order as the factor blocks.
    * ``factor`` -- the factor this node was split on (``None`` until set).
    * ``score`` -- quality score of the split (starts at 0).
    * ``left`` / ``right`` -- child nodes (``None`` until the node is split).
    """

    def __init__(self):
        # Placeholder blocks: 32 rows and zero columns, i.e. "no data yet".
        self.itemFactors, self.userFactors = np.zeros((32, 0)), np.zeros((32, 0))
        self.itemList, self.userList = [], []
        self.left = self.right = None
        self.factor = None
        self.score = 0

In [None]:
def printNode(node):
    """Print a short summary of `node`.

    Four label/value pairs are written, each on its own line: the shape of
    the item-factor block, the shape of the user-factor block, the split
    factor and the score. Children are not printed.
    """
    summary = (
        ("itemFactors :", node.itemFactors.shape),
        ("userFactors :", node.userFactors.shape),
        ("factor :", node.factor),
        ("score :", node.score),
    )
    for label, value in summary:
        print(label)
        print(value)
def printTree(node):
    """Print `node` and then, depth-first, its left and right subtrees.

    Each existing child is introduced by a ``LEFT :`` or ``RIGHT :`` line.
    Missing children (``None``) are skipped; the check uses identity so that
    it never triggers a custom or element-wise ``!=``.
    """
    printNode(node)
    for label, child in (("LEFT :", node.left), ("RIGHT :", node.right)):
        if child is not None:
            print(label)
            printTree(child)
def countLeaves(node, threshold=300):
    """Count the leaf nodes below `node`.

    A child is counted as a leaf when it holds at most `threshold` items,
    i.e. its ``itemFactors`` block has at most `threshold` columns. A child
    whose ``itemFactors`` is one-dimensional is a single item's factor vector
    and is counted as a leaf as well (indexing ``shape[1]`` on it would raise
    ``IndexError``). Larger children are descended into recursively.

    Args:
        node: root of the (sub)tree; must expose ``left`` and ``right``.
        threshold: maximum number of items a leaf may hold (default 300).

    Returns:
        Number of leaves strictly below `node`. `node` itself is never
        counted, so a node without children yields 0.
    """
    count = 0
    for child in (node.left, node.right):
        if child is None:
            continue
        shape = child.itemFactors.shape
        if len(shape) < 2 or shape[1] <= threshold:
            count += 1
        else:
            count += countLeaves(child, threshold)
    return count

In [None]:
# Root node of the tree: it owns every user and every movie.
# NOTE: this rebinds `V`, which named the sparse rating matrix passed to svds;
# the SVD cell cannot be re-run after this one without rebuilding the matrix.
V = Node()
V.itemFactors = vt
V.userFactors = u
# The lists hold matrix row / column indices, because computePrecision compares
# them with the column indices stored in `mx`. `id - 1` is only a valid index
# when the ids are contiguous, which the movie ids are not, so the lookups
# built earlier are used instead.
V.userList = [user_indices[user] for user in users]
V.itemList = [movie_indices[item] for item in movies]
print(V.itemList[0])

In [None]:
# Item-factor block of the root: one row per factor, one column per movie.
V.itemFactors.shape

In [None]:
# User-factor block of the root: one row per user, one column per factor.
V.userFactors.shape

In [None]:
def splitNode(node, factor):
    """Split `node` in two on the sign of one latent factor and score the split.

    Items (columns of ``node.itemFactors``) and users (rows of
    ``node.userFactors``) whose value on `factor` is >= 0 go to ``node.left``;
    all others go to ``node.right``. The matching entries of ``itemList`` and
    ``userList`` are distributed the same way. Any previous children are
    replaced by fresh nodes.

    The child blocks are selected with boolean masks, so they are always
    two-dimensional -- ``(factors, n_items)`` and ``(n_users, factors)`` --
    even when a side receives a single entry or none at all. (Growing the
    blocks with repeated ``vstack`` calls was quadratic and collapsed the
    single-entry case to a 1-D vector.)

    Args:
        node: node to split; modified in place (``left``, ``right``, ``score``).
        factor: row index into ``itemFactors`` / column index into
            ``userFactors`` of the factor to split on.

    Side effects:
        Prints a notice when the node has no item or no user factors, and
        stores the result of ``computePrecision(node)`` in ``node.score``.
    """
    node.left = Node()
    node.right = Node()

    item_factors = node.itemFactors
    if item_factors.size:
        if item_factors.ndim == 1:
            # A lone item stored as a flat vector: treat it as one column.
            item_factors = item_factors.reshape(-1, 1)
        item_goes_left = item_factors[factor, :] >= 0
        for child, mask in ((node.left, item_goes_left), (node.right, ~item_goes_left)):
            child.itemFactors = item_factors[:, mask]
            child.itemList = [node.itemList[i] for i, keep in enumerate(mask) if keep]
    else:
        print('itemFactors not available')

    user_factors = node.userFactors
    if user_factors.size:
        if user_factors.ndim == 1:
            # A lone user stored as a flat vector: treat it as one row.
            user_factors = user_factors.reshape(1, -1)
        user_goes_left = user_factors[:, factor] >= 0
        for child, mask in ((node.left, user_goes_left), (node.right, ~user_goes_left)):
            child.userFactors = user_factors[mask, :]
            child.userList = [node.userList[i] for i, keep in enumerate(mask) if keep]
    else:
        print('userFactors not available')

    node.score = computePrecision(node)

In [None]:
def findTopN(matrix, N):
    """Return, for every row of `matrix`, the column indices of its N largest values.

    The indices are ordered from the largest value to the smallest. When the
    matrix has fewer than N columns, all of its columns are returned.
    """
    ascending = matrix.argsort(axis = 1)
    descending = ascending[:, ::-1]
    return descending[:, :N]
    

In [None]:
# Reference ranking: for every user (row of X_lr) the column indices of the 5
# highest reconstructed scores, best first. computePrecision reads this global.
mx = findTopN(X_lr, 5)
print(mx)
print(mx.shape)
print(mx[6039,:])  # reference top-5 for the user in row 6039

In [None]:
precisionAt = 5


def _countTopNHits(child):
    """Count how many of `child`'s per-user top-N items are in the reference top-N."""
    if not child.userList or not child.itemList:
        # Nothing to recommend, or nobody to recommend to.
        return 0
    user_factors = child.userFactors
    item_factors = child.itemFactors
    # A single user / item may arrive as a flat vector; restore the 2-D layout.
    if user_factors.ndim == 1:
        user_factors = user_factors.reshape(1, -1)
    if item_factors.ndim == 1:
        item_factors = item_factors.reshape(-1, 1)
    top = findTopN(np.dot(user_factors, item_factors), precisionAt)
    hits = 0
    for row, user in enumerate(child.userList):
        reference = mx[user, :]
        # `top` may be narrower than precisionAt when the child has few items.
        for column in top[row, :]:
            if child.itemList[column] in reference:
                hits += 1
    return hits


def computePrecision(node):
    """Score the current split of `node` against the reference ranking `mx`.

    For each child, every user's scores are recomputed from the child's own
    factor blocks, the top ``precisionAt`` items are taken, and each one that
    also appears in that user's row of the global ``mx`` counts as a hit.

    Args:
        node: a node whose ``left`` and ``right`` children are populated and
            whose ``userList`` entries are row indices into ``mx``.

    Returns:
        hits / (number of users in `node` * precisionAt) as a float, or 0.0
        when the node has no users. The value is also printed.
    """
    hits = _countTopNHits(node.left) + _countTopNHits(node.right)
    possible = len(node.userList) * precisionAt
    ret = float(hits) / possible if possible else 0.0
    print(ret)
    return ret
    

In [None]:
# Trial split of the root on factor index 15 (prints the resulting score).
splitNode(V, 15)

In [None]:
def buildTree(node, factors, threshold=300):
    """Recursively split `node` on the best-scoring factor until leaves are small.

    Every candidate in `factors` is tried with ``splitNode``; the split with
    the highest score wins (ties go to the later factor). The winning children
    are attached to `node` itself and each is built further with the remaining
    factors, so the finished tree is reachable from the node that is returned.

    Recursion stops, with a printed notice, when the node holds a single item
    vector, no items, at most `threshold` items, or when no factors are left.

    Args:
        node: node to grow; modified in place.
        factors: candidate factor indices. The list is not modified.
        threshold: maximum number of items a leaf may hold (default 300).

    Returns:
        `node`, for convenience.
    """
    item_shape = node.itemFactors.shape
    if len(item_shape) <= 1:
        print('one item factor')
        return node
    if item_shape[1] == 0:
        print('no item factor')
        return node
    if item_shape[1] <= threshold:
        print("Threshold value is reached")
        return node
    if not factors:
        print('factors empty')
        return node

    # splitNode installs brand-new children on every call, so keeping
    # references to the best pair is enough -- no deep copy is needed.
    best = None
    for factor in factors:
        node.factor = factor
        splitNode(node, factor)
        if best is None or node.score >= best[0]:
            best = (node.score, factor, node.left, node.right)

    node.score, node.factor, node.left, node.right = best
    print(node.score)
    print(node.factor)

    remaining = list(factors)
    remaining.remove(node.factor)
    buildTree(node.left, list(remaining), threshold)
    buildTree(node.right, list(remaining), threshold)
    return node
    

In [None]:
def buildForest(factors, p, a, sl):
    """Build one tree per random factor group and return the list of roots.

    ``getFactorGroups(len(factors), p, a)`` yields groups of positions into
    `factors`. For each group a root node is created from the matching rows
    of the global ``vt`` and columns of the global ``u``, filled with all
    user / item indices, and grown with ``buildTree``.

    Args:
        factors: factor indices (rows of ``vt`` / columns of ``u``) to draw from.
        p: fraction of the factors placed in each group.
        a: number of grouping rounds.
        sl: intended leaf-size limit. It is not forwarded; ``buildTree``
            applies its own threshold.

    Returns:
        List of root nodes, one per group. Inside a tree, ``factor`` values
        are positions within that tree's group, not global factor indices.
    """
    forest = []
    n_users, n_items = u.shape[0], vt.shape[1]
    for g in getFactorGroups(len(factors), p, a):
        group = [factors[k] for k in g]
        print(group)
        root = Node()
        root.itemFactors = vt[group, :]
        root.userFactors = u[:, group]
        # Without these the split cannot track which users / items it moved.
        root.userList = list(range(n_users))
        root.itemList = list(range(n_items))
        # The sliced blocks only have len(group) factors, so the tree must be
        # built over local positions 0 .. len(group) - 1.
        buildTree(root, list(range(len(group))))
        forest.append(root)
    return forest

In [None]:
# Build a forest over the 16 SVD factors: 3 rounds of groups that each hold
# half of the factors. NOTE: getFactorGroups is defined in a later cell and
# must have been executed before this cell runs.
factors = list(range(16))
buildForest(factors, 0.5, 3, 300)

In [None]:
def getFactorGroups(size, p, a):
    """Draw random groups of factor positions.

    In each of `a` rounds the positions ``0 .. size - 1`` are shuffled and cut
    into ``int(1 / p)`` consecutive groups of ``ceil(size * p)`` positions, so
    no position is used twice within a round. When ``size * p`` is not a whole
    number the last group of a round is shorter (or dropped if nothing is
    left) instead of failing on an empty pool.

    Args:
        size: number of factors to draw from.
        p: fraction of the factors per group, e.g. 0.5 for two groups per round.
        a: number of rounds.

    Returns:
        List of groups; each group is a non-empty list of ints in
        ``range(size)``.
    """
    group_size = int(np.ceil(size * p))
    groups_per_round = int(1 / p)
    groups = []
    for _ in range(a):
        # A shuffled copy cut into chunks is the same as repeatedly drawing
        # from the not-yet-used positions.
        order = random.sample(range(size), size)
        for c in range(groups_per_round):
            group = order[c * group_size:(c + 1) * group_size]
            if group:
                groups.append(group)
    return groups
            

In [None]:
# Shapes of the root's factor blocks before splitting.
print(V.itemFactors.shape)
print(V.userFactors.shape)

In [None]:
# Split the root on factor 0 and inspect how items and users were divided.
splitNode(V,0)
print(V.right.itemFactors.shape)
print(V.left.itemFactors.shape)
print(V.right.userFactors.shape)
print(V.left.userFactors.shape)
printNode(V)

In [None]:
# Grow a single tree from the root, with every SVD factor as a split candidate.
factors = list(range(s.size))
buildTree(V,factors)

In [None]:
# Dump the whole tree, starting at the root.
print("ROOT :")
printTree(V)

In [None]:
# Number of leaves below the root.
countLeaves(V)