# Redis for Recommendations
## Preparations

In [140]:
import redis

# Connect with redis-py's default connection settings
# (the recorded output below shows localhost:6379, db 0).
print("Preparing Notebook ...")
r = redis.StrictRedis()
print("Connected to: {}".format(r))

# Start from a clean slate so the examples below are reproducible.
# NOTE: FLUSHALL wipes the connected Redis server - only run this against a scratch instance.
flushed = r.flushall()
print("Flushed: {}".format(flushed))

Preparing Notebook ...
Connected to: Redis<ConnectionPool<Connection<host=localhost,port=6379,db=0>>>
Flushed: True


## Content Based Filtering

In [27]:
# Categories in which David already owns at least one comic
r.sadd('usr:david:catg', 'fantasy', 'super-heros', 'scifi')

# Catalogue: one set of item titles per category
r.sadd('ctg:scifi:items','Valerian', 'Fantastic Four')
r.sadd('ctg:super-heros:items', 'Batman', 'Spiderman', 'Wonder Woman')
r.sadd('ctg:fantasy:items', 'Avatar', 'Dragon Age')

# Candidate items for David = union of the item sets of all his categories.
# (For large sets, iterating with SSCAN would be preferable to SMEMBERS.)
categories = r.smembers('usr:david:catg')

# Members are returned as bytes, so decode each one before building its item-set key
keys = ["ctg:" + ctg.decode('UTF8') + ":items" for ctg in categories]

# (For large result sets, SUNIONSTORE could materialize the union on the server instead.)
result = r.sunion(keys)
print(result)

{b'Spiderman', b'Batman', b'Fantastic Four', b'Wonder Woman', b'Valerian', b'Avatar', b'Dragon Age'}


## Collaborative Filtering

In [56]:
# Ownership per user: David owns Spiderman and Batman,
# Pieter owns Wonder Woman and Batman
r.sadd('usr:david:items','Spiderman', 'Batman')
r.sadd('usr:pieter:items', 'Wonder Woman', 'Batman')

# Reverse mapping (item -> users owning it).
# Every key must use the 'itm:' prefix, because the lookup below builds
# its keys as 'itm:<lower_case_item_name>:users'.
r.sadd('itm:spiderman:users', 'david')
r.sadd('itm:batman:users', 'david', 'pieter')
r.sadd('itm:wonder_woman:users', 'pieter')

# These are all the users interested in the same items as David
david_key = 'usr:david:items'
items = r.smembers(david_key)

# Item names are normalized (lower case, spaces -> underscores) to match the reverse-mapping keys
keys = ["itm:" + item.decode('UTF-8').lower().replace(' ', '_') + ":users" for item in items]

users = r.sunion(keys)
print("Users interested in the same items as David: " + str(users))

# Pieter is interested in the same items as David, so recommend to David
# whatever such users own that David does not own yet (set difference).
print("David is interested in: " + str(items))

for usr in users:
    usr_key = "usr:" + usr.decode('UTF-8') + ":items"
    # David is part of the union himself; comparing him with himself is pointless
    if usr_key != david_key:
        print("David could be also interested in: " + str(r.sdiff(usr_key, david_key)))



Users interested in the same items as David: {b'pieter', b'david'}
David is interested in: {b'Spiderman', b'Batman'}
David could be also interested in: {b'Wonder Woman'}


In [174]:
# TODO: Check syntax of r.zadd
def zadd(key, score, item):
    """Add ``item`` with ``score`` to the sorted set ``key`` via a raw ZADD command.

    Uses the notebook-level client ``r`` and returns whatever
    ``r.execute_command`` returns for the call.
    """
    command = ('ZADD', key, score, item)
    return r.execute_command(*command)

# TODO: redis-py doesn't support weights here
def zinterstore(target, keys, weights):
    """Issue a raw ZINTERSTORE that writes the weighted intersection of ``keys`` to ``target``.

    target:  name of the destination key.
    keys:    sequence of source sorted-set keys.
    weights: sequence of weights passed after the WEIGHTS keyword
             (expected to line up with ``keys`` - the helper does not check this).

    Uses the notebook-level client ``r`` and returns whatever
    ``r.execute_command`` returns for the call.
    """
    args = [target, len(keys)]
    args.extend(keys)
    args.append('WEIGHTS')
    args.extend(weights)
    return r.execute_command('ZINTERSTORE', *args)

def zunionstore_agg_min(target, keys, weights):
    """Issue a raw ZUNIONSTORE ... AGGREGATE MIN that writes the weighted union of ``keys`` to ``target``.

    The weights are applied to the scores before the MIN aggregation of the
    union is executed.

    target:  name of the destination key.
    keys:    sequence of source sorted-set keys.
    weights: sequence of weights passed after the WEIGHTS keyword
             (expected to line up with ``keys`` - the helper does not check this).

    Uses the notebook-level client ``r`` and returns whatever
    ``r.execute_command`` returns for the call.
    """
    args = [target, len(keys)]
    args.extend(keys)
    args.append('WEIGHTS')
    args.extend(weights)
    args.extend(['AGGREGATE', 'MIN'])
    return r.execute_command('ZUNIONSTORE', *args)

# Root Mean Square
import math
def rms(values):
    """Return the root mean square of the scores in ``values``.

    values: sequence of pairs whose second element (index 1) is the numeric
            score, e.g. the ``(member, score)`` tuples produced by a ZRANGE
            with scores. The first element of each pair is ignored.

    Returns a float. For an empty sequence ``math.inf`` is returned instead of
    raising ZeroDivisionError: with no common ratings there is no evidence of
    similarity, so the distance is treated as infinitely large and a check
    such as ``rms(...) <= 1`` fails.
    """
    count = len(values)
    if count == 0:
        return math.inf

    sq_sum = sum(pair[1] ** 2 for pair in values)
    return math.sqrt(sq_sum / count)
        


# Seed data as (key, score, member) triples, written in the listed order.
seed_ratings = (
    # Ratings by user: member = item, score = that user's rating
    ('usr:david:ratings', 3.0, 'spiderman'),
    ('usr:david:ratings', 4.0, 'batman'),
    ('usr:david:ratings', 3.0, 'superman'),
    ('usr:pieter:ratings', 3.0, 'batman'),
    ('usr:pieter:ratings', 5.0, 'wonder_woman'),
    ('usr:pieter:ratings', 1.0, 'aqua_man'),
    ('usr:pieter:ratings', 4.0, 'superman'),
    # Ratings by item: member = user, score = that user's rating
    ('itm:spiderman:ratings', 3.0, 'david'),
    ('itm:batman:ratings', 4.0, 'david'),
    ('itm:batman:ratings', 3.0, 'pieter'),
    ('itm:wonder_woman:ratings', 5.0, 'pieter'),
)

for rating_key, rating_score, rating_member in seed_ratings:
    zadd(rating_key, rating_score, rating_member)

# Items rated by David
# (the loop below previously iterated an undefined name, which only worked
# with leftover kernel state; the result is now bound to the name it uses)
rated_david = r.zrange('usr:david:ratings', 0, -1)

# One "ratings by item" key per item David rated
keys = ["itm:" + rt.decode('UTF8') + ":ratings" for rt in rated_david]

# The union of those per-item sets has one member per user who rated any of David's items
r.zunionstore('usr:david:ratings:same', keys)
users = r.zrange('usr:david:ratings:same', 0, -1)
print("The following users rated David's items: " + str(users))

# Calculate similarities between David and every other user who rated his items
david_key = 'usr:david:ratings'
for usr in users:
    usr = usr.decode('UTF-8')
    usr_key = "usr:" + usr + ':ratings'

    # David is part of the user list himself; skip the self-comparison
    if usr_key == david_key:
        continue

    usr_keys = [david_key, usr_key]

    # Weights multiply the scores, and by default the resulting score of an
    # element is the sum of its (weighted) scores in the sets where it exists.
    # With the weights (1, -1) the second rating is subtracted from the first,
    # so dist:david:<usr> stores, for every item both users rated, the
    # difference between David's rating and the other user's rating.
    usr_weights = [1, -1]
    dist_key = "dist:david:" + usr
    zinterstore(dist_key, usr_keys, usr_weights)

    # Descending order, returned as (item, distance) pairs
    dists = r.zrange(dist_key, 0, -1, desc=True, withscores=True)
    print("The rating distance to " + usr + " is " + str(dists))

    # Compute the RMS once and reuse it for the report and the threshold check
    avg_dist = rms(dists)
    print("The average distance (RMS) to " + usr + " is " + str(avg_dist))

    # The user is similar enough to David, add items of this user to the recommendation list
    if avg_dist <= 1:
        # The weight -1 turns David's own ratings negative, and AGGREGATE MIN
        # keeps the lowest score per item: everything David already rated ends
        # up with a negative score, items only the other user rated keep that
        # user's rating.
        # NOTE: 'rec:david' is rewritten by each ZUNIONSTORE call, so it holds
        # the result for the most recently processed similar user only.
        usr_filter = [-1, 1]
        zunionstore_agg_min('rec:david', usr_keys, usr_filter)

        # Keep only items with a score between 4 and 5
        print("The following is highly recommended: " + str(r.zrangebyscore('rec:david',4,5, withscores=True)))
                
                
                
    

The following users rated David's items: [b'pieter', b'david']
The rating distance to pieter is [(b'batman', 1.0), (b'superman', -1.0)]
The average distance (RMS) to pieter is 1.0
The following is highly recommended: [(b'wonder_woman', 5.0)]
