# Recommendation System

## Instructions 

1. Run all cells
2. The precision index will be printed in the last cell

## Details

1. Cossine Similarity measure
2. User-item Colaborative filtering
3. Outliers are not removed

### Reading dataset to create a list of users and a list of news

In [103]:
import numpy as np
import math
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
import scipy
from random import shuffle


def read_file(filename):
    users = []
    news = []
    with open(filename, 'r') as f:
        data = f.readlines()
        for row in data:
            user, item = row.strip('\n').split('|')
            users.append(user)
            news.append(item)
    return users, news

users, news = read_file('viewed_news.csv')

#### Create a dictionary where the key is the user_id and the value is a list of all news read by the user

In [104]:
def create_dictionary(users, news):
    users_dict = {}
    for user, item in zip(users, news):
        if user not in users_dict:
            users_dict[user] = list()  
        users_dict[user].append(item)    
    return users_dict

users_interests = create_dictionary(users, news)
unique_interests = sorted(set(news))

#### Split news between train and test

In [105]:
users_interests_train = {}
users_interests_test = {}

for user, news_list in users_interests.items():
    size = len(news_list)
    shuffle(news_list)
    nt = int(math.floor(size * (2.0/3.0)))
    users_interests_train[user]=news_list[0:nt]
    users_interests_test[user]=news_list[nt:]

# print users_interests_train['556f426b1700003500aac7f5']
# print users_interests_test['556f426b1700003500aac7f5']


#### Create a matrix of user similarity

In [106]:
def make_user_interest_vector(user_interests):
    return [1 if interest in user_interests else 0 for interest in unique_interests]

index = 0
user_index = {}
matrix_of_interests = []
for user, value in users_interests_train.items():
    user_index[index]=user
    index += 1
    matrix_of_interests.append(value)
    
user_interest_matrix = map(make_user_interest_vector, matrix_of_interests)
user_similarity = cosine_similarity(csr_matrix(user_interest_matrix), dense_output=False)

#### Method to list the most similar users of a given user

In [107]:
def most_similar_users_to(user):
    pairs = []
    
    cx = scipy.sparse.coo_matrix(user_similarity[user,:])
    
    for user_id, similarity in zip(cx.col, cx.data):
        if user_id != user and similarity > 0:
            pairs.append((user_id, similarity))
            
    return sorted(pairs, key=lambda elem: elem[1], reverse=True)

# print(most_similar_users_to(1))

#### User suggestions 

In [108]:
def user_based_suggestions(real_user, k):
    suggestions = {}
    
    user = 0
    for key,v in user_index.items():
        if real_user in v:
            user = key
            
    for user_id, similarity in most_similar_users_to(user):
        index = user_index[user_id]
        news = users_interests_train[index]
        for interest in news:
            if suggestions.has_key(interest):
                suggestions[interest] = suggestions.get(interest) + similarity
            else:
                suggestions[interest] = similarity
                
    suggestions = sorted(suggestions.items(), key=lambda x: x[1], reverse=True)
#     print(len(suggestions))
    
    final_suggestions = []
    index_user = user_index[user]
    for a,_ in suggestions:
        if a not in users_interests_train[index_user]:
              final_suggestions.append(a)
    
#     print(len(final_suggestions))
    return final_suggestions[:k]

# user_based_suggestions('5571a31f1700007a08aac810', 10)

In [112]:
sr = 0.0
for user in users_interests:
    size = len(users_interests_test[user])
    ru = set(user_based_suggestions(user, size))
    tu = set(users_interests_test[user])
    
    pu = len(ru.union(tu))/float(len(tu))
    sr = sr + pu 

In [111]:
print "Indice de precisao geral"
print sr/len(users_interests)

Indice de precisao geral
1.47288693232
