This notebook walks through the concept of content-based recommendation engines, based on the example shown on Analytics Vidhya:

https://www.analyticsvidhya.com/blog/2015/08/beginners-guide-learn-content-based-recommender-systems/

In [None]:
import pandas as pd
import numpy as np

## Build Item Profile

- Items: articles

- Features: topics in articles

In [None]:
# Item profile: one row per article, one binary column per topic
# (1 = the article covers that topic, 0 = it does not).
articles = pd.DataFrame(
    [
        [1, 0, 1, 0, 1],
        [0, 1, 1, 1, 0],
        [0, 0, 0, 1, 1],
        [0, 0, 1, 1, 0],
        [0, 1, 0, 0, 0],
        [1, 0, 0, 1, 0],
    ],
    columns=['big_data', 'r', 'python', 'machine_learning', 'learning_paths'],
)
articles

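Note: Since we have a binary matrix, applying the log-scaled term frequency $1 + \log f_{t,d}$ (where $f_{t,d}$ is the count of topic $t$ in article $d$) would give the same matrix as above, because $\log 1 = 0$. So the next step is to normalize each article by the square root of its number of topics.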

In [None]:
# Number of topics attached to each article (row-wise sum of the binary matrix).
total_attributes = articles.sum(axis='columns')
total_attributes.to_frame(name='total_attributes')

In [None]:
# Scale each article (row) by 1 / sqrt(number of topics it has). Because the
# matrix is binary, sqrt(row sum) is the row's Euclidean length, so every
# article with at least one topic ends up as a unit-length vector.
#
# This is done in one vectorised call rather than by growing a frame row by
# row: `DataFrame.append` was removed in pandas 2.0, and repeated appends are
# quadratic. `axis=0` aligns `total_attributes` with the row index.
articles_normalized = articles.div(np.sqrt(total_attributes), axis=0)

In [None]:
# Put the topic columns back into the same order used by `articles`.
articles_normalized = articles_normalized.loc[
    :, ['big_data', 'r', 'python', 'machine_learning', 'learning_paths']
]
articles_normalized

In [None]:
# Document frequency: in how many articles each topic appears (column sums).
df = articles.sum(axis='index')
# Inverse document frequency: log10(number of articles / document frequency).
idf = np.log10(articles.shape[0] / df)

In [None]:
# Document frequency per topic: the column sums of `articles`.
df

In [None]:
# Inverse document frequency per topic: log10(number of articles / document frequency).
idf

## Build User Profile

In [None]:
# Prompt once per article, in order (articles 1 through 6); each answer is
# kept as the raw string typed by the user.
input_1, input_2, input_3, input_4, input_5, input_6 = [
    input(f'Do you like article {number}:') for number in range(1, 7)
]

In [None]:
# Convert the six raw answers to integers, keeping article order.
user = list(map(int, (input_1, input_2, input_3, input_4, input_5, input_6)))
user

In [None]:
# User profile: for each topic, the dot product of that topic's normalized
# column (across all articles) with the user's per-article answers.
topics_by_article = articles_normalized.transpose()
user_profile = np.dot(topics_by_article, user)
user_profile

## Predictions

In [None]:
import operator
import functools

def sumproduct(*lists):
    """Return the sum of element-wise products across the given sequences.

    The sequences are walked in lockstep with ``zip``, so iteration stops at
    the shortest one. For each position the elements are multiplied together,
    and the products are added up starting from 0. Calling it with no
    sequences, or with empty ones, returns 0.
    """
    total = 0
    for data in zip(*lists):
        product = functools.reduce(operator.mul, data)
        total = total + product
    return total

In [None]:
# Show the normalized item profiles again for reference before scoring.
articles_normalized

In [None]:
# Normalized topic vector of the row at position 4 (the fifth article), which is scored below.
articles_normalized.iloc[4]

In [None]:
# Score the article at row position 4: sum over topics of
# (normalized article weight * user profile weight * idf).
# Only a strictly negative score results in "don't recommend".
article_vector = articles_normalized.iloc[4]
score = sumproduct(article_vector, user_profile, idf)
print("don't recommend" if score < 0 else "recommend")