In [34]:
## Import necessary packages
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

In [92]:
## Load the ratings table, using the first CSV column as the row index,
## and treat missing ratings as 0 for now
ratings = pd.read_csv("test.csv", index_col=0).fillna(0)
ratings

Unnamed: 0,Football,Rugby,Gaelic Football,Walking,Aerobics,Ice Hockey
user 1,4,5,5,2,1,2
user 2,5,4,5,2,2,4
user 3,4,5,3,3,2,2
user 4,1,2,2,4,5,1
user 5,4,3,3,1,2,3
user 6,4,5,1,2,3,3
user 7,1,2,2,5,4,4
user 8,1,3,2,4,3,3
user 9,5,1,4,3,1,2
user 10,2,2,2,5,5,1


In [93]:
## Method which standardises data values to more
## accurately represent a user's opinion.
def standardise(row):
    """Mean-centre a series of values and scale it by its value range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    NOTE: the notebook calls this through ``ratings.apply(standardise)``;
    with pandas' default ``axis=0`` that passes one *column* of the table
    at a time, even though the parameter is named ``row``.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to standardise.

    Returns
    -------
    pandas.Series
        The centred, range-scaled values. When every value in ``row`` is
        identical the range is 0; the centred values (all zeros) are then
        returned unscaled instead of dividing 0 by 0.
    """
    centred = row - row.mean()
    value_range = row.max() - row.min()
    if value_range == 0:
        # Constant input: dividing would turn every entry into NaN, and
        # those NaNs would propagate into the similarity computation.
        return centred
    return centred / value_range

## Standardise the ratings table; axis=0 (the apply default) hands
## standardise one column at a time
ratings_std = ratings.apply(standardise, axis=0)

## Build the item similarity matrix from the transposed table,
## since we want similarity between items and they need to be in rows.
item_similarity = cosine_similarity(ratings_std.transpose())
print(item_similarity)

[[ 1.          0.39834096  0.63859905 -0.78484311 -0.80573512  0.15461289]
 [ 0.39834096  1.          0.17582418 -0.56043956 -0.3877551   0.20912144]
 [ 0.63859905  0.17582418  1.         -0.46745562 -0.72527473  0.11260385]
 [-0.78484311 -0.56043956 -0.46745562  1.          0.72527473 -0.26274233]
 [-0.80573512 -0.3877551  -0.72527473  0.72527473  1.         -0.27882859]
 [ 0.15461289  0.20912144  0.11260385 -0.26274233 -0.27882859  1.        ]]


In [94]:
## Wrap the item similarity matrix in a data frame labelled by item on both axes.
## Each cell is the cosine similarity between the row item and the column item
## (1.0 on the diagonal; negative values mean opposed rating patterns).
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=ratings.columns,
    columns=ratings.columns,
)
item_similarity_df

Unnamed: 0,Football,Rugby,Gaelic Football,Walking,Aerobics,Ice Hockey
Football,1.0,0.398341,0.638599,-0.784843,-0.805735,0.154613
Rugby,0.398341,1.0,0.175824,-0.56044,-0.387755,0.209121
Gaelic Football,0.638599,0.175824,1.0,-0.467456,-0.725275,0.112604
Walking,-0.784843,-0.56044,-0.467456,1.0,0.725275,-0.262742
Aerobics,-0.805735,-0.387755,-0.725275,0.725275,1.0,-0.278829
Ice Hockey,0.154613,0.209121,0.112604,-0.262742,-0.278829,1.0


In [95]:
## Create recommendation method
def get_similar_item(activity_name, user_rating, neutral_rating=2.5):
    """Score every activity by its similarity to one rated activity.

    Takes the ``activity_name`` column of the module-level
    ``item_similarity_df`` and multiplies it by
    ``user_rating - neutral_rating``. A rating above the neutral point
    therefore favours similar activities, and a rating below it favours
    dissimilar ones.

    Parameters
    ----------
    activity_name : str
        Column label in ``item_similarity_df`` for the activity that was rated.
    user_rating : int or float
        The rating the user gave that activity.
    neutral_rating : int or float, optional
        Rating treated as "no preference"; defaults to 2.5, the value the
        notebook originally hard-coded.

    Returns
    -------
    pandas.Series
        One score per activity, sorted from highest to lowest.

    Raises
    ------
    KeyError
        If ``activity_name`` is not a column of ``item_similarity_df``.
    """
    ## Scale the activity's similarities by how far the rating is from neutral
    weight = user_rating - neutral_rating
    similar_score = item_similarity_df[activity_name] * weight
    return similar_score.sort_values(ascending=False)

## Example: activity scores implied by rating Football a 2
print(get_similar_item(activity_name="Football", user_rating=2))

Aerobics           0.402868
Walking            0.392422
Ice Hockey        -0.077306
Rugby             -0.199170
Gaelic Football   -0.319300
Football          -0.500000
Name: Football, dtype: float64


In [96]:
## Example profile: (activity, rating) pairs for one user
ball_games_lover = [("Football",5), ("Rugby", 2), ("Walking", 1)]

## One score Series per rated activity. DataFrame.append was removed in
## pandas 2.0, so the Series are collected first and the frame is built once;
## each Series becomes a row, aligned on activity name.
rating_scores = [
    get_similar_item(activity, rating) for activity, rating in ball_games_lover
]
similar_activities = pd.DataFrame(rating_scores).reset_index(drop=True)

## Total score per activity, highest first
similar_activities.sum().sort_values(ascending=False)

Football           4.080132
Gaelic Football    2.660318
Rugby              2.030389
Ice Hockey         0.920060
Aerobics          -3.602250
Walking           -3.824745
dtype: float64