# Recommender system example

In [6]:
%matplotlib inline

import numpy as np
import pandas as pd

# `train_test_split` lives in `sklearn.model_selection` since scikit-learn 0.18;
# the old `sklearn.cross_validation` module was removed in 0.20. Try the current
# location first and fall back so the notebook still runs on legacy installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# `sklearn.externals.joblib` was removed in scikit-learn 0.23. joblib is a
# standalone package (installed as a scikit-learn dependency), so prefer it and
# keep the vendored copy only as a fallback.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Project-local helper modules that ship alongside this notebook.
import recommenders
import evaluation


# Load music data

In [8]:
# Remote sources: listening triplets and per-song metadata.
triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
songs_metadata_file = 'https://static.turi.com/datasets/millionsong/song_data.csv'

# Triplets file: tab-separated with no header row, so the three columns
# (user_id, song_id, listen_count) are named by hand after loading.
song_df_1 = pd.read_csv(triplets_file, sep='\t', header=None)
song_df_1.columns = ['user_id', 'song_id', 'listen_count']

# Song metadata: a regular CSV with its own header row.
song_df_2 = pd.read_csv(songs_metadata_file)

# Left-join the metadata onto the triplets. The metadata is first reduced to
# one row per song_id so the join cannot duplicate listen records.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(subset=['song_id']),
    how='left',
    on='song_id',
)


# Create subset and combine columns

In [14]:
# Keep only the first 10,000 listen records so the rest of the notebook runs
# quickly. `.head()` returns a slice of the merged frame; `.copy()` turns it
# into an independent frame so that adding a column below does not write into
# a slice (which triggers pandas' SettingWithCopyWarning).
song_df = song_df.head(10000).copy()

# Build a readable "<title> - <artist_name>" label; the recommender cells
# below pass 'song' as the item column.
# NOTE(review): only `title` is passed through str(), so a row with a missing
# artist_name would get NaN as its label - confirm that is intended.
song_df['song'] = song_df['title'].map(str) + ' - ' + song_df['artist_name']

# Get unique users and songs

In [23]:
# Distinct user ids and distinct song labels, each in order of first
# appearance in the subset (Series.unique preserves that order).
users, songs = (
    song_df[column].unique() for column in ('user_id', 'song')
)

# Split data into training and testing

In [26]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
train_data, test_data = train_test_split(
    song_df,
    test_size=0.2,
    random_state=0,
)

# Simple popularity-based recommender

In [27]:
# Instantiate the popularity-based recommender from the project-local
# `recommenders` module.
pm = recommenders.popularity_recommender_py()
# Hand it the training split together with the column names to use
# (presumably 'user_id' identifies the user and 'song' the item - verify
# against recommenders.py).
pm.create(train_data, 'user_id', 'song')

# Make popularity-based prediction

In [29]:
# The popularity model returns the same ranked list for every user, so the
# choice of user below is arbitrary.

user_id = users[-1]  # last entry of the unique-user array
pm.recommend(user_id)  # last expression in the cell, so the result is displayed

Unnamed: 0,user_id,song,score,Rank
3194,15cc706a7f24975ca831aaaf297bf0392746b3fe,Sehr kosmisch - Harmonia,37,1.0
4083,15cc706a7f24975ca831aaaf297bf0392746b3fe,Undo - Björk,27,2.0
931,15cc706a7f24975ca831aaaf297bf0392746b3fe,Dog Days Are Over (Radio Edit) - Florence + Th...,24,3.0
4443,15cc706a7f24975ca831aaaf297bf0392746b3fe,You're The One - Dwight Yoakam,24,4.0
3034,15cc706a7f24975ca831aaaf297bf0392746b3fe,Revelry - Kings Of Leon,21,5.0
3189,15cc706a7f24975ca831aaaf297bf0392746b3fe,Secrets - OneRepublic,21,6.0
4112,15cc706a7f24975ca831aaaf297bf0392746b3fe,Use Somebody - Kings Of Leon,21,7.0
1207,15cc706a7f24975ca831aaaf297bf0392746b3fe,Fireflies - Charttraxx Karaoke,20,8.0
1577,15cc706a7f24975ca831aaaf297bf0392746b3fe,Hey_ Soul Sister - Train,19,9.0
1626,15cc706a7f24975ca831aaaf297bf0392746b3fe,Horn Concerto No. 4 in E flat K495: II. Romanc...,19,10.0


# Personalised recommender

In [30]:
# Instantiate the item-similarity (personalised) recommender from the
# project-local `recommenders` module. This rebinds `pm`, so the popularity
# model created earlier is no longer reachable under that name.
pm = recommenders.item_similarity_recommender_py()
# Same call shape as for the popularity model: training split plus the
# 'user_id' and 'song' column names.
pm.create(train_data, 'user_id', 'song')

# Create recommendations for different users

In [None]:
# Personalised recommendations for the user at index 5 of `users`.
user_id = users[5]

# Songs the model has on record for this user.
user_items = pm.get_user_items(user_id)

print("------------------------------------------------------------------------------------")
print("Training data songs for the user userid: %s:" % (user_id,))
print("------------------------------------------------------------------------------------")

# One song label per line.
for song_label in user_items:
    print(song_label)

print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

# Ask the personalised model for recommendations. This stays the last
# expression in the cell so the notebook displays the result.
pm.recommend(user_id)

------------------------------------------------------------------------------------
Training data songs for the user userid: 4bd88bfb25263a75bbdd467e74018f4ae570e5df:
------------------------------------------------------------------------------------
Just Lose It - Eminem
Without Me - Eminem
16 Candles - The Crests
Speechless - Lady GaGa
Push It - Salt-N-Pepa
Ghosts 'n' Stuff (Original Instrumental Mix) - Deadmau5
Say My Name - Destiny's Child
My Dad's Gone Crazy - Eminem / Hailie Jade
The Real Slim Shady - Eminem
Somebody To Love - Justin Bieber
Forgive Me - Leona Lewis
Missing You - John Waite
Ya Nada Queda - Kudai
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 13
no. of unique songs in the training set: 4483
