In [4]:
from pyspark.sql import SparkSession

spark=SparkSession\
.builder\
.appName('Use of implicit collaborative filtering for band Recommendation System')\
.getOrCreate()

rawData=spark.read\
.format('csv')\
.option('delimiter','\t')\
.option('header','true')\
.load("/home/titan/dataset/ML_JRVI/lastfm/user_artists.dat")

In [5]:
rawData.toPandas().head(3)

Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351


the weights col. shows that the number of time a particular artist id  has been listened by the user id

In [6]:
from pyspark.sql.functions import col
dataset=rawData.select(col('userID').cast('float'),
                      col('artistID').cast('float'),
                      col('weight').cast('float'),
                      )

dataset.toPandas().head()

Unnamed: 0,userID,artistID,weight
0,2.0,51.0,13883.0
1,2.0,52.0,11690.0
2,2.0,53.0,11351.0
3,2.0,54.0,10300.0
4,2.0,55.0,8983.0


In [7]:
dataset.select('weight').describe().toPandas()

Unnamed: 0,summary,weight
0,count,92834.0
1,mean,745.2439300256372
2,stddev,3751.32208038768
3,min,1.0
4,max,352698.0


In [10]:
from pyspark.sql.functions import stddev, mean,col

data=dataset.select(mean('weight').alias('mean_weight'),
                   stddev('weight').alias('stddev_weight'))\
                    .crossJoin(dataset).withColumn('weight_scaled',
                            (col('weight')-col('mean_weight'))/col('stddev_weight'))
data.toPandas().head(3)

Unnamed: 0,mean_weight,stddev_weight,userID,artistID,weight,weight_scaled
0,745.24393,3751.32208,2.0,51.0,13883.0,3.502167
1,745.24393,3751.32208,2.0,52.0,11690.0,2.917573
2,745.24393,3751.32208,2.0,53.0,11351.0,2.827205


In [11]:
(trainingData,testData)=data.randomSplit([0.8,0.2])

In [12]:
from pyspark.ml.recommendation import ALS

als=ALS(maxIter=5,
       regParam=0.1,
       userCol='userID',
       itemCol='artistID',
       implicitPrefs=True,
       ratingCol='weight_scaled',
       coldStartStrategy='drop')

model=als.fit(trainingData)

In [13]:
predict=model.transform(testData)
predict.toPandas().head()

Unnamed: 0,mean_weight,stddev_weight,userID,artistID,weight,weight_scaled,prediction
0,745.24393,3751.32208,447.0,148.0,173.0,-0.152545,0.0
1,745.24393,3751.32208,850.0,463.0,784.0,0.010331,0.000899
2,745.24393,3751.32208,2055.0,463.0,863.0,0.031391,0.001416
3,745.24393,3751.32208,592.0,463.0,544.0,-0.053646,-0.002151
4,745.24393,3751.32208,1145.0,471.0,129.0,-0.164274,0.005917


In [14]:
predictPandas=predict.select('weight_scaled','prediction').toPandas()
predictPandas.describe()

Unnamed: 0,weight_scaled,prediction
count,16386.0,16386.0
mean,0.010113,0.040408
std,0.873735,0.096568
min,-0.198395,-0.383064
25%,-0.167739,0.0
50%,-0.124288,0.002078
75%,-0.022724,0.033306
max,46.753585,1.028189


In [15]:
artistdata=spark.read\
.format('csv')\
.option('delimiter','\t')\
.option('header','true')\
.load("/home/titan/dataset/ML_JRVI/lastfm/artists.dat")
artistdata.toPandas().head(3)

Unnamed: 0,id,name,url,pictureURL
0,1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
1,2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
2,3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...


In [20]:
from pyspark.sql.types import IntegerType

def getRecommendationsForUser(userID, numRecs):
    userDFs=spark.createDataFrame([userID],IntegerType()).toDF('userID')
    userRecs=model.recommendForUserSubset(userDFs,numRecs)
    
    artLists=userRecs.collect()[0].recommendations
#     moviesLists
    artDFs=spark.createDataFrame(artLists)
#     moviesDFs.toPandas()
    recommendedArtists=artistdata.join(artDFs,artistdata.id==artDFs.artistID)\
    .orderBy('rating',ascending=False)\
    .select('name','url','rating')
    
    return recommendedArtists

In [21]:
recommendationForUsers=getRecommendationsForUser(234,3)
recommendationForUsers.toPandas()

Unnamed: 0,name,url,rating
0,Eminem,http://www.last.fm/music/Eminem,0.044459
1,50 Cent,http://www.last.fm/music/50+Cent,0.028677
2,Björk,http://www.last.fm/music/Bj%C3%B6rk,0.026222
