In [25]:
import pyspark
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [26]:
# load data
data = pd.read_csv('ratings.csv') 
matrix = data
matrix

Unnamed: 0,game_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4
...,...,...,...
981543,10000,48386,5
981544,10000,49007,4
981545,10000,49383,5
981546,10000,50124,5


In [27]:
spark = pyspark.sql.SparkSession.builder.appName('Games').getOrCreate()
spark

In [28]:
# create spark dataframe with header and infer schema
df = spark.read.csv('ratings.csv', header=True, inferSchema=True)

# cast all the columns to int
df = df.select(df.user_id.cast('int'), df.game_id.cast('int'), df.rating.cast('int'))

In [43]:
# build test and train data
(train, test) = df.randomSplit([0.8, 0.2], seed = 42)

In [44]:
# build the recommendation model using ALS on the training data
als = ALS(maxIter=5, regParam=0.01, userCol='user_id', itemCol='game_id', ratingCol='rating')

In [45]:
# fit the model to the training data
model = als.fit(train)

In [46]:
# evaluate the model by computing the RMSE on the test data
predictions = model.transform(test)
evaluator = RegressionEvaluator(metricName='rmse', labelCol='rating', predictionCol='prediction')

In [47]:
# get input user id
id_input = int(input('Enter user id: '))

Enter user id: 10


In [48]:
# make a single prediction
single_user = test.filter(test['user_id']==id_input).select(['game_id', 'user_id'])
single_user.show()

+-------+-------+
|game_id|user_id|
+-------+-------+
|   1506|     10|
|   3409|     10|
|   5084|     10|
|   5926|     10|
+-------+-------+



In [49]:
# recommend top 5 games for the user
recommendations = model.transform(single_user)
print('recommendations')
recommendations.orderBy('prediction', ascending=False).show(n=5)

recommendations
+-------+-------+----------+
|game_id|user_id|prediction|
+-------+-------+----------+
|   3409|     10| 4.2762156|
|   5926|     10|   4.07368|
|   1506|     10| 3.7054896|
|   5084|     10| 3.6437006|
+-------+-------+----------+



In [50]:
# list of game_id recommended to user 
game_id = recommendations.orderBy('prediction', ascending=False).select('game_id').collect()
recomendation_score = recommendations.orderBy('prediction', ascending=False).select('prediction').collect()

In [51]:
games = pd.read_csv('games.csv')
games

Unnamed: 0,game_id,name,release_date,summary,meta_score
0,1,The Legend of Zelda: Ocarina of Time,23-Nov-98,"As a young boy, Link is tricked by Ganondorf, ...",99
1,2,Tony Hawk's Pro Skater 2,20-Sep-00,As most major publishers' development efforts ...,98
2,3,Grand Theft Auto IV,29-Apr-08,[Metacritic's 2008 PS3 Game of the Year; Also ...,98
3,4,SoulCalibur,8-Sep-99,"This is a tale of souls and swords, transcendi...",98
4,5,Super Mario Galaxy,12-Nov-07,[Metacritic's 2007 Wii Game of the Year] The u...,97
...,...,...,...,...,...
9995,9996,Buku Sudoku,28-May-08,[Xbox Live Arcade] Experience the excitement o...,60
9996,9997,Guilty Gear Dust Strikers,25-Apr-06,"The no holds barred, sci-fi fighting franchise...",60
9997,9998,The Shoot,19-Oct-10,The Shoot is a first-person oriented shooter g...,60
9998,9999,Spectra (2015),10-Jul-15,Spectra is a twitch racing game. Blast your ni...,60


In [52]:
# print name game with game_id
for i in range(len(game_id)):
    game_data = games.loc[games['game_id'] == game_id[i][0]]
    print(i+1 , ':', 'game:' ,game_data['name'].values[0], '  score:', recomendation_score[i][0])

1 : game: Guild Wars: Eye of the North   score: 4.276215553283691
2 : game: NFL GameDay 2001   score: 4.0736799240112305
3 : game: Super Mario Advance   score: 3.7054896354675293
4 : game: Crimson Sea 2   score: 3.64370059967041
