# Tugas 4 Big Data - Recommendation System

Dataset:

https://www.kaggle.com/dahlia25/metacritic-video-game-comments/downloads/metacritic-video-game-comments.zip/1#metacritic_game_user_comments.csv (Metacritic Video Game Comments)

### 1. Spark Initialization

In [1]:
# Initialise findspark before any pyspark import in the cells below
# (presumably this is what makes the local Spark installation importable).
import findspark
findspark.init()

In [3]:
from pyspark.sql import SparkSession

# Start (or reuse) the Spark session that every later cell relies on.
spark = (
    SparkSession.builder
    .appName("Python Spark Recommendation System Example")
    .getOrCreate()
)

print(spark)

<pyspark.sql.session.SparkSession object at 0x000002707E605A90>


In [4]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
import pandas as pd

### 2. Load Data

In [43]:
# Read the headerless comments file and let Spark infer the column types;
# the columns come back with the generated names _c0 ... _c5.
df3 = spark.read.csv(
    path="D:/KULIAH/SMT-6_BIG-DATA/dataset/result0.txt",
    header=None,
    inferSchema=True,
)

In [44]:
# Display the inferred schema of the raw frame (column names and types).
df3.schema

StructType(List(StructField(_c0,IntegerType,true),StructField(_c1,StringType,true),StructField(_c2,StringType,true),StructField(_c3,IntegerType,true),StructField(_c4,StringType,true),StructField(_c5,StringType,true)))

In [45]:
# Preview the raw rows (first 20, long strings truncated).
df3.show(n=20, truncate=True)

+---+--------------------+----------+---+--------------------+---------------+
|_c0|                 _c1|       _c2|_c3|                 _c4|            _c5|
+---+--------------------+----------+---+--------------------+---------------+
|  0|The Legend of Zel...|Nintendo64| 10|Everything in OoT...|     SirCaestus|
|  1|The Legend of Zel...|Nintendo64| 10|I won't bore you ...|       Kaistlin|
|  2|The Legend of Zel...|Nintendo64| 10|Anyone who gives ...|         Jacody|
|  3|The Legend of Zel...|Nintendo64| 10|I'm one of those ...|     doodlerman|
|  4|The Legend of Zel...|Nintendo64| 10| This game is the...|        StevenA|
|  5|The Legend of Zel...|Nintendo64| 10|I think it's funn...|       joei1382|
|  6|The Legend of Zel...|Nintendo64|  9|I played A Link T...|         Corvix|
|  7|The Legend of Zel...|Nintendo64| 10|"The Legend of Ze...|          Malon|
|  8|The Legend of Zel...|Nintendo64| 10|            This ...|      Nosidda89|
|  9|The Legend of Zel...|Nintendo64| 10|I'm not kid

In [46]:
# Keep only the title, score and username columns and give them readable names.
df3 = df3.select(
    df3["_c1"].alias("Title"),
    df3["_c3"].alias("Userscore"),
    df3["_c5"].alias("Username"),
)

In [47]:
# Put the columns in (user, item, rating) order.
df3 = df3.select("Username", "Title", "Userscore")

# Preview the rows and the column types.
df3.show()
df3.printSchema()

# Register the frame as the temporary view "ratingsdf".
df3.createOrReplaceTempView("ratingsdf")

+---------------+--------------------+---------+
|       Username|               Title|Userscore|
+---------------+--------------------+---------+
|     SirCaestus|The Legend of Zel...|       10|
|       Kaistlin|The Legend of Zel...|       10|
|         Jacody|The Legend of Zel...|       10|
|     doodlerman|The Legend of Zel...|       10|
|        StevenA|The Legend of Zel...|       10|
|       joei1382|The Legend of Zel...|       10|
|         Corvix|The Legend of Zel...|        9|
|          Malon|The Legend of Zel...|       10|
|      Nosidda89|The Legend of Zel...|       10|
| Regeneration13|The Legend of Zel...|       10|
|KendylKlownfish|The Legend of Zel...|       10|
|       Boffboff|The Legend of Zel...|       10|
|          Bolts|The Legend of Zel...|       10|
| TheJokerJulian|The Legend of Zel...|       10|
|      velasco92|The Legend of Zel...|       10|
|       JordanI.|The Legend of Zel...|       10|
|         brentl|The Legend of Zel...|       10|
|   Dankiller127|The

In [9]:
# ALS needs numeric user and item ids, so the string usernames and titles are converted to numeric indices.
from pyspark.ml.feature import IndexToString, StringIndexer

In [48]:
# Map each Username to a numeric UserId. With handleInvalid="keep", invalid
# values get their own index instead of raising an error.
stringindexer = StringIndexer(
    inputCol='Username',
    outputCol='UserId',
    handleInvalid="keep",
)
model = stringindexer.fit(df3)
indexed = model.transform(df3)

In [49]:
# Map each Title to a numeric GameId, again keeping invalid values in their
# own index rather than failing.
stringindexer_item = StringIndexer(
    inputCol='Title',
    outputCol='GameId',
    handleInvalid="keep",
)
model = stringindexer_item.fit(indexed)
indexed = model.transform(indexed)

In [50]:
# Preview the frame with the new UserId and GameId columns.
indexed.show(n=20, truncate=True)

+---------------+--------------------+---------+-------+------+
|       Username|               Title|Userscore| UserId|GameId|
+---------------+--------------------+---------+-------+------+
|     SirCaestus|The Legend of Zel...|       10| 4754.0|  32.0|
|       Kaistlin|The Legend of Zel...|       10|45530.0|  32.0|
|         Jacody|The Legend of Zel...|       10|39494.0|  32.0|
|     doodlerman|The Legend of Zel...|       10|   87.0|  32.0|
|        StevenA|The Legend of Zel...|       10| 2371.0|  32.0|
|       joei1382|The Legend of Zel...|       10|45113.0|  32.0|
|         Corvix|The Legend of Zel...|        9|31509.0|  32.0|
|          Malon|The Legend of Zel...|       10|51811.0|  32.0|
|      Nosidda89|The Legend of Zel...|       10| 1021.0|  32.0|
| Regeneration13|The Legend of Zel...|       10|21816.0|  32.0|
|KendylKlownfish|The Legend of Zel...|       10|   72.0|  32.0|
|       Boffboff|The Legend of Zel...|       10|40193.0|  32.0|
|          Bolts|The Legend of Zel...|  

In [60]:
# Keep the readable names next to their numeric ids and make sure the rating
# column is an integer.
df3 = indexed.select(
    "Username",
    "UserId",
    "Title",
    "GameId",
    indexed["Userscore"].cast("int"),
)
df3.show()


+---------------+-------+--------------------+------+---------+
|       Username| UserId|               Title|GameId|Userscore|
+---------------+-------+--------------------+------+---------+
|     SirCaestus| 4754.0|The Legend of Zel...|  32.0|       10|
|       Kaistlin|45530.0|The Legend of Zel...|  32.0|       10|
|         Jacody|39494.0|The Legend of Zel...|  32.0|       10|
|     doodlerman|   87.0|The Legend of Zel...|  32.0|       10|
|        StevenA| 2371.0|The Legend of Zel...|  32.0|       10|
|       joei1382|45113.0|The Legend of Zel...|  32.0|       10|
|         Corvix|31509.0|The Legend of Zel...|  32.0|        9|
|          Malon|51811.0|The Legend of Zel...|  32.0|       10|
|      Nosidda89| 1021.0|The Legend of Zel...|  32.0|       10|
| Regeneration13|21816.0|The Legend of Zel...|  32.0|       10|
|KendylKlownfish|   72.0|The Legend of Zel...|  32.0|       10|
|       Boffboff|40193.0|The Legend of Zel...|  32.0|       10|
|          Bolts| 3870.0|The Legend of Z

### 3. Create Model

In [61]:
# Hold out roughly 20% of the ratings for evaluation. The fixed seed keeps the
# split stable across reruns, so the reported metric can be compared between runs.
training, test = df3.randomSplit([0.8, 0.2], seed=42)

In [62]:
# Build the recommendation model using ALS on the training data.
# The cold start strategy is set to 'drop' so the evaluation metric does not
# come out as NaN.
# A fixed seed makes the factor initialisation, and therefore the fitted model,
# repeatable for the same training data.
# NOTE(review): the recorded run shows predictions far outside the range of the
# Userscore values and an RMSE of about 9.3; regParam=0.01 may be too weak for
# this data. Consider tuning it.
als = ALS(maxIter=5, regParam=0.01, userCol="UserId", itemCol="GameId", ratingCol="Userscore",
          coldStartStrategy="drop", seed=42)
model = als.fit(training)

In [63]:
# No refit here: `model` is already fitted by `als.fit(training)` in the
# previous cell, and fitting the same estimator on the same data a second time
# only repeated the training work.

In [78]:
# GameId -> Title pairs taken from the ratings frame.
# The original cell selected from `temp` itself, which is not defined anywhere
# before this point and fails with NameError on a fresh kernel.
# NOTE(review): assumes `df3` is the intended source; confirm.
temp = df3.select(df3.GameId, df3.Title)

In [64]:
# Score the held-out ratings and report the RMSE between the actual Userscore
# and the model's prediction.
evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="Userscore",
    predictionCol="prediction",
)
predictions = model.transform(test)
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))

Root-mean-square error = 9.332089442555532


In [65]:
# Preview the test rows next to their predicted scores.
predictions.show(n=20, truncate=True)

+---------------+-------+--------------------+------+---------+-----------+
|       Username| UserId|               Title|GameId|Userscore| prediction|
+---------------+-------+--------------------+------+---------+-----------+
|        Ronivan|12028.0|Age of Empires II...| 148.0|        3|  0.9109781|
|       davinci2|16367.0|Age of Empires II...| 148.0|       10|  4.0817137|
|    Schlafanzug| 1963.0|Age of Empires II...| 148.0|       10|   25.56472|
|         JeffC.| 1607.0|Age of Empires II...| 148.0|        0|  10.695454|
|  ColombiaGames| 1292.0|Age of Empires II...| 148.0|       10|   4.767411|
|KendylKlownfish|   72.0|Yoshi's Island: S...| 243.0|       10| -24.126621|
|         SeanN.|16639.0|Yoshi's Island: S...| 243.0|       10|  2.9645362|
|      thethethe| 2661.0|Yoshi's Island: S...| 243.0|       10| -12.894782|
|       anggadaz|  842.0|           Undertale|  31.0|       10|  3.8102298|
|      BabyRants| 3279.0|           Undertale|  31.0|       10|   6.213803|
|        Ale

In [66]:
# Top 10 recommended games for every user
userRecs = model.recommendForAllUsers(numItems=10)
# Top 10 recommended users for every game
itemRecs = model.recommendForAllItems(numUsers=10)

In [67]:
# Preview the per-user recommendation lists.
userRecs.show(n=20, truncate=True)

+------+--------------------+
|UserId|     recommendations|
+------+--------------------+
|   148|[[273, 214.55751]...|
|   463|[[217, 118.46659]...|
|   471|[[250, 106.10432]...|
|   496|[[250, 102.92329]...|
|   833|[[221, 96.35108],...|
|  1088|[[254, 48.998947]...|
|  1238|[[260, 65.84913],...|
|  1342|[[250, 115.5149],...|
|  1580|[[197, 38.38764],...|
|  1591|[[289, 66.12353],...|
|  1645|[[270, 98.28488],...|
|  1829|[[258, 66.87807],...|
|  1959|[[258, 24.66218],...|
|  2122|[[260, 32.791], [...|
|  2142|[[250, 56.930565]...|
|  2366|[[252, 116.348465...|
|  2659|[[258, 58.340546]...|
|  2866|[[279, 29.6516], ...|
|  3175|[[252, 90.21308],...|
|  3749|[[289, 77.72006],...|
+------+--------------------+
only showing top 20 rows



In [68]:
# Preview the per-game recommendation lists.
itemRecs.show(n=20, truncate=True)

+------+--------------------+
|GameId|     recommendations|
+------+--------------------+
|   148|[[204, 72.12928],...|
|   243|[[225, 145.07103]...|
|    31|[[472, 38.217567]...|
|   251|[[585, 204.21915]...|
|    85|[[134, 52.024727]...|
|   137|[[324, 99.89302],...|
|    65|[[1053, 64.139206...|
|    53|[[104, 39.629433]...|
|   255|[[154, 211.51372]...|
|   133|[[697, 102.77333]...|
|   296|[[314, 217.24173]...|
|    78|[[1053, 41.194748...|
|   155|[[1053, 75.03219]...|
|   108|[[866, 55.397915]...|
|   211|[[314, 193.25792]...|
|   193|[[247, 89.37335],...|
|    34|[[104, 31.351816]...|
|   101|[[557, 48.90856],...|
|   115|[[247, 61.193962]...|
|   126|[[358, 85.27717],...|
+------+--------------------+
only showing top 20 rows

