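![Matrix factorization: the user–item matrix approximated by the product of a user-factor matrix and an item-factor matrix](https://databricks-training.s3.amazonaws.com/img/matrix_factorization.png)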

In [None]:
from pyspark.ml.feature import StringIndexer
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [None]:
# Load the raw event log from HDFS.
# The file is read without a header option, so Spark assigns the default
# column names _c0, _c1, _c2; they are renamed to time / item / user here.
# The `spark` session is used as the entry point (the same one the SQL cell
# further down relies on) instead of the legacy `sqlContext` handle.
EVENTS_PATH = 'hdfs://hdfs-mesos/data.csv'

events = (spark.read.csv(EVENTS_PATH, sep=';', inferSchema=True)
    .withColumnRenamed('_c0', 'time')
    .withColumnRenamed('_c1', 'item')
    .withColumnRenamed('_c2', 'user'))
events.take(5)

In [None]:
# Collapse the event log to one row per (user, item) pair; the generated
# `count` column holds how many events that pair produced.
pair_counts = events.groupBy('user', 'item').count()
# Cached because the following cells fit and apply indexers on this frame.
user_items = pair_counts.cache()
user_items.take(5)

In [None]:
# Encode every distinct `user` value as a numeric index in a new `userIdx`
# column (the ALS cell below uses `userIdx` as its user column).
# NOTE(review): this cell rebinds `user_items`; re-running it on its own
# will presumably fail because `userIdx` already exists - re-run from the
# groupBy cell instead.
user_indexer = StringIndexer(inputCol="user", outputCol="userIdx")
user_index_model = user_indexer.fit(user_items)
user_items = user_index_model.transform(user_items)
user_items.take(5)

In [None]:
# Encode every distinct `item` value as a numeric index in a new `itemIdx`
# column (the ALS cell below uses `itemIdx` as its item column).
# NOTE(review): this cell rebinds `user_items`; re-running it on its own
# will presumably fail because `itemIdx` already exists - re-run from the
# groupBy cell instead.
item_indexer = StringIndexer(inputCol="item", outputCol="itemIdx")
item_index_model = item_indexer.fit(user_items)
user_items = item_index_model.transform(user_items)
user_items.take(5)

In [None]:
# Hold out roughly 20% of the (user, item) rows for evaluation.
# A fixed seed is passed so that the split - and therefore every metric
# computed below - does not change from one run of the notebook to the next.
# NOTE(review): repeatability also assumes the upstream frame is produced
# with the same partitioning on each run - confirm on the target cluster.
training, test = user_items.randomSplit([0.8, 0.2], seed=42)
training.take(5)

In [None]:
# Fit an ALS matrix-factorization model on the training interactions.
# implicitPrefs=True makes ALS use its implicit-feedback formulation, with
# the per-pair event `count` supplied as the rating column.
# A fixed seed is set so the fitted model is reproducible between runs.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="userIdx",
    itemCol="itemIdx",
    ratingCol="count",
    implicitPrefs=True,
    seed=42,
)
model = als.fit(training)

In [None]:
# Score the held-out rows with the fitted model; the result carries a
# `prediction` column next to the original columns (selected in the next cell).
# NOTE(review): `test` may contain users/items absent from `training`; such
# rows presumably get no usable prediction - see the filtering cells below.
predictions = model.transform(test)

In [None]:
# Peek at a few scored rows: original identifiers, observed count, model output.
preview_columns = ['user', 'item', 'count', 'prediction']
predictions.select(*preview_columns).take(10)

In [None]:
# Compare the model output against the observed event counts using RMSE.
# The evaluator is reused by the second evaluation at the end of the notebook.
evaluator = RegressionEvaluator(
    labelCol="count",
    predictionCol="prediction",
    metricName="rmse",
)
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))

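```¯\_(ツ)_/¯```

The RMSE above is not usable as-is: the random split can put users or items into the test set that never appear in the training set, and ALS has no factors for them, so their predictions (and most likely the RMSE itself) come out as NaN. The next cells restrict the test set to users and items the model has actually seen.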

In [None]:
# Expose the test split, and the distinct user / item indices present in the
# training split, as temp views so the next cell can join them in SQL.
test.createOrReplaceTempView("test")
for index_col, view_name in (("userIdx", "model_users"), ("itemIdx", "model_items")):
    training.select(index_col).distinct().createOrReplaceTempView(view_name)

In [None]:
# Keep only the test rows whose user AND item both occur in the training
# split: the inner joins against the distinct-index views drop everything else.
# The query text is assembled from adjacent string literals; its content is
# unchanged, including the embedded indentation.
seen_only_query = (
    "SELECT user, item, count, test.userIdx as userIdx, test.itemIdx as itemIdx "
    "                FROM test "
    "                JOIN model_users ON test.userIdx = model_users.userIdx "
    "                JOIN model_items ON test.itemIdx = model_items.itemIdx"
)
test2 = spark.sql(seen_only_query)

In [None]:
# Print the first rows of the filtered test set as a sanity check.
test2.show()

In [None]:
# Score the filtered test set (only users and items seen in training) with
# the same fitted model.
predictions2 = model.transform(test2)

In [None]:
# Re-run the RMSE evaluation, this time on the predictions for the filtered
# test set; reuses the evaluator configured earlier in the notebook.
rmse = evaluator.evaluate(predictions2)
rmse_report = "Root-mean-square error = " + str(rmse)
print(rmse_report)

```(•̀ᴗ•́)و ̑̑```