In [4]:
import numpy as np
import pandas as pd
import pyspark
import unittest
from mmlspark.evaluate import *
from pyspark.ml.tuning import *
from pyspark.sql.types import *

In [5]:
def create_sample_data(extra_class_path=None):
    """Build a small synthetic ratings data set as pandas and Spark DataFrames.

    Two 21-row frames are created with the columns ``customerID``, ``itemID``,
    ``rating`` and ``timeStamp``. They share the same ``customerID``,
    ``rating`` (always 5) and ``timeStamp`` (one ``%Y%m%d`` string per day
    from 2018-01-01 to 2018-01-21) columns and differ only in ``itemID``.

    Args:
        extra_class_path: Value for the ``spark.driver.extraClassPath`` Spark
            setting (the location of the mmlspark jar). When ``None`` (the
            default) the original hard-coded build-artifact path is used, so
            existing zero-argument calls behave exactly as before.

    Returns:
        tuple: ``(dfs_pred, dfs_true, rating_pred, spark)`` where ``dfs_pred``
        and ``dfs_true`` are the Spark DataFrames created from the "predicted"
        and "true" pandas frames, ``rating_pred`` is the pandas "predicted"
        frame, and ``spark`` is the session returned by ``getOrCreate()``.
    """
    if extra_class_path is None:
        # Backward-compatible default: the path the notebook was written with.
        extra_class_path = (
            "/home/dciborow/mmlspark2/BuildArtifacts/packages/m2/com/microsoft/ml/spark/mmlspark_2.11/0.0/mmlspark_2.11-0.0.jar"
        )

    # NOTE(review): getOrCreate() hands back an already-running session when
    # one exists; presumably the class-path setting only matters for the first
    # call in a kernel -- confirm against the Spark documentation.
    spark = pyspark.sql.SparkSession.builder.master("local[*]") \
        .config('spark.driver.extraClassPath', extra_class_path) \
        .getOrCreate()

    # Synthesize some testing data. The columns below are identical in the
    # "predicted" and "true" frames, so they are built once and reused.
    customer_ids = [1] * 5 + [2] * 5 + [3] * 6 + [4] * 5
    ratings = [5] * len(customer_ids)
    time_stamps = [d.strftime('%Y%m%d') for d in pd.date_range('2018-01-01', '2018-01-21')]

    rating_pred = pd.DataFrame({
        'customerID': customer_ids,
        'itemID': [1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5],
        'rating': ratings,
        'timeStamp': time_stamps
    })
    rating_true = pd.DataFrame({
        'customerID': customer_ids,
        'itemID': [3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6],
        'rating': ratings,
        'timeStamp': time_stamps
    })

    dfs_pred = spark.createDataFrame(rating_pred)
    dfs_true = spark.createDataFrame(rating_true)
    return dfs_pred, dfs_true, rating_pred, spark


In [8]:
# Cutoff passed as the first argument to RankingEvaluation and
# DistributionMetrics in the evaluation cells of this notebook.
k = 5

In [11]:
'''
Test ranking evaluation methods
'''
# Start from freshly generated sample data so this cell can be re-run on its own.
dfs_pred, dfs_true, rating_pred, spark = create_sample_data()

# Evaluate ranking metrics.

evaluator_ranking = RankingEvaluation(k, dfs_true, dfs_pred)

recall = evaluator_ranking.recall_at_k()
precision = evaluator_ranking.precision_at_k()
ndcg = evaluator_ranking.ndcg_at_k()
# Stored as `map_at_k`, not `map`: binding the name `map` at notebook level
# would hide Python's built-in map() from every cell executed afterwards.
map_at_k = evaluator_ranking.map_at_k()

print(recall)
print(precision)
print(ndcg)
print(map_at_k)

0.75
0.7500000000000001
0.7113207924233891
0.6075


In [13]:
'''
Test distribution evaluation methods
'''
# Start from freshly generated sample data so this cell can be re-run on its own.
dfs_pred, dfs_true, rating_pred, spark = create_sample_data()

# Evaluate distribution metrics.

evaluator_distribution = DistributionMetrics(k, dfs_true, dfs_pred)

# Only the first row of the popularity result is printed.
print(evaluator_distribution.popularity_at_k().head(1))

diversity = evaluator_distribution.diversity_at_k()
max_diversity = evaluator_distribution.max_diversity()

print(diversity)
print(max_diversity)

+---+------+---------+
| id|itemID|binNumber|
+---+------+---------+
|  1|     1|        0|
|  2|     2|        1|
|  3|     3|        1|
|  4|     4|        1|
|  5|     5|        2|
|  6|     6|        2|
|  7|     7|        2|
+---+------+---------+

None
[Row(binNumber=0, itemCounts=2, percentage=10.0, lower='0.0', upper='33.33')]
1.0
1.0


In [14]:
'''
Test rating evaluation methods
'''
# Start from freshly generated sample data so this cell can be re-run on its own.
dfs_pred, dfs_true, rating_pred, spark = create_sample_data()

# Evaluate rating metrics.

# Perturb the predicted ratings with standard-normal noise. The noise is drawn
# from a dedicated generator with a fixed seed so that it -- and therefore the
# metrics printed below -- is the same on every run, without touching NumPy's
# global random state.
noise = np.random.RandomState(42).normal(0, 1, rating_pred.shape[0])
rating_pred['rating'] = rating_pred['rating'] + noise

# Rebuild the Spark frame so it carries the perturbed ratings.
dfs_pred = spark.createDataFrame(rating_pred)

evaluator_rating = RatingEvaluation(dfs_true, dfs_pred)

# These four metrics are read as attributes of the evaluator, not called.
rsquared = evaluator_rating.rsquared
exp_var = evaluator_rating.exp_var
mae = evaluator_rating.mae
rmse = evaluator_rating.rmse

print(rsquared)
print(exp_var)
print(mae)
print(rmse)

-0.11670578237612461
0.09729363532479457
0.7528206481819291
0.9648624624924145
