In [1]:
%pip install turicreate

Collecting turicreate
  Downloading turicreate-6.4.1-cp38-cp38-manylinux1_x86_64.whl (92.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy
  Downloading numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting numba<0.51.0
  Downloading numba-0.50.1-cp38-cp38-manylinux2014_x86_64.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting resampy==0.2.1
  Downloading resampy-0.2.1.tar.gz (322 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.4/322.4 KB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting prettytable=

In [2]:
import turicreate as tc
import pandas as pd
import numpy as np
import pickle

## Load training and test data

In [3]:
# Read the train and test rating tables from their Feather files into pandas DataFrames.
train_df = pd.read_feather("../data/MOOCCubeX/train_ratings.feather")
test_df = pd.read_feather("../data/MOOCCubeX/test_ratings.feather")

In [4]:
# Preview the first rows of the training ratings (user, course, rating).
train_df.head()

Unnamed: 0,user,course,rating
0,U_2452,C_697684,1.805788
1,U_6286,C_1886678,0.96225
2,U_11731,C_1997524,2.192645
3,U_24067,C_1646044,2.540003
4,U_24067,C_2316355,3.810004


In [5]:
# Preview the first rows of the test ratings (user, course, rating).
test_df.head()

Unnamed: 0,user,course,rating
0,U_2452,C_2059297,1.091089
1,U_6286,C_1842789,4.264014
2,U_11731,C_1811739,1.336306
3,U_24067,C_948101,0.762493
4,U_24277,C_1732839,0.816497


## Train CF Model

In [12]:
# Data needs to be converted to SFrame format. Build the SFrame directly from
# the pandas DataFrame; `tc.load_sframe` is meant for loading a saved SFrame
# from a path, not for in-memory conversion.
train_data = tc.SFrame(train_df)

In [14]:
def create_model_by_field(field_name, model_name):
    """Fit a Turi Create recommender and return its recommendations.

    Despite the name, the value returned is not the fitted model but the
    recommendations it produces, converted to a pandas DataFrame.

    Relies on the notebook-level ``train_data`` (training SFrame) and
    ``train_df`` (training DataFrame) being defined.

    Args:
        field_name: Name of the column in ``train_data`` used as the target.
        model_name: ``"SVG"`` selects ``tc.ranking_factorization_recommender``;
            any other value selects ``tc.factorization_recommender``.

    Returns:
        The result of ``recommend(..., k=5)`` for every distinct user in
        ``train_df``, as a pandas DataFrame.
    """
    if model_name == "SVG":
        recommender = tc.ranking_factorization_recommender
    else:
        recommender = tc.factorization_recommender

    fitted = recommender.create(
        train_data, user_id='user', item_id='course', target=field_name
    )
    all_users = train_df["user"].unique().tolist()
    recommendations = fitted.recommend(users=all_users, k=5)
    return recommendations.to_dataframe()

In [15]:
svg_model = create_model_by_field("rating", "SVG")
mf_model = create_model_by_field("rating", "MF")

## Evaluation

In [16]:
def rank_to_array(recommendations_dataframe, test_dataframe, max_k=5):
    """Turn per-user recommendations into binary relevance lists.

    For every user in ``recommendations_dataframe`` (iterated in the order
    produced by ``groupby("user")``), each recommended course is marked 1 if
    that user has the course in ``test_dataframe`` and 0 otherwise.

    Args:
        recommendations_dataframe: DataFrame with ``user`` and ``course``
            columns; rows of one user are kept in their original order.
        test_dataframe: DataFrame with ``user`` and ``course`` columns holding
            the held-out interactions.
        max_k: Upper bound applied to each user's test-course count
            (defaults to 5, the previously hard-coded value).

    Returns:
        A tuple ``(final_array, len_courses)`` where ``final_array[i]`` is the
        0/1 relevance list of the i-th user and ``len_courses[i]`` is
        ``min(number of test rows for that user, max_k)``. Users with no test
        rows get a count of 0.
    """
    # Index the test set once instead of filtering the whole frame per user.
    test_courses_by_user = {}
    for user, course in zip(test_dataframe["user"], test_dataframe["course"]):
        test_courses_by_user.setdefault(user, []).append(course)

    final_array = []
    len_courses = []
    for name, group in recommendations_dataframe.groupby("user"):
        courses_list = test_courses_by_user.get(name, [])
        relevant = set(courses_list)  # constant-time membership checks
        final_array.append(
            [int(course in relevant) for course in group["course"].to_list()]
        )
        # Count is taken from the list (duplicates included), then capped.
        len_courses.append(min(len(courses_list), max_k))
    return final_array, len_courses

In [18]:
def precision_at_k(r, k):
    """Precision of the first ``k`` ranked items.

    An item counts as relevant when its score is nonzero, so the result is
    the fraction of nonzero entries among the first ``k`` elements of ``r``.

    Examples:
        >>> r = [0, 0, 1]
        >>> float(precision_at_k(r, 1))
        0.0
        >>> float(precision_at_k(r, 2))
        0.0
        >>> float(precision_at_k(r, 3))
        0.3333333333333333
        >>> precision_at_k(r, 4)
        Traceback (most recent call last):
            ...
        ValueError: Relevance score length < k

    Args:
        r: Relevance scores (list or numpy array) in rank order, best-ranked
            item first.
        k: Cutoff rank; must be at least 1 (checked with ``assert``).

    Returns:
        Precision @ k, as returned by ``np.mean``.

    Raises:
        ValueError: If fewer than ``k`` scores are available.
    """
    assert k >= 1
    top_k = np.asarray(r)[:k]
    is_relevant = top_k != 0
    # A short slice means the caller asked for more ranks than were supplied.
    if is_relevant.size != k:
        raise ValueError('Relevance score length < k')
    return np.mean(is_relevant)

In [19]:
def mean_precision_at_k(r, k):
    """Average ``precision_at_k`` over users, each with its own cutoff.

    Args:
        r: Sequence of per-user relevance lists.
        k: Sequence of per-user cutoffs; ``k[i]`` is applied to ``r[i]``.

    Returns:
        Sum of the per-user precisions divided by ``len(k)``.
    """
    total = 0.0
    for idx, relevance in enumerate(r):
        total += precision_at_k(relevance, k[idx])
    return total / len(k)

In [20]:
def custom_mean_precision(r,k):
  precision = 0.0
  for i in range(0,len(r)):
    precision += np.sum(r[i]) / k[i]
  return precision / len(k)

### Evaluation results

In [21]:
# Score the "SVG" recommendations (ranking factorization recommender) against
# the test ratings and print both precision variants.
final_array , len_courses = rank_to_array(svg_model, test_df)
print("custom ",custom_mean_precision(final_array , len_courses))
print("At_k ",mean_precision_at_k(final_array , len_courses))

custom  0.02798734485276223
At_k  0.0034071550255536627


In [23]:
# Same evaluation for the "MF" recommendations (factorization recommender).
# NOTE: this rebinds final_array and len_courses.
final_array , len_courses = rank_to_array(mf_model, test_df)
print("custom ",custom_mean_precision(final_array , len_courses))
print("At_k ",mean_precision_at_k(final_array , len_courses))

custom  0.0029204185933317107
At_k  0.0002433682161109759
