In [1]:
from funk_svd.dataset import fetch_ml_ratings
from funk_svd import SVD
import pandas as pd
import random
from rdflib import Graph
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
from tqdm import tqdm

from viscars.evaluation.metrics.factory import MetricFactory, MetricType
from viscars.recommenders import Recommender

In [2]:
from rdflib.term import Literal, URIRef
from rdflib.namespace import DefinedNamespace, Namespace


class DASHB(DefinedNamespace):
    """
    The Dynamic Dashboard Ontology
    """

    _fail = True

    # Classes
    User: URIRef
    UserGroup: URIRef
    Dashboard: URIRef
    Tab: URIRef
    Widget: URIRef
    Property: URIRef
    Metric: URIRef
    Visualization: URIRef
    ObservationBoundary: URIRef

    # User
    memberOf: URIRef

    # Dashboard
    hasTab: URIRef

    # Tab
    hasWidget: URIRef

    # Widget
    createdBy: URIRef
    hasProperty: URIRef
    visualizedBy: URIRef
    hasObservationBoundary: URIRef

    # Property
    produces: URIRef

    # ObservationBoundary
    hasMinBoundary: Literal
    hasMaxBoundary: Literal
    hasBoundaryLabel: Literal

    _NS = Namespace('http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#')


In [3]:
graph = Graph()
graph.parse('data/protego/graph.ttl')
graph.parse('data/protego/protego_zplus.ttl')

print(len(graph))

36269


In [4]:
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>
    PREFIX ssn-ext: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/ssn-extension/>

    SELECT ?user ?context ?item WHERE {
        ?sensor ssn-ext:subSystemOf ?context ;
            sosa:observes ?item .
            
        ?widget dashb:hasProperty ?item ;
                dashb:createdBy ?user .                
    }
'''

result = list(graph.query(qry))
users = list(set([str(row.user) for row in result]))
items = list(set([str(row.item) for row in result]))
contexts = list(set([str(row.context) for row in result]))

ratings = pd.DataFrame({
    'u_id': [users.index(str(row.user)) for row in result],
    'i_id': [items.index(str(row.item)) for row in result],
    'rating': len(result) * [1],
    'c_id': [contexts.index(str(row.context)) for row in result]
})
print(ratings.head())

   u_id  i_id  rating  c_id
0     0    22       1     2
1     3    23       1     3
2     0    23       1     3
3     0    45       1     7
4     3     9       1    14


In [5]:
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>

    SELECT ?user WHERE {
        ?user a dashb:User .
    }
'''

users = pd.DataFrame({'id': [str(row.user) for row in graph.query(qry)]})

In [6]:
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>

    SELECT ?item WHERE {
        ?item a sosa:ObservableProperty .
    }
'''

items = pd.DataFrame({'id': [str(row.item) for row in graph.query(qry)]})

In [7]:
print(len(users), len(items))

4 2826


In [8]:
df = pd.DataFrame({'u_id': [], 'i_id': [], 'rating': []})

for u_id, row in users.iterrows():
    uid = row['id']
    for i_id, row in items.iterrows():
        iid = row['id']
        row = {'u_id': [u_id], 'i_id': [i_id]}
        if ((ratings['u_id'] == uid) & (ratings['i_id'] == iid)).any():
            row['rating'] = [1]
        else:
            row['rating'] = [0]

        df = pd.concat([df, pd.DataFrame(row)])

df['u_id'] = df['u_id'].astype(int)
df['i_id'] = df['i_id'].astype(int)

print(len(df))
print(len(df[df['rating'] == 1]))

11304
0


In [9]:
train = ratings.sample(frac=0.8, random_state=7)
val = ratings.drop(train.index.tolist()).sample(frac=0.5, random_state=8)
test = ratings.drop(train.index.tolist()).drop(val.index.tolist())

In [10]:
svd = SVD(lr=0.001, reg=0.005, n_epochs=100, n_factors=15, 
          early_stopping=True, shuffle=False, min_rating=0, max_rating=1)

svd.fit(X=train, X_val=val)

pred = svd.predict(test)
mae = mean_absolute_error(test['rating'], pred)

print(f'Test MAE: {mae:.2f}')

Preprocessing data...

Preprocessing data...

Epoch 1/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.8 sec
Epoch 2/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.0 sec

Training took 1 sec
Test MAE: 0.00


In [39]:
class MatrixFactorization(Recommender):
    def __init__(self, graph: Graph, verbose=False):
        super().__init__(graph, verbose)

    def _build_model(self):
        qry = '''
            PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
            PREFIX sosa: <http://www.w3.org/ns/sosa/>
            PREFIX ssn-ext: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/ssn-extension/>

            SELECT ?user ?context ?item WHERE {
                ?sensor ssn-ext:subSystemOf ?context ;
                    sosa:observes ?item .

                ?widget dashb:hasProperty ?item ;
                        dashb:createdBy ?user .                
            }
        '''

        result = list(self.graph.query(qry))
        self.users = list(set([str(row.user) for row in result]))
        self.items = list(set([str(row.item) for row in result]))
        self.contexts = list(set([str(row.context) for row in result]))
        
        model = SVD(lr=0.001, reg=0.005, n_epochs=100, n_factors=15, 
                    early_stopping=True, shuffle=False, min_rating=0, max_rating=1)
        self.model = model
        
    def fit(self, ratings: pd.DataFrame):        
        train = ratings.sample(frac=0.8, random_state=7)
        val = ratings.drop(train.index.tolist())
        
        self.model.fit(X=train, X_val=val)

    def predict(self, uid, cid, *kwargs):   
        # Get all items for context
        qry = f'''
            PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
            PREFIX sosa: <http://www.w3.org/ns/sosa/>
            PREFIX ssn-ext: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/ssn-extension/>

            SELECT ?property WHERE {{
                ?sensor ssn-ext:subSystemOf <{self.contexts[cid]}> ;
                    sosa:observes ?property .
            }}
        '''
        items = [str(row.property) for row in self.graph.query(qry)]
        
        scores = {}
        for iid in items:
            if iid in self.items:
                i_id = self.items.index(iid)
            else:
                i_id = len(self.items) + items.index(iid)
            
            df_ = pd.DataFrame({'u_id': [u_id], 'i_id': [i_id]})
            scores[i_id] = self.model.predict(df_)[0]

        recommendations = [{'contextId': cid, 'itemId': item, 'score': score} for item, score in scores.items()]
        random.seed()
        random.shuffle(recommendations)
        return sorted(recommendations, key=lambda n: n['score'], reverse=True)

    def top_n(self, uid: [], cid: [], n: int, **kwargs):
        pass

In [40]:
# Context metadata
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>
    PREFIX ssn-ext: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/ssn-extension/>

    SELECT ?context WHERE {
        ?sensor ssn-ext:subSystemOf ?context ;
            sosa:observes ?property .
    }
'''
context_metadata = {'id': []}

result = graph.query(qry)
for row in result:
    context_metadata.get('id').append(row[0])
context_metadata_df = pd.DataFrame.from_dict(context_metadata)

# User metadata
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>

    SELECT ?user ?username ?role WHERE {
        ?user dashb:memberOf ?role .
    }
'''
user_metadata = {'id': [], 'type': []}

result = graph.query(qry)
for row in result:
    user_metadata.get('id').append(row[0])
    user_metadata.get('type').append(row[1])
user_metadata_df = pd.DataFrame.from_dict(user_metadata)

# Item metadata
qry = '''
    PREFIX dashb: <http://dynamicdashboard.ilabt.imec.be/broker/ontologies/dashboard#>
    PREFIX sosa: <http://www.w3.org/ns/sosa/>

    SELECT ?property WHERE {
        ?property a sosa:ObservableProperty .
    }
'''
item_metadata = {'id': []}

result = graph.query(qry)
for row in result:
    item_metadata.get('id').append(row[0])
item_metadata_df = pd.DataFrame.from_dict(item_metadata)


def build_subgraph_from_ratings(ratings: pd.DataFrame) -> Graph:
    sub_graph = Graph()

    for idx, row in context_metadata_df.iterrows():
        cid = row['id']
        sub_graph += graph.triples((cid, None, None))
        sub_graph += graph.triples((None, None, cid))

    for idx, row in user_metadata_df.iterrows():
        uid = row['id']
        sub_graph += graph.triples((uid, DASHB.memberOf, None))

    for idx, row in item_metadata_df.iterrows():
        iid = row['id']
        sub_graph += graph.triples((iid, None, None))
        sub_graph += graph.triples((None, None, iid))

    for idx, row in ratings.iterrows():
        uid = row['u_id']
        iid = row['i_id']
        cid = row['c_id']

        sub_graph += graph.triples((uid, None, None))
        sub_graph += graph.triples((None, None, uid))
        sub_graph += graph.triples((iid, None, None))
        sub_graph += graph.triples((None, None, iid))

        sub_graph += graph.triples((cid, None, None))
        sub_graph += graph.triples((None, None, cid))

    return sub_graph

In [47]:
class KFoldCrossValidation():

    def __init__(self, recommender: Recommender, metrics: [], k=5):
        """
        :param project_id: ID of the project (to load the correct data).
        :param recommender: Recommender
        :param metrics: List of Metrics
        :param k: Number of folds
        """
        self.recommender = recommender
        self.metrics = metrics
        self.k = k

    def evaluate(self, ratings, **kwargs):
        kf = KFold(n_splits=self.k, shuffle=True)

        n_fold = 0

        result = {'folds': [], 'result': {}}
        for train_idx, test_idx in kf.split(ratings):
            train = ratings.iloc[train_idx]
            test = ratings.iloc[test_idx]
            
            self.recommender.fit(train)

            fold_scores = {}

            for uid in tqdm(test['u_id'].unique()):
                df_user = test.loc[test['u_id'] == uid]

                for cid in df_user['c_id']:
                    predictions = self.recommender.predict(uid, cid, **kwargs)
                    recommendations = [r['itemId'] for r in predictions]

                    truth = []
                    t_user = test.loc[test['u_id'] == uid]
                    for idx, row in t_user.iterrows():
                        if row['c_id'] == cid:
                            truth.append(row['i_id'])
                    # truth = list(test.loc[test['user'] == uid].loc[test['context'] == cid]['item'])
                    print(predictions)
                    print(recommendations)
                    print(truth)
                    
                    for metric in self.metrics:
                        if str(metric) not in fold_scores.keys():
                            fold_scores[str(metric)] = []
                        score = metric.calculate(recommendations, truth)
                        fold_scores[str(metric)].append(score)

            result_for_fold = {}
            for metric, scores in fold_scores.items():
                avg = sum(scores) / len(scores)
                result_for_fold[metric] = avg

                if metric not in result['result'].keys():
                    result['result'][metric] = []
                result['result'][metric].append(avg)
            result['folds'].append(result_for_fold)

            n_fold += 1

        final_results = {}
        for metric_type, score in result['result'].items():
            final_results[metric_type] = sum(score) / len(score)

        result['result'] = final_results
        return result

In [48]:
metric_factory = MetricFactory()

metrics = ['f1@1', 'ndcg@1', 'ndcg@3']
parsed_metrics = []
for metric in metrics:
    m_split = metric.split('@')
    m_type = m_split[0]
    n = int(m_split[1]) if len(m_split) >= 2 else None

    metric_ = metric_factory.get(MetricType.reverse_lookup(m_type), n)
    parsed_metrics.append(metric_)

In [None]:
recommender = MatrixFactorization(graph)

evaluator = KFoldCrossValidation(recommender, metrics=parsed_metrics, k=5)
result = evaluator.evaluate(ratings)

for fold in result['folds']:
    print(fold)
print(result['result'])

Preprocessing data...

Preprocessing data...

Epoch 1/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.0 sec
Epoch 2/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.0 sec

Training took 0 sec


100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 37.18it/s]


[{'contextId': 2, 'itemId': 49, 'score': 1}, {'contextId': 2, 'itemId': 48, 'score': 1}, {'contextId': 2, 'itemId': 55, 'score': 1}, {'contextId': 2, 'itemId': 53, 'score': 1}, {'contextId': 2, 'itemId': 56, 'score': 1}, {'contextId': 2, 'itemId': 52, 'score': 1}, {'contextId': 2, 'itemId': 22, 'score': 1}, {'contextId': 2, 'itemId': 26, 'score': 1}, {'contextId': 2, 'itemId': 51, 'score': 1}, {'contextId': 2, 'itemId': 54, 'score': 1}]
[49, 48, 55, 53, 56, 52, 22, 26, 51, 54]
[22]
[{'contextId': 7, 'itemId': 50, 'score': 1}, {'contextId': 7, 'itemId': 54, 'score': 1}, {'contextId': 7, 'itemId': 2, 'score': 1}, {'contextId': 7, 'itemId': 53, 'score': 1}, {'contextId': 7, 'itemId': 56, 'score': 1}, {'contextId': 7, 'itemId': 37, 'score': 1}, {'contextId': 7, 'itemId': 55, 'score': 1}, {'contextId': 7, 'itemId': 49, 'score': 1}, {'contextId': 7, 'itemId': 45, 'score': 0.9846470105683822}, {'contextId': 7, 'itemId': 1, 'score': 0.9530662735724774}]
[50, 54, 2, 53, 56, 37, 55, 49, 45, 1]
[

100%|█████████████████████████████████████████████| 4/4 [00:00<00:00, 44.08it/s]


[{'contextId': 10, 'itemId': 50, 'score': 1}, {'contextId': 10, 'itemId': 54, 'score': 1}, {'contextId': 10, 'itemId': 16, 'score': 1}, {'contextId': 10, 'itemId': 48, 'score': 1}, {'contextId': 10, 'itemId': 12, 'score': 1}, {'contextId': 10, 'itemId': 53, 'score': 1}, {'contextId': 10, 'itemId': 49, 'score': 1}, {'contextId': 10, 'itemId': 55, 'score': 1}, {'contextId': 10, 'itemId': 56, 'score': 1}, {'contextId': 10, 'itemId': 17, 'score': 0.9733051371212954}]
[50, 54, 16, 48, 12, 53, 49, 55, 56, 17]
[12]
[{'contextId': 13, 'itemId': 55, 'score': 1}, {'contextId': 13, 'itemId': 54, 'score': 1}, {'contextId': 13, 'itemId': 49, 'score': 1}, {'contextId': 13, 'itemId': 52, 'score': 1}, {'contextId': 13, 'itemId': 3, 'score': 1}, {'contextId': 13, 'itemId': 53, 'score': 1}, {'contextId': 13, 'itemId': 51, 'score': 1}, {'contextId': 13, 'itemId': 40, 'score': 1}, {'contextId': 13, 'itemId': 56, 'score': 1}, {'contextId': 13, 'itemId': 33, 'score': 0.9800663877967742}]
[55, 54, 49, 52, 3,

  0%|                                                     | 0/3 [00:00<?, ?it/s]

[{'contextId': 3, 'itemId': 53, 'score': 1}, {'contextId': 3, 'itemId': 36, 'score': 1}, {'contextId': 3, 'itemId': 49, 'score': 1}, {'contextId': 3, 'itemId': 55, 'score': 1}, {'contextId': 3, 'itemId': 54, 'score': 1}, {'contextId': 3, 'itemId': 50, 'score': 1}, {'contextId': 3, 'itemId': 52, 'score': 1}, {'contextId': 3, 'itemId': 56, 'score': 1}, {'contextId': 3, 'itemId': 51, 'score': 1}, {'contextId': 3, 'itemId': 23, 'score': 0.9862147405048413}]
[53, 36, 49, 55, 54, 50, 52, 56, 51, 23]
[23]
[{'contextId': 6, 'itemId': 10, 'score': 1}, {'contextId': 6, 'itemId': 11, 'score': 1}, {'contextId': 6, 'itemId': 56, 'score': 1}, {'contextId': 6, 'itemId': 49, 'score': 1}, {'contextId': 6, 'itemId': 48, 'score': 1}, {'contextId': 6, 'itemId': 53, 'score': 1}, {'contextId': 6, 'itemId': 50, 'score': 1}, {'contextId': 6, 'itemId': 51, 'score': 1}, {'contextId': 6, 'itemId': 52, 'score': 1}, {'contextId': 6, 'itemId': 55, 'score': 1}]
[10, 11, 56, 49, 48, 53, 50, 51, 52, 55]
[10]


100%|█████████████████████████████████████████████| 3/3 [00:00<00:00, 35.17it/s]


[{'contextId': 17, 'itemId': 52, 'score': 1}, {'contextId': 17, 'itemId': 48, 'score': 1}, {'contextId': 17, 'itemId': 55, 'score': 1}, {'contextId': 17, 'itemId': 56, 'score': 1}, {'contextId': 17, 'itemId': 50, 'score': 1}, {'contextId': 17, 'itemId': 49, 'score': 1}, {'contextId': 17, 'itemId': 53, 'score': 1}, {'contextId': 17, 'itemId': 8, 'score': 1}, {'contextId': 17, 'itemId': 51, 'score': 1}, {'contextId': 17, 'itemId': 19, 'score': 1}]
[52, 48, 55, 56, 50, 49, 53, 8, 51, 19]
[19]
[{'contextId': 7, 'itemId': 55, 'score': 1}, {'contextId': 7, 'itemId': 53, 'score': 1}, {'contextId': 7, 'itemId': 50, 'score': 1}, {'contextId': 7, 'itemId': 56, 'score': 1}, {'contextId': 7, 'itemId': 45, 'score': 1}, {'contextId': 7, 'itemId': 1, 'score': 1}, {'contextId': 7, 'itemId': 49, 'score': 1}, {'contextId': 7, 'itemId': 54, 'score': 1}, {'contextId': 7, 'itemId': 2, 'score': 1}, {'contextId': 7, 'itemId': 37, 'score': 0.9309748382380031}]
[55, 53, 50, 56, 45, 1, 49, 54, 2, 37]
[1]
[{'con

  0%|                                                     | 0/4 [00:00<?, ?it/s]

[{'contextId': 3, 'itemId': 36, 'score': 1}, {'contextId': 3, 'itemId': 53, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 49, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 55, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 56, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 54, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 52, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 51, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 50, 'score': 0.9999911469462187}, {'contextId': 3, 'itemId': 23, 'score': 0.9946963757854176}]
[36, 53, 49, 55, 56, 54, 52, 51, 50, 23]
[23]


100%|█████████████████████████████████████████████| 4/4 [00:00<00:00, 46.17it/s]


[{'contextId': 14, 'itemId': 24, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 50, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 52, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 51, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 54, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 49, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 56, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 53, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 55, 'score': 0.9999911469462187}, {'contextId': 14, 'itemId': 9, 'score': 0.9796813394400548}]
[24, 50, 52, 51, 54, 49, 56, 53, 55, 9]
[24]
[{'contextId': 4, 'itemId': 50, 'score': 0.9999911469462187}, {'contextId': 4, 'itemId': 53, 'score': 0.9999911469462187}, {'contextId': 4, 'itemId': 55, 'score': 0.9999911469462187}, {'contextId': 4, 'itemId': 56, 'score': 0.9999911469462187}, {'contextId': 4, 'itemId': 54, 'score': 0.9999911469462187}, {'contextId': 4, 'itemId': 38

  0%|                                                     | 0/4 [00:00<?, ?it/s]

[{'contextId': 12, 'itemId': 18, 'score': 1}, {'contextId': 12, 'itemId': 50, 'score': 1}, {'contextId': 12, 'itemId': 54, 'score': 1}, {'contextId': 12, 'itemId': 34, 'score': 1}, {'contextId': 12, 'itemId': 53, 'score': 1}, {'contextId': 12, 'itemId': 48, 'score': 1}, {'contextId': 12, 'itemId': 49, 'score': 1}, {'contextId': 12, 'itemId': 56, 'score': 1}, {'contextId': 12, 'itemId': 55, 'score': 1}, {'contextId': 12, 'itemId': 31, 'score': 0.9694261026973692}]
[18, 50, 54, 34, 53, 48, 49, 56, 55, 31]
[34]
[{'contextId': 0, 'itemId': 50, 'score': 1}, {'contextId': 0, 'itemId': 51, 'score': 1}, {'contextId': 0, 'itemId': 56, 'score': 1}, {'contextId': 0, 'itemId': 49, 'score': 1}, {'contextId': 0, 'itemId': 44, 'score': 1}, {'contextId': 0, 'itemId': 55, 'score': 1}, {'contextId': 0, 'itemId': 54, 'score': 1}, {'contextId': 0, 'itemId': 53, 'score': 1}, {'contextId': 0, 'itemId': 52, 'score': 1}, {'contextId': 0, 'itemId': 43, 'score': 0.9438403322727204}]
[50, 51, 56, 49, 44, 55, 54,

100%|█████████████████████████████████████████████| 4/4 [00:00<00:00, 50.94it/s]

[{'contextId': 5, 'itemId': 55, 'score': 1}, {'contextId': 5, 'itemId': 52, 'score': 1}, {'contextId': 5, 'itemId': 54, 'score': 1}, {'contextId': 5, 'itemId': 21, 'score': 1}, {'contextId': 5, 'itemId': 49, 'score': 1}, {'contextId': 5, 'itemId': 50, 'score': 1}, {'contextId': 5, 'itemId': 32, 'score': 1}, {'contextId': 5, 'itemId': 51, 'score': 1}, {'contextId': 5, 'itemId': 56, 'score': 1}, {'contextId': 5, 'itemId': 53, 'score': 1}]
[55, 52, 54, 21, 49, 50, 32, 51, 56, 53]
[32]
[{'contextId': 3, 'itemId': 56, 'score': 1}, {'contextId': 3, 'itemId': 55, 'score': 1}, {'contextId': 3, 'itemId': 50, 'score': 1}, {'contextId': 3, 'itemId': 23, 'score': 1}, {'contextId': 3, 'itemId': 54, 'score': 1}, {'contextId': 3, 'itemId': 36, 'score': 1}, {'contextId': 3, 'itemId': 53, 'score': 1}, {'contextId': 3, 'itemId': 51, 'score': 1}, {'contextId': 3, 'itemId': 49, 'score': 1}, {'contextId': 3, 'itemId': 52, 'score': 1}]
[56, 55, 50, 23, 54, 36, 53, 51, 49, 52]
[36]
[{'contextId': 17, 'itemId




In [45]:
class LeaveOneOutCrossValidation():

    def __init__(self, recommender: Recommender, metrics: []):
        """
        :param recommender: Recommender
        :param metrics: List of Metrics
        :param k: Number of folds
        """
        self.recommender = recommender
        self.metrics = metrics

    def _split_ratings(self, ratings, cid):
        train = ratings[ratings['c_id'] != cid]
        test = ratings[ratings['c_id'] == cid]

        return train, test

    def evaluate(self, ratings, **kwargs):
        result = {'folds': [], 'result': {}}
        
        for cid in ratings['c_id'].unique():
            train, test = self._split_ratings(ratings, cid)

            self.recommender.fit(train)

            fold_scores = {}

            df_context = test.loc[test['c_id'] == cid]

            for uid in df_context['u_id']:
                predictions = self.recommender.predict(uid, cid, **kwargs)
                recommendations = [r['itemId'] for r in predictions]

                truth = []
                t_context = test.loc[test['c_id'] == cid]
                for idx, row in t_context.iterrows():
                    if row['u_id'] == uid:
                        truth.append(row['i_id'])
                #truth = list(test.loc[test['user'] == uid].loc[test['context'] == cid]['item'])
                print(predictions)
                print(recommendations)
                print(truth)

                for metric in self.metrics:
                    if str(metric) not in fold_scores.keys():
                        fold_scores[str(metric)] = []
                    score = metric.calculate(recommendations, truth)
                    fold_scores[str(metric)].append(score)

            result_for_fold = {'cid': cid}
            for metric_type, scores in fold_scores.items():
                avg = sum(scores) / len(scores)
                result_for_fold[metric_type] = avg

                if metric_type not in result['result'].keys():
                    result['result'][metric_type] = []
                result['result'][metric_type].append(avg)
            result['folds'].append(result_for_fold)

        final_results = {}
        for metric_type, score in result['result'].items():
            final_results[metric_type] = sum(score) / len(score)

        result['result'] = final_results
        return result

In [46]:
recommender = MatrixFactorization(graph)

evaluator = LeaveOneOutCrossValidation(recommender, metrics=parsed_metrics)
result = evaluator.evaluate(ratings)

for fold in result['folds']:
    print(fold)
print(result['result'])

Preprocessing data...

Preprocessing data...

Epoch 1/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.0 sec
Epoch 2/100  | val_loss: 0.00 - val_rmse: 0.00 - val_mae: 0.00 - took 0.0 sec

Training took 0 sec
[{'contextId': 2, 'itemId': 51, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 22, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 52, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 48, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 26, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 56, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 49, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 54, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 55, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 53, 'score': 0.9998883430554173}]
[51, 22, 52, 48, 26, 56, 49, 54, 55, 53]
[22, 26]
[{'contextId': 2, 'itemId': 52, 'score': 0.9998883430554173}, {'contextId': 2, 'itemId': 56, 'score': 0.999888343055