In [1]:
# Notebook-wide IPython setup: extensions first, then inline-figure options.
# (Re)load the autoreload and blackcellmagic extensions.
%reload_ext autoreload
%reload_ext blackcellmagic
# Autoreload mode 2, so edits to imported project modules are picked up
# without restarting the kernel.
%autoreload 2
# NOTE(review): the two lines below assign the same option
# (InlineBackend.figure_format); the later 'svg' assignment is the one left in
# effect, which makes the 'retina' line redundant. Confirm which format is
# intended and drop the other.
%config InlineBackend.figure_format='retina'
%config InlineBackend.figure_format='svg'

In [2]:
import os
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import re

import sys

sys.path.extend(["../bpr", "../"])

from train_simple import train_bpr, RandomSampler
from functools import partial
from kutils import (
    load_amazon_electronics,
    load_citeulike,
    load_mind,
    load_ml100k,
    load_ml1m,
    load_tripadvisor,
    load_yelp,
    trim
)

In [3]:

def print_stats(df):
    """Print interaction, user and item counts plus matrix density for ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``"user_id"`` and ``"item_id"`` columns. Every row is
        counted as one interaction.

    Notes
    -----
    Density is ``100 * interactions / (distinct users * distinct items)``,
    i.e. it is printed as a percentage. When the frame has no users or no
    items the user-item matrix has zero cells and density is undefined, so
    ``nan`` is printed instead of raising ``ZeroDivisionError``.
    """
    n_interactions = len(df)
    n_items = df["item_id"].nunique()
    n_users = df["user_id"].nunique()

    # Guard the denominator: an empty (or fully trimmed) frame has no cells.
    n_cells = n_users * n_items
    density = 100 * n_interactions / n_cells if n_cells else float("nan")

    print(f"{'Interactions':<15}: {n_interactions:>10,}")
    print(f"{'Users':<15}: {n_users:>10,}")
    print(f"{'Items':<15}: {n_items:>10,}")
    print(f"{'Density':<15}: {density:>10.4f}")


# Raw-file location for every dataset, keyed by dataset name.
# The citeulike entry is commented out in both tables below.
paths = {
    "amazon": "../data/amazon/amazon-electronics.csv",
    # "citeulike": "../data/citeulike/mult.dat",
    "mind": "../data/mind/behaviors.tsv",
    "ml-1m": "../data/ml-1m/ratings.dat",
    "ml100k": "../data/ml-100k/u.data",
    "tripadvisor": "../data/tripadvisor/tripadvisor.csv",
    "yelp": "../data/yelp/yelp.csv",
}

# Loader callable for every dataset, keyed by the same names as `paths`.
loaders = {
    "amazon": load_amazon_electronics,
    # "citeulike": load_citeulike,
    "mind": load_mind,
    "ml-1m": load_ml1m,
    "ml100k": load_ml100k,
    "tripadvisor": load_tripadvisor,
    "yelp": load_yelp,
}

In [4]:
# Print summary statistics for every dataset that has a registered path.
for dname, path in paths.items():
    print(dname.upper())

    # Only the registry lookup may legitimately miss. Handling it explicitly
    # keeps a KeyError raised inside a loader, trim() or print_stats() (for
    # example a missing column) from being misreported as "No loader".
    loader = loaders.get(dname)
    if loader is None:
        print(f"No loader for {dname}")
        continue

    df = loader(path)
    subset = trim(df, copy=False)
    print_stats(subset)
    print()

AMAZON
Interactions   :  1,089,763
Users          :     63,175
Items          :    193,120
Density        :     0.0089

MIND
Interactions   :    798,330
Users          :     26,938
Items          :     31,846
Density        :     0.0931

ML-1M
Interactions   :  1,000,209
Users          :      6,040
Items          :      3,706
Density        :     4.4684

ML100K
Interactions   :    100,000
Users          :        943
Items          :      1,682
Density        :     6.3047

TRIPADVISOR
Interactions   :      3,312
Users          :        278
Items          :      1,141
Density        :     1.0441

YELP
Interactions   :     44,022
Users          :      2,398
Items          :      8,147
Density        :     0.2253

