In [None]:
#########################################################################################################################################
## Copyright (c) 2016 - Technicolor R&D France
## 
## The source code form of this Open Source Project components is subject to the terms of the Clear BSD license.
##
## You can redistribute it and/or modify it under the terms of the Clear BSD License (http://directory.fsf.org/wiki/License:ClearBSD)
##
## See LICENSE file for more details.
##
## This software project does also include third party Open Source Software: See data/LICENSE file for more details.
#########################################################################################################################################

# Initial Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import random

import TAUITM.data_construction
import TAUITM.identification
import TAUITM.performance
%pylab inline

# Data Loading

Read the list of movies contained in the MovieTweetings dataset.

In [None]:
# Load the movie list from data/movies.dat. `names` is indexed by topic column
# position further down and `mid_to_pos` is handed to the identification routines.
(movies, names, mid_to_pos) = TAUITM.data_construction.read_movies(
    'data/movies.dat'
)

Read the ratings of MovieTweetings. The variable `ratings` is a dictionary mapping each user id to that user's ratings. Each rating has the form [movie, rating_value, timestamp].

In [None]:
# Load the per-user ratings from data/ratings.dat.
ratings = TAUITM.data_construction.read_ratings_MT(
    'data/ratings.dat'
)

Create two sets of accounts: a small one and a big one.

In [None]:
# --- Parameters used to build the composite (multi-user) accounts ---
household_size = 2         # Number of members in composite households.
min_rating = 100           # Minimum number of ratings per user (forwarded as `min_ratings`).
close = True               # True to pair users who are active during nearby time periods.
time_period = 'week'       # Periodicity used: 'week', 'day' or 'absolute'.
min_time_distance = 60*60  # Time distance in seconds (one hour), forwarded as `min_time_distance`.

# Default account set.
composite = TAUITM.data_construction.generate_composite_users(
    ratings, movies, household_size,
    min_ratings=min_rating,
    remove_unknown=True,
    close=close,
    time=time_period,
    seed=0,
    min_time_distance=min_time_distance,
)

# Same settings, but generated with big=False.
composite_small = TAUITM.data_construction.generate_composite_users(
    ratings, movies, household_size,
    min_ratings=min_rating,
    remove_unknown=True,
    close=close,
    big=False,
    time=time_period,
    seed=0,
    min_time_distance=min_time_distance,
)

accounts = composite
# Both sets concatenated; this is what the EM routines below receive as first argument.
composite_all = composite + composite_small

# Visualize the first accounts

In [None]:
def set_color(p):
    """Return the RGB tuple used in the plots for value ``p``.

    The red channel is always 0, the green channel is ``p`` and the blue
    channel is ``1 - p``; ``p = 0`` is therefore pure blue and ``p = 1`` pure
    green. Values of ``p`` outside [0, 1] give components outside the 0-1
    range that matplotlib accepts for RGB tuples.
    """
    green = p
    blue = 1 - p
    return (0, green, blue)

def set_shape(i):
    """Return the matplotlib marker used in the plots for index ``i``.

    Index 0 maps to a circle, 1 to a square and 2 to a star. Any other index
    raises ``KeyError``.
    """
    markers = dict(enumerate(('o', 's', '*')))
    return markers[i]

In [None]:
# Rebuild the composite accounts with absolute timestamps; used for display only.
composite_absolute = TAUITM.data_construction.generate_composite_users(
    ratings, movies, household_size,
    min_ratings=min_rating,
    remove_unknown=True,
    close=close,
    time='absolute',
    seed=0,
    min_time_distance=min_time_distance,
)

# Number of accounts drawn: at most 10, never more than what was generated.
n = min(10, len(composite_absolute))

scale = 1.
plt.figure(num=None, figsize=(15, 10), dpi=80, facecolor='w', edgecolor='k')
for u in range(n):
    # One separator per account (drawn once, not once per event).
    plt.axhline(y=u+1.4, c='black')
    for event in composite_absolute[u][1]:
        # event[2] is the x position; event[1] selects the sub-band, the colour
        # and the marker. The random jitter spreads overlapping points vertically.
        # np.random is used explicitly because %pylab may rebind the bare name `random`.
        jitter = 0.7*np.random.random()
        y = (0.5+u)*scale + ((jitter + 0.8*event[1])*scale/(2.2))
        plt.scatter(event[2], y, color=set_color(event[1]), marker=set_shape(event[1]))

plt.xlabel('Consumption time (hours since epoch)', fontsize=20)
plt.ylabel('Account #', fontsize=20)
# Only the vertical range is fixed; the x range stays auto-scaled.
plt.ylim(0.4, n+0.4)
plt.yticks(range(1, n+1))
plt.show()

# Apply the different identification algorithms

In [None]:
verbose=False # Passed to the identification routines below; keep False to avoid all displays

Baseline: Assign all movies to the first user

In [None]:
# Baseline predictions: every movie is attributed to the first user.
predictions_one = TAUITM.identification.one_assignement(
    composite, movies, household_size
)

Algorithm of Kabutoya et al.

In [None]:
# EM run for the Kabutoya et al. model: fitted on composite_all, with
# `composite` passed as the fourth argument.
(Y_kab, perf_kab, predictions_kab, params_kab) = TAUITM.identification.em_kabutoya(
    composite_all, movies, mid_to_pos, composite,
    verbose=verbose,
    Nb_it=200,
    alpha=0.1,
    beta=0.1,
    gamma=0.1,
)

Improvement of Kabutoya algorithm using Von Mises instead of pseudo-Gaussians.

In [None]:
# EM run for the von Mises variant; same inputs as the Kabutoya run plus K=50.
(Y_VM, perf_VM, predictions_VM, param_VM) = TAUITM.identification.em_ITVM(
    composite_all, movies, mid_to_pos, composite,
    verbose=verbose,
    Nb_it=200,
    alpha=0.1,
    beta=0.1,
    gamma=0.1,
    K=50,
)

Algorithm developed in the paper: Time Aware User Identification With Topic Models

In [None]:
# Two implementations are available. em_TAUITM (used here) is slightly faster
# but needs plenty of memory; em_TAUITM_memory is less memory intensive.
(Y_TAUITM, perf_TAUITM, predictions_TAUITM, params_TAUITM) = TAUITM.identification.em_TAUITM(
    composite_all, movies, mid_to_pos, composite,
    verbose=verbose,
    Nb_it=200,
    tau=0.1,
    rho=0.1,
    beta=0.1,
    K=50,
    R=10,
    household_size=2,
    alpha=0.1,
    gamma=0.1,
    Genre_weight=0,
)
# Low-memory alternative (same arguments, three return values instead of four):
#perf_TAUITM,predictions_TAUITM,params_TAUITM=TAUITM.identification.em_TAUITM_memory(composite_all,movies,mid_to_pos,composite,verbose=verbose,Nb_it=200,tau=0.1,rho=0.1,beta=0.1,K=50,R=10,household_size=2,alpha=0.1,gamma=0.1,Genre_weight=0)

# Results

Plot all metrics

In [None]:
# Predictions of every method, keyed by the label passed on to the evaluation.
to_be_evaluated = {
    'IPG': predictions_kab,
    'ITVM': predictions_VM,
    'TAUITM': predictions_TAUITM,
    'One': predictions_one,
}
# Forwarded as `shape=` / `color=`; presumably the line styles and colours of the curves.
shape = [':', '-', '--', '-.']
color = ['b', 'r', 'g', 'c', 'm', 'y', 'k']

# Same evaluation for every metric, in the original order; each result keeps
# its own variable so later cells can pick the one to display.
measures = ("similarity", "purity", "ami", "arand")
stats, stats_purity, stats_mi, stats_arand = [
    TAUITM.performance.stats(
        composite, to_be_evaluated, household_size,
        cdf=True, measure=measure, shape=shape, color=color,
    )
    for measure in measures
]


Display values for one particular metric

In [None]:
# Metric to display: replace `stats` with stats_purity, stats_mi or stats_arand
# to inspect another one.
# Written so that it runs unchanged on Python 2 and Python 3:
# .items() instead of .iteritems(), and single-argument print calls.
for name, values in stats.items():
    print(name)
    for k, v in values.items():
        print("%s %s" % (k, v))
    print("")

# Visualizations

Show the item topics

In [None]:
Phi = params_TAUITM['Phi']  # Choose the experiment to display
k = 10  # Number of most representative films printed for each topic
for i, phi in enumerate(Phi):
    print("Topic %s" % i)
    # Positions of the k largest entries of phi, largest first.
    for j in np.argsort(phi)[::-1][0:k]:
        print("%s %s" % (names[j], phi[j]))
    print("")


Show the time topics.

In [None]:
def time_topic_emission(t,Gamma,r,T):
    """Linearly interpolate row ``r`` of ``Gamma`` at the (possibly fractional) time ``t``.

    Parameters
    ----------
    t : number
        Position along the time axis; may be fractional.
    Gamma : 2-D array
        Indexed as ``Gamma[r, time_index]``.
    r : int
        Row of ``Gamma`` to read (the time topic).
    T : int
        Period of the time axis; both time indices are taken modulo ``T``,
        so ``t == T`` gives the same result as ``t == 0``.

    Returns
    -------
    The weighted average of ``Gamma[r, floor(t)]`` and ``Gamma[r, floor(t)+1]``,
    with weights ``1 - frac`` and ``frac`` where ``frac = t - floor(t)``.
    """
    lower = int(np.floor(t))
    # For an integer t, frac is 0 and only the lower sample contributes.
    frac = t - lower
    return (1-frac)*Gamma[r, lower % T] + frac*Gamma[r, (lower+1) % T]
# Abbreviated day names, Monday first; indexed with (hour // 24) when building tick labels.
day_to_string = [
    "Mon.", "Tue.", "Wed.", "Thu.",
    "Fri.", "Sat.", "Sun.",
]

In [None]:
plt.figure(num=None, figsize=(10, 10), facecolor='w', edgecolor='k')
ax = plt.subplot(projection='polar')

period = params_TAUITM['T']
angle_per_unit = 2*np.pi/period
# Sampling grid shared by all topics (computed once, outside the loop).
all_t = np.arange(0, period, 0.01)

for r in range(params_TAUITM['R']): # Time topic to be shown
    PE = np.array([time_topic_emission(t, params_TAUITM['Gamma'], r, period) for t in all_t])
    # Angle pi/2 - t*angle_per_unit: time 0 sits at pi/2 and the angle decreases as t grows.
    # The 0.20 radial offset keeps the curves away from the centre of the plot.
    ax.plot(np.pi/2 - all_t*angle_per_unit, 0.20+PE, label=r, linewidth=4)

# One tick every 6 hours over a week; midnight ticks carry the day name.
tick_values = range(0, 24*7, 6)
# Floor division keeps the day index an integer on both Python 2 and Python 3.
tick_labels = [(day_to_string[v//24]+" 0" if not(v%24) else v%24) for v in tick_values]
plt.xticks(np.pi/2 - np.array(tick_values)*angle_per_unit, tick_labels, rotation='vertical')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# Radial tick labels undo the 0.20 offset applied to the curves.
plt.yticks([0.20,0.25,0.30,0.35],[0,0.05,0.10,0.15])
plt.show()

Look at a particular account

In [None]:
def active_user(t,u,Psi,Pi,Gamma,T):
    """Knowing the time ``t``, score how likely each member of account ``u`` is to be active.

    Parameters
    ----------
    t : number
        Position along the time axis; may be fractional or a plain integer.
    u : int
        Index of the account in ``Psi`` and ``Pi``.
    Psi : array
        ``Psi[u]`` is multiplied element-wise with the per-member sums.
    Pi : array
        ``Pi[u]`` is a 2-D array; its second axis is matched against the first
        axis of ``Gamma`` and summed out.
    Gamma : 2-D array
        Indexed as ``Gamma[:, time_index]``.
    T : int
        Period of the time axis; both time indices are taken modulo ``T``.

    Returns
    -------
    ``Psi[u] * sum_r Pi[u][:, r] * g[r]`` where ``g`` is ``Gamma`` linearly
    interpolated between the two time indices surrounding ``t``.
    """
    lower = int(np.floor(t))
    # For an integer t, frac is 0 and only the lower sample contributes.
    frac = t - lower
    gamma_t = (1-frac)*Gamma[None,:,lower % T] + frac*Gamma[None,:,(lower+1) % T]
    return Psi[u]*(Pi[u][:,:]*gamma_t).sum(axis=1)


In [None]:
# Account to display. It is drawn at random without a fixed seed, so it
# differs from one run to the next.
u = np.random.randint(len(composite))
all_t = np.arange(0, params_TAUITM['T'], 0.01)
AU = np.array([
    active_user(t, u, params_TAUITM['Psi'], params_TAUITM['Pi'], params_TAUITM['Gamma'], params_TAUITM['T'])
    for t in all_t
])
scale = AU.max()

plt.figure(num=None, figsize=(15, 10), dpi=300, facecolor='w', edgecolor='k')
# One curve per household member (columns 0 and 1 of AU).
plt.plot(all_t, AU[:,0], c=(0., 0., 1.), linewidth=3)
plt.plot(all_t, AU[:,1], c=(0., 1., 0.), linewidth=3)
for event, pred in zip(composite[u][1], predictions_TAUITM[u][1]):
    # Events are drawn above the curves: the marker and vertical band come from
    # event[1], the colour from the prediction pred[1]. A random jitter spreads
    # overlapping points. np.random is used explicitly because %pylab may rebind
    # the bare name `random`.
    y = scale + ((np.random.random() + 1.2*event[1])*scale/(2.2))
    plt.scatter(event[2], y, color=set_color(pred[1]), marker=set_shape(event[1]))

# One tick every 6 hours over a week; midnight ticks carry the day name.
tick_values = range(0, 24*7, 6)
# Floor division keeps the day index an integer on both Python 2 and Python 3.
tick_labels = [(day_to_string[v//24]+" 0" if not(v%24) else v%24) for v in tick_values]
plt.xticks(tick_values, tick_labels, rotation='vertical')
plt.xlim([0, 24*7])
plt.ylim([0, 2.1*scale])
plt.show()