### Cofactor

Liang's extension of Alternating Least Squares Algorithm. [Factorization Meets the Item Embedding: Regularizing Matrix Factorization with Item Co-occurrence](https://dl.acm.org/doi/10.1145/2959100.2959182)

It co-factorizes both user-item interaction matrix and SPPMI matrix(kind of item-item co-occurence matrix) with shared item matrix. It claims that two different matrix reveals different information, thus exploiting both matrix will be helpful.

In [1]:
from buffalo import CFR, CFROption, StreamOptions
from buffalo import aux, log

In [2]:
opt = CFROption().get_default_option() # initialize default Cofactor option
opt                                    # Check buffalo/algo/options.py to see further.

{'evaluation_on_learning': True,
 'compute_loss_on_training': True,
 'early_stopping_rounds': 0,
 'save_best': False,
 'evaluation_period': 1,
 'save_period': 10,
 'random_seed': 0,
 'validation': {},
 'save_factors': False,
 'd': 20,
 'num_iters': 10,
 'num_workers': 1,
 'num_cg_max_iters': 3,
 'cg_tolerance': 1e-10,
 'eps': 1e-10,
 'reg_u': 0.1,
 'reg_i': 0.1,
 'reg_c': 0.1,
 'alpha': 8.0,
 'l': 1.0,
 'optimizer': 'manual_cg',
 'model_path': '',
 'data_opt': {}}

In [3]:
data_opt = StreamOptions().get_default_option()
data_opt.data.sppmi = {"windows": 5, "k": 10}
data_opt.input.main = 'data/ml-1m/stream'
data_opt.input.uid = 'data/ml-1m/uid'
data_opt.input.iid = 'data/ml-1m/iid'
data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})
data_opt.data.path = './2-cfr.h5py'
data_opt.data.internal_data_type = 'matrix'

In [4]:
cofactor = CFR(opt, data_opt=data_opt)

[INFO    ] 2023-01-19 14:15:45 [stream.py:279] Create database from stream data
[INFO    ] 2023-01-19 14:15:45 [stream.py:103] gathering itemids from data/ml-1m/stream...
[INFO    ] 2023-01-19 14:15:45 [stream.py:127] Found 3706 unique itemids
[INFO    ] 2023-01-19 14:15:45 [stream.py:288] Creating working data...
[INFO    ] 2023-01-19 14:15:47 [stream.py:296] Building data part...
[INFO    ] 2023-01-19 14:15:47 [base.py:410] Building compressed triplets for rowwise...
[INFO    ] 2023-01-19 14:15:47 [base.py:411] Preprocessing...
[INFO    ] 2023-01-19 14:15:47 [base.py:414] In-memory Compressing ...
[INFO    ] 2023-01-19 14:15:48 [base.py:294] Load triplet files. Total job files: 11
[INFO    ] 2023-01-19 14:15:48 [base.py:444] Finished
[INFO    ] 2023-01-19 14:15:48 [base.py:410] Building compressed triplets for colwise...
[INFO    ] 2023-01-19 14:15:48 [base.py:411] Preprocessing...
[INFO    ] 2023-01-19 14:15:48 [base.py:414] In-memory Compressing ...
[INFO    ] 2023-01-19 14:15:48 [

In [5]:
cofactor.initialize()

In [6]:
cofactor.train()

[INFO    ] 2023-01-19 14:16:15 [buffered_data.py:72] Set data buffer size as 67108864(minimum required batch size is 251).
[INFO    ] 2023-01-19 14:16:15 [cfr.py:214] Iteration 1: Loss 0.000 Elapsed 0.098 secs
[INFO    ] 2023-01-19 14:16:15 [cfr.py:214] Iteration 2: Loss 0.000 Elapsed 0.095 secs
[INFO    ] 2023-01-19 14:16:15 [cfr.py:214] Iteration 3: Loss 0.000 Elapsed 0.093 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 4: Loss 0.000 Elapsed 0.093 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 5: Loss 0.000 Elapsed 0.091 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 6: Loss 0.000 Elapsed 0.093 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 7: Loss 0.000 Elapsed 0.095 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 8: Loss 0.000 Elapsed 0.095 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 9: Loss 0.000 Elapsed 0.092 secs
[INFO    ] 2023-01-19 14:16:16 [cfr.py:214] Iteration 10: Loss 0.000 Elapsed 0.099 secs


{'train_loss': 0.0}

### Recommendation for users

In [7]:
uids = [str(x) for x in range(61, 70)]
recommendation_result = cofactor.topk_recommendation(uids, topk=3)
for uid, iids in recommendation_result.items():
    print(f"for user {uid}, recommendations are ", f"\nitems {iids}.\n")

for user 61, recommendations are  
items ['Patriot,_The_(2000)', 'Frequency_(2000)', 'Shanghai_Noon_(2000)'].

for user 62, recommendations are  
items ['2001:_A_Space_Odyssey_(1968)', 'Bonnie_and_Clyde_(1967)', 'Close_Encounters_of_the_Third_Kind_(1977)'].

for user 63, recommendations are  
items ['Blair_Witch_Project,_The_(1999)', 'Eyes_Wide_Shut_(1999)', 'Austin_Powers:_The_Spy_Who_Shagged_Me_(1999)'].

for user 64, recommendations are  
items ['Jurassic_Park_(1993)', 'Terminator_2:_Judgment_Day_(1991)', 'Star_Wars:_Episode_VI_-_Return_of_the_Jedi_(1983)'].

for user 65, recommendations are  
items ['Braveheart_(1995)', 'Saving_Private_Ryan_(1998)', 'Patriot,_The_(2000)'].

for user 66, recommendations are  
items ['Jurassic_Park_(1993)', 'Braveheart_(1995)', 'Patriot,_The_(2000)'].

for user 67, recommendations are  
items ['Bridge_on_the_River_Kwai,_The_(1957)', 'To_Kill_a_Mockingbird_(1962)', 'North_by_Northwest_(1959)'].

for user 68, recommendations are  
items ['Being_John_Ma

### Recommendation for users in given pools

In [8]:
pool = ['Rules_of_Engagement_(2000)', 
        'Remember_the_Titans_(2000)', 
        'Skulls,_The_(2000)', 
        '28_Days_(2000)', 
        'Frequency_(2000)', 
        'Gone_in_60_Seconds_(2000)', 
        'What_Lies_Beneath_(2000)', 
        'Reindeer_Games_(2000)', 
        'Final_Destination_(2000)', 
        'Shanghai_Noon_(2000)']
uids = [str(x) for x in range(5)]
recommendation_result = cofactor.topk_recommendation(uids, topk=3, pool=pool)
for uid, iids in recommendation_result.items():
    print(f"for user {uid}, recommendations are ", f"\nitems {iids}.\n")

for user 1, recommendations are  
items ['Frequency_(2000)', 'Remember_the_Titans_(2000)', 'Shanghai_Noon_(2000)'].

for user 2, recommendations are  
items ['Rules_of_Engagement_(2000)', 'Shanghai_Noon_(2000)', 'Remember_the_Titans_(2000)'].

for user 3, recommendations are  
items ['Shanghai_Noon_(2000)', '28_Days_(2000)', 'Gone_in_60_Seconds_(2000)'].

for user 4, recommendations are  
items ['Shanghai_Noon_(2000)', 'Skulls,_The_(2000)', 'Gone_in_60_Seconds_(2000)'].



### Find Most similar items

In [9]:
print('Similar movies to Toy_Story_2_(1999) in similar items')
similar_items = cofactor.most_similar('Toy_Story_2_(1999)', 10)
print(similar_items)
for rank, (movie_name, score) in enumerate(similar_items):
    print(f'{rank + 1:02d}. {score:.3f} {movie_name}')


Similar movies to Toy_Story_2_(1999) in similar items
[("Bug's_Life,_A_(1998)", 0.9336268), ('Toy_Story_(1995)', 0.910489), ('Shakespeare_in_Love_(1998)', 0.8634493), ('Babe_(1995)', 0.8460558), ('Groundhog_Day_(1993)', 0.8172974), ('Being_John_Malkovich_(1999)', 0.80523443), ('Sixth_Sense,_The_(1999)', 0.7978962), ('Galaxy_Quest_(1999)', 0.7975167), ('Election_(1999)', 0.79327524), ('South_Park:_Bigger,_Longer_and_Uncut_(1999)', 0.7679639)]
01. 0.934 Bug's_Life,_A_(1998)
02. 0.910 Toy_Story_(1995)
03. 0.863 Shakespeare_in_Love_(1998)
04. 0.846 Babe_(1995)
05. 0.817 Groundhog_Day_(1993)
06. 0.805 Being_John_Malkovich_(1999)
07. 0.798 Sixth_Sense,_The_(1999)
08. 0.798 Galaxy_Quest_(1999)
09. 0.793 Election_(1999)
10. 0.768 South_Park:_Bigger,_Longer_and_Uncut_(1999)


### Find Most similar items given pool

In [10]:
pool = ['Rules_of_Engagement_(2000)', 
        'Remember_the_Titans_(2000)', 
        'Skulls,_The_(2000)', 
        '28_Days_(2000)', 
        'Frequency_(2000)', 
        'Gone_in_60_Seconds_(2000)', 
        'What_Lies_Beneath_(2000)', 
        'Reindeer_Games_(2000)', 
        'Final_Destination_(2000)', 
        'Shanghai_Noon_(2000)']
similar_items = cofactor.most_similar('Toy_Story_2_(1999)', 5, pool=pool)
for rank, (movie_name, score) in enumerate(similar_items):
    print(f'{rank + 1:02d}. {score:.3f} {movie_name}')

01. 0.371 Gone_in_60_Seconds_(2000)
02. 0.357 28_Days_(2000)
03. 0.335 Frequency_(2000)
04. 0.332 Shanghai_Noon_(2000)
05. 0.283 What_Lies_Beneath_(2000)
06. 0.231 Final_Destination_(2000)
