# SVD: Distance Matrix Job

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the project
# modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os
import sys
import warnings

# Project sources live outside the notebook directory; this must run before
# the local imports at the bottom of this cell.
sys.path.append('../../src')

import numpy as np
import pandas as pd

from surprise import SVD

import api
import model as ml
import util as ut
from logger import LoggerBuilder
from domain_context import DomainContext

In [3]:
# Notebook configuration: paths, API endpoint, credentials and switches.

# Scratch directory for intermediate artefacts (relative to the kernel's cwd).
TEMP_PATH               = '../../temp'
# CSV dump of the fetched interactions; also passed to ml.ModelManager as file_path.
INPUT_INTERACTIONS_PATH = f'{TEMP_PATH}/input_interations.csv'
HOST                    = 'http://localhost:8000'
# The token is read from the API_TOKEN environment variable when it is set, so
# the credential does not have to live in the notebook.
# FIXME(security): the literal fallback below is a credential committed to
# source; it is kept only so existing runs keep working. Rotate it and drop
# the fallback once the environment variable is provisioned.
API_TOKEN               = os.environ.get('API_TOKEN', 'e3ff025094fe0ee474501bbeda0a2a44e80230c1')
# NOTE(review): none of the three switches below is read by the cells shown
# in this notebook -- confirm whether they are still needed.
DOWNLOAD                = True
UNRATED_USER_ITEMS      = True
COMPLETE_RATING_MATRIX  = True

### Setup

In [4]:
# Create the scratch directory (including missing parents); a no-op when it
# already exists. Pure Python instead of a shell escape so the cell behaves
# the same on every platform and raises if the directory cannot be created.
os.makedirs(TEMP_PATH, exist_ok=True)

In [5]:
# Initialise logging through the project's LoggerBuilder helper.
# NOTE(review): presumably this installs the "timestamp [LEVEL]" format seen
# in the cell outputs below -- confirm in logger.py.
LoggerBuilder.build()

In [6]:
# Suppress every Python warning for the remainder of the session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings from the libraries used below; consider narrowing it by category
# or module.
warnings.filterwarnings('ignore')

In [7]:
# Build the project's DomainContext from the API token and host. The
# repository and service objects used by the cells below
# (ctx.interaction_repository, ctx.interaction_service) are reached through it.
ctx = DomainContext(API_TOKEN, HOST)

### Get interactions

In [8]:
# Fetch all interactions through the context's repository, load them into a
# DataFrame, and keep a header-less, index-less CSV copy on disk.
fetched_records = ctx.interaction_repository.find()
interactions = pd.DataFrame.from_records(fetched_records)
interactions.to_csv(
    INPUT_INTERACTIONS_PATH,
    header=False,
    index=False,
    encoding='utf-8',
)

2022-12-05 19:07:20,787 [INFO] - Page 1/2 - Interactions 100000/147655
2022-12-05 19:07:23,133 [INFO] - Page 2/2 - Interactions 147655/147655
2022-12-05 19:07:23,133 [INFO] - 147655 Total Interactions 


In [9]:
# Disabled exploratory step: presumably plots how many users interacted with
# each item (inferred from the method name -- confirm). Uncomment to inspect.
# ctx.interaction_service.plot_n_users_by_item(interactions)

### Resolve unrated user items

In [10]:
# Derive the user/item collection that the model will later score.
# NOTE(review): judging by the method name these are the distinct (user, item)
# pairs with no rating in `interactions` -- confirm in the interaction service.
user_item = ctx.interaction_service.unrated_distinct_user_item(interactions)

In [11]:
# Display how many entries user_item holds (sanity check before prediction).
len(user_item)

### Train Model

In [15]:
# Train an SVD model on the interactions CSV, score the user_item entries
# with it, and combine the result with the observed interactions.

# Fixed seed for SVD's random initialisation so that a fresh
# Restart & Run All reproduces the same model and predictions.
SVD_RANDOM_STATE = 42

model_manager = ml.ModelManager(
    file_path = INPUT_INTERACTIONS_PATH,
    model     = SVD(random_state=SVD_RANDOM_STATE)
)
model_manager.train()

# NOTE(review): the method name suggests predictions are written into
# `user_item` in place (its return value is ignored here) -- confirm in
# ml.ModelManager.
model_manager.predict_inplase(user_item)

# Observed interactions plus the scored user_item entries.
complete_interactions = ut.concat(interactions, user_item)

2022-12-05 19:11:03,248 [INFO] Prediction progress: 100%


In [16]:
# Convert the combined interactions into the rating matrix via the
# interaction service.
# NOTE(review): layout (e.g. users x items, sparse vs dense) is decided by
# to_matrix and is not visible here -- confirm before relying on it.
rating_matrix = ctx.interaction_service.to_matrix(complete_interactions)

2022-12-05 19:14:44,219 [INFO] Processing: 99%


In [17]:
# Persist the rating matrix under TEMP_PATH using the project's ut.Picket
# helper. No file extension is given here; the on-disk format is whatever
# Picket.save chooses -- presumably pickle, confirm in util.
ut.Picket.save(f'{TEMP_PATH}/rating_matrix', rating_matrix)

In [None]:
# Disabled visual check of the rating matrix.
# NOTE(review): `sns` is not imported in this notebook's import cell, so
# seaborn must be imported before this line can be enabled.
# sns.heatmap(rating_matrix.toarray())