# Construindo um Modelo de Recomendação de produtos

## Atualizar bibliotecas Oracle: ADS e OCI, e instalar a biblioteca open source Surprise

In [None]:
!pip install -U oracle-ads

In [None]:
!pip install -U oci

In [None]:
!pip install -U surprise

## Importando Bibliotecas e Features

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import oci
import logging
import difflib
import random
import tempfile
import ads

from ads.model.generic_model import GenericModel
from ads.common.auth import default_signer
from surprise import Dataset
from surprise import Reader
from surprise import SVD
from surprise.model_selection import cross_validate
from ads.common.oci_logging import OCILogGroup
from oci.data_science.models import ModelConfigurationDetails, InstanceConfiguration, \
                                    FixedSizeScalingPolicy, CategoryLogDetails, LogDetails, \
                                    SingleModelDeploymentConfigurationDetails, CreateModelDeploymentDetails

%matplotlib inline

## Lendo os Dados Do GitHub

In [31]:
ratings_data = pd.read_csv('https://raw.githubusercontent.com/rafaelrdias/Sugestao-de-Produtos---OCI/main/Files/books.csv')
books_metadata = pd.read_csv('https://raw.githubusercontent.com/rafaelrdias/Sugestao-de-Produtos---OCI/main/Files/ratings.csv')
ratings_data.head(10)

Unnamed: 0,id,book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m/2767052.jpg,https://images.gr-assets.com/books/1447303603s/2767052.jpg
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m/3.jpg,https://images.gr-assets.com/books/1474154022s/3.jpg
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m/41865.jpg,https://images.gr-assets.com/books/1361039443s/41865.jpg
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m/2657.jpg,https://images.gr-assets.com/books/1361975680s/2657.jpg
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m/4671.jpg,https://images.gr-assets.com/books/1490528560s/4671.jpg
5,6,11870085,11870085,16827462,226,525478817,9780525000000.0,John Green,2012.0,The Fault in Our Stars,...,2346404,2478609,140739,47994,92723,327550,698471,1311871,https://images.gr-assets.com/books/1360206420m/11870085.jpg,https://images.gr-assets.com/books/1360206420s/11870085.jpg
6,7,5907,5907,1540236,969,618260307,9780618000000.0,J.R.R. Tolkien,1937.0,The Hobbit or There and Back Again,...,2071616,2196809,37653,46023,76784,288649,665635,1119718,https://images.gr-assets.com/books/1372847500m/5907.jpg,https://images.gr-assets.com/books/1372847500s/5907.jpg
7,8,5107,5107,3036731,360,316769177,9780317000000.0,J.D. Salinger,1951.0,The Catcher in the Rye,...,2044241,2120637,44920,109383,185520,455042,661516,709176,https://images.gr-assets.com/books/1398034300m/5107.jpg,https://images.gr-assets.com/books/1398034300s/5107.jpg
8,9,960,960,3338963,311,1416524797,9781417000000.0,Dan Brown,2000.0,Angels & Demons,...,2001311,2078754,25112,77841,145740,458429,716569,680175,https://images.gr-assets.com/books/1303390735m/960.jpg,https://images.gr-assets.com/books/1303390735s/960.jpg
9,10,1885,1885,3060926,3455,679783261,9780680000000.0,Jane Austen,1813.0,Pride and Prejudice,...,2035490,2191465,49152,54700,86485,284852,609755,1155673,https://images.gr-assets.com/books/1320399351m/1885.jpg,https://images.gr-assets.com/books/1320399351s/1885.jpg


In [32]:
ratings_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10000 non-null  int64  
 1   book_id                    10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

In [4]:
books_metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 23 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10000 non-null  int64  
 1   book_id                    10000 non-null  int64  
 2   best_book_id               10000 non-null  int64  
 3   work_id                    10000 non-null  int64  
 4   books_count                10000 non-null  int64  
 5   isbn                       9300 non-null   object 
 6   isbn13                     9415 non-null   float64
 7   authors                    10000 non-null  object 
 8   original_publication_year  9979 non-null   float64
 9   original_title             9415 non-null   object 
 10  title                      10000 non-null  object 
 11  language_code              8916 non-null   object 
 12  average_rating             10000 non-null  float64
 13  ratings_count              10000 non-null  int6

## Criando o Dataset especificamente para o Surprise

In [5]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_data[['user_id', 'book_id', 'rating']], reader)

## Cross-Validating uma Amostra para SVD Model

In [6]:
svd = SVD(verbose=True, n_epochs=10)
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8568  0.8571  0.8551  0.8563  0.0009  
MAE (testset)     0.6757  0.6756  0.6748  0.6754  0.0004  
Fit time          4.58    4.97    4.95    4.84    0.18    
Test time         2.84    2.83    3.00    2.89    0.08    


{'test_rmse': array([0.85682853, 0.85711811, 0.85509447]),
 'test_mae': array([0.67571649, 0.67559642, 0.67478984]),
 'fit_time': (4.58173942565918, 4.974778890609741, 4.950160026550293),
 'test_time': (2.835078716278076, 2.8303072452545166, 2.997844934463501)}

In [7]:
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fb66ab52e50>

## Gerando Previsões de Avaliação por Usuário para Livros

In [8]:
svd.predict(uid=10, iid=100)

Prediction(uid=10, iid=100, r_ui=None, est=4.12533266900806, details={'was_impossible': False})

## Preparando Dados Para Colocar Modelo Em Produção

In [11]:
trainset_uids = list(trainset.all_items())
uid_converter = lambda x: trainset.to_raw_uid(x)
trainset_raw_uids = list(map(uid_converter, trainset_uids))
trainset_raw_uids = pd.DataFrame(trainset_raw_uids)
trainset_raw_uids.rename(columns = {0 : 'uid'}, inplace = True)

In [14]:
trainset_iids = list(trainset.all_items())
iid_converter = lambda x: trainset.to_raw_iid(x)
trainset_raw_iids = list(map(iid_converter, trainset_iids))
trainset_raw_iids = pd.DataFrame(trainset_raw_iids)
trainset_raw_iids.rename(columns = {0 : 'iid'}, inplace = True)

In [17]:
trainset_final = pd.concat([trainset_raw_uids, trainset_raw_iids], axis=1, join = 'inner')

## Autenticação Necessária para Data Science Acessar Recursos na OCI

In [36]:
ads.set_auth(auth="resource_principal")

## Preparando Configurações Necessárias para Deploy

In [37]:
svd_model = GenericModel(estimator=svd,
                            artifact_dir="artefato_modelo",
                            model_save_serializer="cloudpickle",
                            model_input_serializer="json"
                            )

In [38]:
svd_model.prepare(
    inference_conda_env="oci://<nome-do-bucket>@<namespace-do-bucket>/conda_environments/cpu/General Machine Learning for CPUs on Python 3.8/1.0/generalml_p38_cpu_v1",
    inference_python_version="3.8",
    X_sample=trainset_final,
    y_sample=trainset_final,
    force_overwrite = True
)

algorithm: null
artifact_dir:
  /home/datascience/artefato_modelo:
  - - model.pkl
    - test_json_output.json
    - score.py
    - output_schema.json
    - input_schema.json
    - runtime.yaml
    - .model-ignore
framework: null
model_deployment_id: null
model_id: null

## Validação das Configurações do Modelo Antes do Deploy

In [39]:
svd_model.introspect()

['model.pkl', 'test_json_output.json', 'score.py', 'output_schema.json', 'input_schema.json', 'runtime.yaml', '.model-ignore']


Unnamed: 0,Test key,Test name,Result,Message
0,runtime_env_path,Check that field MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is set,Passed,
1,runtime_env_python,Check that field MODEL_DEPLOYMENT.INFERENCE_PYTHON_VERSION is set to a value of 3.6 or higher,Passed,
2,runtime_path_exist,Check that the file path in MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is correct.,Passed,
3,runtime_version,Check that field MODEL_ARTIFACT_VERSION is set to 3.0,Passed,
4,runtime_yaml,"Check that the file ""runtime.yaml"" exists and is in the top level directory of the artifact directory",Passed,
5,score_load_model,Check that load_model() is defined,Passed,
6,score_predict,Check that predict() is defined,Passed,
7,score_predict_arg,Check that all other arguments in predict() are optional and have default values,Passed,
8,score_predict_data,"Check that the only required argument for predict() is named ""data""",Passed,
9,score_py,"Check that the file ""score.py"" exists and is in the top level directory of the artifact directory",Passed,


In [33]:
#svd_model.verify(trainset_final, auto_serialize_data=True)

## Salvando o Modelo no Catálogo de Modelos

In [41]:
model_id = svd_model.save(display_name="Product_Suggestion_Model")

Start loading model.pkl from model directory /home/datascience/artefato_modelo ...
Model is successfully loaded.
['model.pkl', 'test_json_output.json', 'score.py', 'output_schema.json', 'input_schema.json', 'runtime.yaml', '.model-ignore']


loop1:   0%|          | 0/4 [00:00<?, ?it/s]

## Criando Grupo de Log e Logs para Acesso e Predição para Monitoramento do Modelo

In [None]:
log_group_name = "ModelDeployment-Product-Suggestion"
access_log_name = "Access_Log"
predict_log_name = "Predict_Log"

log_group = OCILogGroup(display_name=log_group_name).create()
log_group_ocid = log_group.id
print(f"Log group OCID: {log_group_ocid}")

access_log = log_group.create_log(access_log_name)
access_log_ocid = access_log.id
print(f"Access log OCID: {access_log_ocid}")

# Create a predict log in the log group
predict_log = log_group.create_log(predict_log_name)
predict_log_ocid = predict_log.id
print(f"Predict log OCID: {predict_log_ocid}")

logs_configuration_details_object = CategoryLogDetails(access=LogDetails(log_group_id=log_group_ocid,
                                                                         log_id=access_log_ocid),
                                                       predict=LogDetails(log_group_id=log_group_ocid,
                                                                          log_id=predict_log_ocid))

In [None]:
svd_model.deploy(deployment_log_group_id = log_group_ocid,
                 deployment_access_log_id = access_log_ocid,
                 deployment_predict_log_id = predict_log_ocid)

## Gerando Recomendações de Livros para Usuários Conforme Melhores Avaliações Preditas

In [9]:
def get_book_id(book_title, metadata):
    
    existing_titles = list(metadata['title'].values)
    closest_titles = difflib.get_close_matches(book_title, existing_titles)
    book_id = metadata[metadata['title'] == closest_titles[0]]['id'].values[0]
    return book_id

def get_book_info(book_id, metadata):
    
    book_info = metadata[metadata['id'] == book_id][['id', 'isbn', 
                                                    'authors', 'title', 'original_title']]
    return book_info.to_dict(orient='records')

def predict_review(user_id, book_title, model, metadata):
    
    book_id = get_book_id(book_title, metadata)
    review_prediction = model.predict(uid=user_id, iid=book_id)
    return review_prediction.est

def generate_recommendation(user_id, model, metadata, thresh=4):
    
    book_titles = list(metadata['title'].values)
    random.shuffle(book_titles)
    
    for book_title in book_titles:
        rating = predict_review(user_id, book_title, model, metadata)
        if rating >= thresh:
            book_id = get_book_id(book_title, metadata)
            return get_book_info(book_id, metadata)


In [10]:
generate_recommendation(233, svd, books_metadata)

[{'id': 7029,
  'isbn': '763655988',
  'authors': 'Jon Klassen',
  'title': 'I Want My Hat Back',
  'original_title': 'I Want My Hat Back'}]