In [12]:
import pandas as pd
import numpy as np
import turicreate as tc
from sklearn.preprocessing import MinMaxScaler

##############################################
# Modelo baseado em conteúdo
##############################################

df = pd.read_csv(r'database.csv', delimiter=';')
df.head()

# Renovando a base de dados original
userServiceData = df[['Advertising ID', 'Event Name']]
userServiceDf = userServiceData.groupby(['Advertising ID','Event Name']).size().reset_index(name='ServiceUseByUser')
userServiceDf = userServiceDf.rename(columns={"Advertising ID": "UserID", "Event Name": "Service"})

# Scale ServiceUseByUser attribute
scaler = MinMaxScaler()
userServiceCountScaledDf = userServiceDf
userServiceCountScaledDf['ServiceUseByUser'] = pd.DataFrame(scaler.fit_transform(userServiceDf[['ServiceUseByUser']]))
userServiceCountScaledDf

# Escolhendo quais usuários possuem mais uso de Serviços distintos
# list = []

# for userId in userServiceDf['UserID']:
#     if(len(userServiceDf.loc[userServiceDf['UserID'] == userId]) > 15):
#         list.append(userId)

# Separando eventos do usuário
userServiceCountScaledDf.loc[userServiceCountScaledDf['UserID'] == '651fc761-7b29-490c-aefd-fe9b9bdd65c7']


# Removendo os 3 últimos usos do Usuário antes de criar o modelo
print(userServiceCountScaledDf.loc[userServiceCountScaledDf['UserID'] == '651fc761-7b29-490c-aefd-fe9b9bdd65c7'])
userServiceCountScaledDf = userServiceCountScaledDf.drop(index=[14488,14489,14490], axis=0)
print(userServiceCountScaledDf.loc[userServiceCountScaledDf['UserID'] == '651fc761-7b29-490c-aefd-fe9b9bdd65c7'])

# Criar modelo de recomendaçao baseado em conteudo
userServiceCountScaledSFrame = tc.SFrame(userServiceCountScaledDf)

item_content_recommender = tc.recommender.item_content_recommender.create(item_data=userServiceCountScaledSFrame, item_id='Service', observation_data=userServiceCountScaledSFrame, user_id='UserID', target='ServiceUseByUser', verbose = True)
#https://apple.github.io/turicreate/docs/api/generated/turicreate.recommender.create.html#turicreate.recommender.create


                                     UserID                    Service  \
14475  651fc761-7b29-490c-aefd-fe9b9bdd65c7        AGRANUNCIARPRODUTOS   
14476  651fc761-7b29-490c-aefd-fe9b9bdd65c7            AGRAnunCadastro   
14477  651fc761-7b29-490c-aefd-fe9b9bdd65c7           AGRFAZERPRODUTOS   
14478  651fc761-7b29-490c-aefd-fe9b9bdd65c7           AGRFazerCadastro   
14479  651fc761-7b29-490c-aefd-fe9b9bdd65c7      AGRMapaDeInteressados   
14480  651fc761-7b29-490c-aefd-fe9b9bdd65c7  CAGDeslocamentodoRegistro   
14481  651fc761-7b29-490c-aefd-fe9b9bdd65c7               CAGReligacao   
14482  651fc761-7b29-490c-aefd-fe9b9bdd65c7                DET2aviaCNH   
14483  651fc761-7b29-490c-aefd-fe9b9bdd65c7           DETCNHDefinitiva   
14484  651fc761-7b29-490c-aefd-fe9b9bdd65c7      DETCertidãonadaconsta   
14485  651fc761-7b29-490c-aefd-fe9b9bdd65c7            DETRenovacaoCNH   
14486  651fc761-7b29-490c-aefd-fe9b9bdd65c7                  PR2VIADAE   
14487  651fc761-7b29-490c-aefd-fe9b9bd

In [13]:
# Criando recomendações para o usuário selecionado
item_content_recommender.recommend(users=['651fc761-7b29-490c-aefd-fe9b9bdd65c7'])

UserID,Service,score,rank
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,TESTECAgendar,0.0018915029672475,1
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,TESTECResultado,0.0012609958648681,2
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,SUANOTASaibaMais,0.0008406639099121,3
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,SUANOTAQueroMeCadastrar,0.0008406639099121,4
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,SUANOTAJaSouCadastrado,0.0008406639099121,5
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,CAGFaturaDigital,0.0006305071023794,6
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,ACESSOJaSouCadastrado,0.000630497932434,7
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,PRDeclaracaoVacinacao,0.0003293202473567,8
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,ACESSOQueroMeCadastrar,0.000210165977478,9
651fc761-7b29-490c-aefd- fe9b9bdd65c7 ...,CAGParcelamentodeConta,0.000210165977478,10


In [14]:
# Model Evaluation using TuricCreate metrics
train, test = tc.recommender.util.random_split_by_user(userServiceCountScaledSFrame, user_id='UserID', item_id='Service')
item_content_recommender = tc.recommender.item_content_recommender.create(item_data=userServiceCountScaledSFrame, item_id='Service', observation_data=userServiceCountScaledSFrame, user_id='UserID', target='ServiceUseByUser', verbose = True)
eval = item_content_recommender.evaluate(test)
eval

Applying transform:
Class             : AutoVectorizer

Model Fields
------------
Features          : ['UserID', 'ServiceUseByUser']
Excluded Features : ['Service']

Column            Type   Interpretation  Transforms  Output Type
----------------  -----  --------------  ----------  -----------
UserID            str    categorical     None        str        
ServiceUseByUser  float  numerical       None        float      


Defaulting to brute force instead of ball tree because there are multiple distance components.



Precision and recall summary statistics by cutoff
+--------+----------------+-------------+
| cutoff | mean_precision | mean_recall |
+--------+----------------+-------------+
|   1    |      0.0       |     0.0     |
|   2    |      0.0       |     0.0     |
|   3    |      0.0       |     0.0     |
|   4    |      0.0       |     0.0     |
|   5    |      0.0       |     0.0     |
|   6    |      0.0       |     0.0     |
|   7    |      0.0       |     0.0     |
|   8    |      0.0       |     0.0     |
|   9    |      0.0       |     0.0     |
|   10   |      0.0       |     0.0     |
+--------+----------------+-------------+
[10 rows x 3 columns]


Overall RMSE: 0.10493911344659498

Per User RMSE (best)
+-------------------------------+------+-------+
|             UserID            | rmse | count |
+-------------------------------+------+-------+
| 67643a62-44e0-4e16-befc-6b... | 0.0  |   1   |
+-------------------------------+------+-------+
[1 rows x 3 columns]


Per User RMSE

{'precision_recall_by_user': Columns:
 	UserID	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 5166
 
 Data:
 +-------------------------------+--------+-----------+--------+-------+
 |             UserID            | cutoff | precision | recall | count |
 +-------------------------------+--------+-----------+--------+-------+
 | 0094d15c-9e05-4819-a7ac-d4... |   1    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   2    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   3    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   4    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   5    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   6    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   7    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   8    |    0.0    |  0.0   |   1   |
 | 0094d15c-9e05-4819-a7ac-d4... |   9    |    0.0    |  0.0   | 