In [None]:
import pandas as pd 
import numpy as np
import random
# Seed every global RNG the pipeline can draw from. Surprise's train_test_split
# and SVD sample from NumPy's global generator when no random_state is given,
# so seeding Python's `random` module alone does not make runs reproducible.
random.seed(9001)
np.random.seed(9001)
from surprise import SVD, accuracy
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
# The seed set above is meant to keep the reported errors identical every time the project is re-executed.

In [None]:
import os

from sqlalchemy import create_engine

# Local development database. The scheme must be "postgresql://": SQLAlchemy 1.4+
# no longer accepts the legacy "postgres://" alias and fails to load the dialect.
DEFAULT_DATABASE_URL = 'postgresql://pass_culture:passq@localhost:5434/pass_culture?sslmode=prefer'

# Allow the URL (and therefore the credentials) to be supplied through the
# environment so that real secrets never have to be written into the notebook.
engine = create_engine(os.environ.get('PASS_CULTURE_DATABASE_URL', DEFAULT_DATABASE_URL))
connection = engine.connect()

In [None]:
# Ids of all rows of the "user" table, in ascending order.
user = pd.read_sql_query(
    sql='SELECT id as user_id FROM "user" ORDER BY id',
    con=connection,
)

In [None]:
# Ids of all rows of the offer table, in ascending order.
offer = pd.read_sql_query(
    sql='SELECT id as offer_id FROM offer ORDER BY id',
    con=connection,
)

In [None]:
# user_id: distinct user ids; freq_users: number of occurrences of each id.
user_id, freq_users = np.unique(user['user_id'], return_counts=True)
# offre_id: distinct offer ids; freq_offre: number of occurrences of each id.
offre_id, freq_offre = np.unique(offer['offer_id'], return_counts=True)

# Report how many distinct users and offers were loaded.
n_users, n_offre = len(user_id), len(offre_id)
print("Le nombre des utilisateurs est : ", n_users, sep="")
print("Le nombre des offres est : ", n_offre, sep="")

In [None]:
# (user, offer) pairs for bookings that were used and not cancelled.
# They receive the highest implicit rating, 6.
achete = pd.read_sql_query(
    sql='SELECT "user".id as user_id, offer.id as offer_id\
                       FROM "booking" \
                       INNER JOIN "user" ON "user".id=booking."userId" \
                       INNER JOIN stock ON booking."stockId"=stock.id \
                       INNER JOIN offer ON offer.id=stock."offerId" \
                       WHERE booking."isUsed"=True AND booking."isCancelled"=False \
                       ORDER BY user_id',
    con=connection,
).assign(rate=6)
achete

In [None]:
# (user, offer) pairs for bookings that are neither used nor cancelled.
# They receive an implicit rating of 5.
pas_consome = pd.read_sql_query(
    sql='SELECT "user".id as user_id, offer.id as offer_id\
                       FROM "booking" \
                       INNER JOIN "user" ON "user".id=booking."userId" \
                       INNER JOIN stock ON booking."stockId"=stock.id \
                       INNER JOIN offer ON offer.id=stock."offerId" \
                       WHERE booking."isUsed"=False AND booking."isCancelled"=False',
    con=connection,
).assign(rate=5)

In [None]:
# (user, offer) pairs for bookings that were cancelled without being used.
# They receive an implicit rating of 4.
annule = pd.read_sql_query(
    sql='SELECT "user".id as user_id, offer.id as offer_id\
                       FROM "booking" \
                       INNER JOIN "user" ON "user".id=booking."userId" \
                       INNER JOIN stock ON booking."stockId"=stock.id \
                       INNER JOIN offer ON offer.id=stock."offerId" \
                       WHERE booking."isUsed"=False AND booking."isCancelled"=True',
    con=connection,
).assign(rate=4)

In [None]:
# (user, offer) pairs stored in the favorite table.
# They receive an implicit rating of 3.
mis_en_fav = pd.read_sql_query(
    sql='SELECT "userId" as user_id, "offerId" as offer_id \
                          FROM favorite',
    con=connection,
).assign(rate=3)

In [None]:
# (user, offer) pairs for recommendations that were clicked.
# They receive an implicit rating of 2.
clic = pd.read_sql_query(
    sql='SELECT "userId" AS user_id, "offerId" AS offer_id \
                          FROM recommendation \
                          WHERE "isClicked"=True',
    con=connection,
).assign(rate=2)

In [None]:
# (user, offer) pairs for recommendations that were not clicked.
# They receive the lowest implicit rating, 1.
ignore = pd.read_sql_query(
    sql='SELECT "userId" AS user_id, "offerId" AS offer_id \
                          FROM recommendation \
                          WHERE "isClicked"=False',
    con=connection,
).assign(rate=1)

In [None]:
# Stack every interaction table into one long (user_id, offer_id, rate) frame.
result = pd.concat([achete, pas_consome, annule, mis_en_fav, clic, ignore])

# A (user, offer) pair can appear in several tables. Sorting by rate and keeping
# the last duplicate retains only the strongest interaction for each pair.
result = result.sort_values('rate').drop_duplicates(subset=['user_id', 'offer_id'], keep='last')

# sort_values returns a new frame, so the result must be assigned back;
# otherwise the ordering by user is silently discarded.
result = result.sort_values(by=['user_id'])
result

In [None]:
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)

# Number of rows of `result` per rating value, highest rating first.
data = result['rate'].value_counts().sort_index(ascending=False)

# One bar per rating, labelled with its share of all rows as a percentage.
trace = go.Bar(
    x=data.index,
    y=data.values,
    text=['{:.1f} %'.format(share) for share in data.values / result.shape[0] * 100],
    textposition='auto',
    textfont={'color': '#000000'},
)

# Figure title and axis captions.
layout = {
    'title': 'Distribution Of {} offers'.format(result.shape[0]),
    'xaxis': {'title': 'Rating'},
    'yaxis': {'title': 'Count'},
}

# Assemble the figure and render it inline.
fig = go.Figure(data=[trace], layout=layout)
iplot(fig)

In [None]:
# Ratings range from 1 (recommendation not clicked) to 6 (booking used).
reader = Reader(rating_scale=(1, 6))

# Surprise expects the columns in the order: user, item, rating.
data = Dataset.load_from_df(
    df=result.loc[:, ['user_id', 'offer_id', 'rate']],
    reader=reader,
)

# Hold out a quarter of the ratings as a test set.
trainset, testset = train_test_split(data, test_size=0.25)


In [None]:
# A notebook cell only displays its last expression, so both counts are shown
# together as a (number of users, number of items) tuple.
trainset.n_users, trainset.n_items

In [None]:
# Build an SVD model with its default arguments and train it on the training split.
algo = SVD()
algo.fit(trainset)

In [None]:
# trainset.ir holds the ratings grouped by item (per the Surprise Trainset docs:
# inner item id -> list of (inner user id, rating) tuples). Show only the first
# five entries: displaying the whole mapping floods the cell output.
dict(list(trainset.ir.items())[:5])

In [None]:
# Reference: https://surprise.readthedocs.io/en/stable/matrix_factorization.html
# Wrap the fitted model's item factors (algo.qi) in a DataFrame for inspection.
offer_factor_latent = pd.DataFrame(algo.qi)
offer_factor_latent

In [None]:
# build_testset() returns a list of ratings that can be passed to test().
# It contains every rating of the trainset (those yielded by all_ratings()),
# which is useful for evaluating the algorithm on the data it was trained on.
# NOTE(review): this rebinds `testset`, so the held-out split produced by
# train_test_split is no longer reachable under that name, and `predictions`
# are computed on training data - confirm this is intended.
testset = trainset.build_testset()
predictions = algo.test(testset)

In [None]:
# Show only the first ten predictions: the full list is not truncated by the
# notebook and would flood the cell output.
predictions[:10]

In [None]:
# RMSE of `predictions`. These were computed on trainset.build_testset(), so
# this measures training error, not error on held-out ratings.
accuracy.rmse(predictions)