In [1]:
import sqlalchemy
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
import numpy as np


# Render floats with three decimals in every DataFrame display.
# NOTE: the former bare `pd.option_context('display.max_rows', None)` call was
# dropped. option_context only has an effect when used as a `with` block, so
# calling it on its own changed nothing.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Load the invoice lines from the MSSQL database through SQLAlchemy.
server = "localhost"
database = "GIPA"
driver = "ODBC Driver 17 for SQL Server"
# Keep the URL under its own name so that `con` only ever refers to the live
# connection object.
connection_url = f'mssql://@{server}/{database}?driver={driver}'
engine = sqlalchemy.create_engine(connection_url, fast_executemany=True)
con = engine.connect()
sql = "select NumFacture,CodeClient,CodeArticle,QteFacturee,CA from GIPA.dbo.dimension_facture"
df = pd.read_sql(sql, con)  # query result as a DataFrame

# Pivot to one row per client and one column per article, holding the summed
# invoiced quantity; client/article pairs with no invoice line become 0.
# (The original comment describes this as the layout needed by the apriori
# algorithm.)
df_finale = (
    df.groupby(['CodeClient', 'CodeArticle'])['QteFacturee']
    .sum()
    .unstack()
    .fillna(0)
)

# Display the absolute quantities. `DataFrame.abs()` is the vectorised
# equivalent of `applymap(lambda x: abs(x))` (applymap is deprecated in recent
# pandas). As in the original cell, the result is only displayed: df_finale
# itself keeps its signed values.
# NOTE(review): if negative totals should be removed before computing
# similarities, this result has to be assigned back -- confirm the intent.
df_finale.abs()

CodeArticle,18100301,18100901,18120401,18120901,18121201,18121301,18130001,18150001,9002,9003,...,a2mj025f24,a60000b,a611001,a620000,a655000,m61041,m61148,zz0100100,zz70900030,zz70900050
CodeClient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00060,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,24.000,1229.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
00558,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,4654.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
13153,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,554.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
42358,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
69949,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,164.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99660,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
99880,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,606.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
99888,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,240.000,9375.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,15.000,0.000,0.000
99891,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,4855.000,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,21.000,0.000,0.000


In [2]:
def hot_encode(x):
    """Binarise a value: return 0 when ``x`` equals 0, otherwise 1.

    Any non-zero value (including negative ones) maps to 1.
    """
    return 0 if x == 0 else 1
    

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity  

In [4]:
# Pairwise cosine similarity between the rows of df_finale (one row per
# client). Both axes are labelled with the client codes at construction, so
# the matrix can be queried with `.loc[code]` as soon as this cell has run
# instead of carrying positional 0..n-1 labels until later cells relabel it.
client_similarite_matrice = pd.DataFrame(
    cosine_similarity(df_finale),
    index=df_finale.index,
    columns=df_finale.index,
)

In [6]:
# Label the columns of the similarity matrix with the client codes taken from
# df_finale's index (mutates the existing DataFrame in place).
client_similarite_matrice.columns = df_finale.index


In [9]:
# Index the rows of the similarity matrix by client code: attach the codes as
# a 'CodeClient' column, then promote that column to the index.
client_similarite_matrice = (
    client_similarite_matrice
    .assign(CodeClient=df_finale.index)
    .set_index('CodeClient')
)

In [58]:
def top_3_similarite_client_score(CodeArticle):
    """Return the three clients most similar to a given client, with scores.

    Parameters
    ----------
    CodeArticle : hashable
        Despite its name, this is a *client* code: it is looked up in the row
        labels of ``client_similarite_matrice``. The name is kept so existing
        keyword callers keep working.

    Returns
    -------
    pandas.Series
        Up to three similarity scores in descending order, indexed by client
        code. The queried client itself is never part of the result.

    Raises
    ------
    KeyError
        If the code is not a row label of the matrix, or is absent from its
        column labels.
    """
    scores = client_similarite_matrice.loc[CodeArticle]
    # Remove the queried client by label rather than by skipping position 0
    # after sorting: a tie with another client, or a self-similarity of 0,
    # means the client is not guaranteed to sort first.
    others = scores.drop(labels=CodeArticle)
    return others.sort_values(ascending=False).iloc[:3]

In [61]:
def top3_similarite_client_code(CodeArticle):
    """Return the codes of the three clients most similar to a given client.

    Parameters
    ----------
    CodeArticle : hashable
        Despite its name, this is a *client* code: it is looked up in the row
        labels of ``client_similarite_matrice``. The name is kept so existing
        keyword callers keep working.

    Returns
    -------
    list
        Up to three client codes, most similar first. The queried client
        itself is never included.

    Raises
    ------
    KeyError
        If the code is not a row label of the matrix, or is absent from its
        column labels.
    """
    # Use the requested client; the row label used to be hard-coded to
    # '00060', so the argument was silently ignored.
    scores = client_similarite_matrice.loc[CodeArticle]
    # Exclude the client itself by label: on ties or a zero self-similarity it
    # does not necessarily sort to the first position.
    neighbours = scores.drop(labels=CodeArticle).sort_values(ascending=False)
    return list(neighbours.index[:3])

In [66]:
def top_3_similarite_client_articles(liste_code):
    """Return the article sets of the first three clients in ``liste_code``.

    For each of ``liste_code[0]``, ``liste_code[1]`` and ``liste_code[2]`` the
    matching row of ``df_finale`` is read and the column labels whose value is
    non-zero are collected into a set.

    Returns a 3-tuple of sets, in the same order as the input codes. Entries
    beyond the third are ignored; fewer than three entries raise IndexError.
    """
    def _articles_non_nuls(code_client):
        # Column labels of df_finale where this client's row is non-zero.
        ligne = df_finale.loc[code_client]
        positions = ligne.to_numpy().nonzero()
        return set(ligne.iloc[positions].index)

    return (
        _articles_non_nuls(liste_code[0]),
        _articles_non_nuls(liste_code[1]),
        _articles_non_nuls(liste_code[2]),
    )

In [69]:
# Example run for client '00060': get the codes of the three clients ranked
# most similar to it, then collect, for each of them, the set of articles with
# a non-zero quantity in df_finale.
liste = top3_similarite_client_code('00060')
article_client1 , article_client2 , article_client3 = top_3_similarite_client_articles(liste)

In [72]:
# Articles of the first similar client that neither the second nor the third
# similar client has.
# NOTE(review): the target client's own purchases are not considered here --
# confirm this is the intended recommendation rule.
recommendation = article_client1.difference(article_client2, article_client3)

In [73]:
# Display the resulting set of recommended article codes.
recommendation

{'965602', '96994', '96996', '973100'}

In [74]:
def recommend(liste):
    """Return the articles specific to the first of three similar clients.

    ``liste`` is passed to ``top_3_similarite_client_articles``, which yields
    one article set per client. The result is the first set minus every
    article present in the second or third set.

    NOTE(review): the target client's own purchases are not taken into
    account by this function -- confirm this is the intended rule.
    """
    premier, deuxieme, troisieme = top_3_similarite_client_articles(liste)
    return premier.difference(deuxieme, troisieme)