#  **Système de recommandation**

Les règles d'association identifient des relations entre des éléments dans des ensembles de données, comme « si un client achète A, il achète aussi B ». Les systèmes de recommandation prédisent des éléments pertinents pour un utilisateur en se basant sur ses préférences passées ou sur celles d'autres utilisateurs ; ils sont utilisés dans des services comme le streaming ou le commerce en ligne. Parmi les algorithmes d'extraction de règles d'association, on peut citer Apriori et FP-Growth.

## **0. Imports**

In [25]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
import numpy as np

## **1. Charger les données**

In [26]:
# Load the raw transactions. The CSV has no header row, so pandas assigns
# integer column labels instead of consuming the first line as names.
data = pd.read_csv(
    "tv_shows.csv",
    header=None,
)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,Cobra Kai,Lupin,12 Monkeys,Sherlock,,,,,,,...,,,,,,,,,,
1,Lost,Jack Ryan,The Flash,Game of thrones,House of Cards,12 Monkeys,Vikings,Fringe,The Mentalist,The Alienist,...,,,,,,,,,,
2,Education,Dr. House,Kingdom,The Walking Dead,,,,,,,...,,,,,,,,,,
3,Ozark,Education,Constantine,Preacher,Vikings,The Tick,,,,,...,,,,,,,,,,
4,Naruto,,,,,,,,,,...,,,,,,,,,,


## **2. Transformer les données**

In [None]:
# Display the distinct titles found in the dataset.
# NOTE: `data` is rebound to a NumPy array here; the loop below (and later
# cells) iterate over `data` row by row.
data = np.array(data)
films = []
for row in data:
    # Skip the NaN cells of each row; keep only real titles.
    films.extend(title for title in row if not pd.isnull(title))
print(set(films))

{'The Expanse', 'Family Guy', 'Black Clover', 'Hunters', 'The Witcher', 'Preacher', 'The Vampire Diaries', 'The Originals', 'Naruto', 'Invincible', 'The Blacklist', 'Alice in Borderland', 'Upload', 'The man in the high castle', 'White Collar', 'How to get away with murder', 'Mare of Easttown', 'Elementary', '12 Monkeys', 'The IT Crowd', 'Outlander', 'Peaky Blinders', 'Dr Stone', 'The Stranger', 'Punisher', 'The Walking Dead', 'The Umbrella Academy', 'Two and a half men', 'Queen of the South', 'Sense 8', 'Death Note', 'Love Death Robots', 'Chernobyl', 'Vikings', 'Deception', 'Cobra Kai', 'Inside Job', 'Person of Interest', 'Startup', 'Supergirl', 'Grimm', 'Dark', 'Travellers', 'Constantine', 'Mr. Robot', 'Modern Family', 'The Night Manager', 'The Alienist', 'Doctor Who', 'Narcos', 'Billions', "Grey's Anatomy", 'Young Sheldon', 'Archer', 'Goliath', 'His Dark Materials', 'The Office', 'The Wheel of Time', 'Nine Perfect Strangers', 'Better Call Saul', 'The Newsroom', 'Absentia', 'American 

In [None]:
# Remove the null values so that each row becomes a plain list of titles.
data = [
    [title for title in row if not pd.isnull(title)]
    for row in data
]

# Fit the encoder on the transactions, then convert them into the boolean
# array format expected by the association-analysis functions.
te = TransactionEncoder()
te.fit(data)
te_ary = te.transform(data)
df = pd.DataFrame(te_ary, columns=te.columns_)

## **3. Apriori**

In [54]:
# Mine frequent itemsets with Apriori: minimum support of 0.01, itemsets
# reported with column names rather than column indices.
frequent_itemsets = apriori(
    df,
    min_support=0.01,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.058617,(12 Monkeys)
1,0.077090,(Absentia)
2,0.024768,(Alice in Borderland)
3,0.057792,(Archer)
4,0.026109,(Arrow)
...,...,...
342,0.018163,"(Ozark, Education, Two and a half men)"
343,0.010836,"(Two and a half men, Education, The Blacklist)"
344,0.012487,"(Ozark, Mr. Robot, The Blacklist)"
345,0.012384,"(Ozark, Mr. Robot, Two and a half men)"


In [55]:
# Generate association rules from the Apriori frequent itemsets, keeping the
# rules whose confidence reaches the 0.1 minimum threshold.
rules_Apriori = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.1,
    num_itemsets=len(df),
)
rules_Apriori

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Atypical),(12 Monkeys),0.139938,0.058617,0.014241,0.101770,1.736180,1.0,0.006039,1.048042,0.493014,0.077268,0.045840,0.172364
1,(12 Monkeys),(Atypical),0.058617,0.139938,0.014241,0.242958,1.736180,1.0,0.006039,1.136082,0.450426,0.077268,0.119782,0.172364
2,(12 Monkeys),(Education),0.058617,0.255624,0.024252,0.413732,1.618517,1.0,0.009268,1.269686,0.405946,0.083630,0.212404,0.254303
3,(12 Monkeys),(Mr. Robot),0.058617,0.109391,0.011249,0.191901,1.754269,1.0,0.004837,1.102104,0.456734,0.071758,0.092645,0.147366
4,(Mr. Robot),(12 Monkeys),0.109391,0.058617,0.011249,0.102830,1.754269,1.0,0.004837,1.049281,0.482773,0.071758,0.046966,0.147366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
487,"(Two and a half men, Mr. Robot)",(Ozark),0.024561,0.193705,0.012384,0.504202,2.602938,1.0,0.007626,1.626256,0.631325,0.060150,0.385091,0.284067
488,(Mr. Robot),"(Ozark, Two and a half men)",0.109391,0.042828,0.012384,0.113208,2.643328,1.0,0.007699,1.079365,0.698049,0.088561,0.073529,0.201182
489,"(Outer Banks, Ozark)",(Two and a half men),0.033024,0.183591,0.010010,0.303125,1.651086,1.0,0.003947,1.171528,0.407805,0.048452,0.146414,0.178825
490,"(Outer Banks, Two and a half men)",(Ozark),0.038390,0.193705,0.010010,0.260753,1.346134,1.0,0.002574,1.090697,0.267397,0.045074,0.083155,0.156215


## **4. FP-Growth**

In [56]:
# Mine frequent itemsets with FP-Growth, using a minimum support of 0.01.
frequent_itemsets_FP = fpgrowth(
    df,
    min_support=0.01,
    use_colnames=True,
)
frequent_itemsets_FP

Unnamed: 0,support,itemsets
0,0.083075,(Cobra Kai)
1,0.058617,(12 Monkeys)
2,0.017957,(Lupin)
3,0.072549,(The Mentalist)
4,0.053767,(Jack Ryan)
...,...,...
342,0.018060,"(The Stranger, Ozark)"
343,0.010939,"(The Stranger, Mr. Robot)"
344,0.017750,"(The Stranger, Education)"
345,0.011558,"(Education, Travellers)"


In [57]:
# Generate association rules from the FP-Growth frequent itemsets, using the
# lift metric with a minimum threshold of 1.0.
rules_FP = association_rules(
    frequent_itemsets_FP,
    metric="lift",
    min_threshold=1.0,
    num_itemsets=len(df),
)
rules_FP

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Cobra Kai),(Education),0.083075,0.255624,0.030650,0.368944,1.443306,1.0,0.009414,1.179572,0.334974,0.099497,0.152235,0.244424
1,(Education),(Cobra Kai),0.255624,0.083075,0.030650,0.119903,1.443306,1.0,0.009414,1.041845,0.412622,0.099497,0.040164,0.244424
2,(Atypical),(Cobra Kai),0.139938,0.083075,0.021569,0.154130,1.855301,1.0,0.009943,1.084002,0.536013,0.107070,0.077492,0.206879
3,(Cobra Kai),(Atypical),0.083075,0.139938,0.021569,0.259627,1.855301,1.0,0.009943,1.161661,0.502772,0.107070,0.139164,0.206879
4,(Cobra Kai),(Two and a half men),0.083075,0.183591,0.016925,0.203727,1.109675,1.0,0.001673,1.025287,0.107790,0.067769,0.024663,0.147957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,(Education),(The Stranger),0.255624,0.043034,0.017750,0.069439,1.613579,1.0,0.006750,1.028375,0.510844,0.063189,0.027592,0.240954
626,(Education),(Travellers),0.255624,0.028070,0.011558,0.045216,1.610820,1.0,0.004383,1.017958,0.509418,0.042473,0.017641,0.228490
627,(Travellers),(Education),0.028070,0.255624,0.011558,0.411765,1.610820,1.0,0.004383,1.265439,0.390150,0.042473,0.209760,0.228490
628,(Ozark),(Travellers),0.193705,0.028070,0.010320,0.053277,1.897975,1.0,0.004883,1.026625,0.586786,0.048804,0.025934,0.210462


## **5. Système de recommandation**

In [58]:
# Titles the example user has already watched.
user_watched = [
    "Atypical",
    "The Blacklist",
]

In [59]:
def recommendation(user_watched, method):
    """Recommend titles from the mined association rules.

    A rule contributes when its antecedent contains every title in
    ``user_watched``; every title of that rule's consequent is then
    recommended.

    Args:
        user_watched: Iterable of titles the user has already watched.
        method: Truthy to use the FP-Growth rules (``rules_FP``), falsy to
            use the Apriori rules (``rules_Apriori``).

    Returns:
        The recommended titles, without duplicates, ordered by the highest
        confidence among the rules that produced them (ties are broken
        alphabetically). The same list is also printed.
    """
    # Choose the rule table: FP-Growth when `method` is truthy, Apriori otherwise.
    rules = rules_FP if method else rules_Apriori

    # Build the set once rather than once per rule.
    watched = set(user_watched)

    # NOTE(review): matching requires the antecedent to be a superset of the
    # watch history, so long histories match few or no rules. Matching rules
    # whose antecedent is a subset of the history may be what is wanted --
    # confirm before changing.
    best_confidence = {}
    rows = zip(rules["antecedents"], rules["consequents"], rules["confidence"])
    for antecedent, consequent, confidence in rows:
        if not watched.issubset(antecedent):
            continue
        # Keep every title of the consequent, not just an arbitrary first one,
        # remembering the strongest rule that suggested each title.
        for title in consequent:
            if confidence > best_confidence.get(title, float("-inf")):
                best_confidence[title] = confidence

    # Deterministic ranking: strongest confidence first, then alphabetical.
    ranked = sorted(best_confidence, key=lambda title: (-best_confidence[title], title))
    print(ranked)
    return ranked



In [60]:
# A truthy `method` selects the FP-Growth rules.
recommendation(user_watched, method=1)

['Ozark', 'Education']
