In [13]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
from mlxtend.preprocessing import TransactionEncoder
from IPython.display import display

# Association rule mining assignment: genre co-occurrence among each user's highly rated movies.
#Leo Baltazar

# --- Configuration: everything a reader might want to tune -------------------
# NOTE(review): absolute, machine-specific location; point DATA_DIR at your own
# copy of the data before running on another machine.
DATA_DIR = "/Users/leobaltazar/Desktop/Data Mining"
MIN_RATING = 4       # a rating at or above this value counts as a "liked" movie
MIN_SUPPORT = 0.6    # minimum fraction of user baskets that must contain an itemset
MIN_LIFT = 1.0       # keep only rules whose lift is at least this value
TOP_N_RULES = 20     # number of rules shown in the final table

# --- Load data ----------------------------------------------------------------
Movies_Data = pd.read_csv(f"{DATA_DIR}/Movies.tsv", sep="\t")
Ratings_Data = pd.read_csv(f"{DATA_DIR}/Ratings.tsv", sep="\t")

# "A|B|C" -> ["A", "B", "C"]; vectorised string split instead of a per-row lambda.
Movies_Data["genres"] = Movies_Data["genres"].str.split("|")

# Coerce ratings to numbers but do NOT truncate them to int: truncation would
# turn 4.5 into 4 in the stored column (breaking any half-star MIN_RATING) and
# raises on missing ratings. Comparing the numeric value directly selects the
# same rows for MIN_RATING = 4.
Ratings_Data["rating"] = pd.to_numeric(Ratings_Data["rating"])
high_ratings = Ratings_Data[Ratings_Data["rating"] >= MIN_RATING]

# Attach the genre list to every highly rated (user, movie) pair; the inner join
# drops ratings whose movieId has no row in Movies_Data.
merged_data = pd.merge(high_ratings, Movies_Data, on="movieId", how="inner")

# --- Build transactions: one basket of distinct genres per user ---------------
# set().union(*lists) flattens and de-duplicates in one pass, avoiding the
# quadratic list copying of sum(lists, []). Sorting only makes the basket order
# reproducible; the encoded columns are taken from encoder.columns_ either way.
transactions = merged_data.groupby("userId")["genres"].apply(
    lambda genre_lists: sorted(set().union(*genre_lists))
)
transactions = transactions.tolist()

# One-hot encode the baskets: one row per user, one boolean column per genre.
encoder = TransactionEncoder()
encoded_data = encoder.fit(transactions).transform(transactions)
df_transactions = pd.DataFrame(encoded_data, columns=encoder.columns_)
print(len(df_transactions))  # number of users (transactions)

# --- Mine frequent itemsets and derive rules ----------------------------------
# NOTE(review): the recorded output shows row labels above 250,000, so this
# threshold still produces a very large rule set; consider fpgrowth's `max_len`
# argument to cap itemset size if that is not intended.
frequent_itemsets = fpgrowth(df_transactions, min_support=MIN_SUPPORT, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="lift", min_threshold=MIN_LIFT)

# --- Results: strongest rules by lift -----------------------------------------
display(
    rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
    .sort_values("lift", ascending=False)
    .head(TOP_N_RULES)
)



91


Unnamed: 0,antecedents,consequents,support,confidence,lift
251555,"(Animation, Romance, Drama, Thriller, Action)","(Comedy, Sci-Fi, Crime, Children)",0.615385,0.949153,1.393111
246894,"(Action, Adventure, Animation, Romance)","(Comedy, Sci-Fi, Crime, Children)",0.615385,0.949153,1.393111
252123,"(Comedy, Action, Animation, Romance)","(Sci-Fi, Adventure, Drama, Crime, Children)",0.615385,0.949153,1.393111
255289,"(Sci-Fi, Crime, Children)","(Comedy, Adventure, Animation, Romance, Drama,...",0.615385,0.903226,1.393111
241483,"(Comedy, Action, Animation, Romance)","(Sci-Fi, Crime, Children)",0.615385,0.949153,1.393111
241480,"(Comedy, Sci-Fi, Crime, Children)","(Action, Animation, Romance)",0.615385,0.903226,1.393111
252154,"(Sci-Fi, Adventure, Crime, Children)","(Comedy, Animation, Romance, Drama, Action)",0.615385,0.903226,1.393111
255315,"(Action, Animation, Romance)","(Comedy, Sci-Fi, Adventure, Drama, Thriller, C...",0.615385,0.949153,1.393111
247962,"(Action, Animation, Romance)","(Sci-Fi, Adventure, Thriller, Crime, Children)",0.615385,0.949153,1.393111
252173,"(Sci-Fi, Drama, Crime, Children)","(Comedy, Adventure, Animation, Romance, Action)",0.615385,0.903226,1.393111
