In [47]:
from collections import defaultdict
from typing import List, Callable
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [48]:
# Helper function to make horizontal "baskets"
def transform_df_to_baskets(movies_df: pd.DataFrame) -> List[List[str]]:
    """Collect the titles of each user into one "basket" (a list of titles).

    Args:
        movies_df: Frame with at least a ``userId`` and a ``title`` column.
            Any other columns are ignored.

    Returns:
        One list of titles per distinct ``userId``. Baskets appear in the
        order in which each user is first seen in ``movies_df``, and the
        titles inside a basket keep the row order of ``movies_df``. A frame
        with no rows yields an empty list.

    Raises:
        KeyError: If ``movies_df`` lacks the ``userId`` or ``title`` column.
    """
    baskets = defaultdict(list)
    # Walk only the two needed columns in lockstep. DataFrame.iterrows() would
    # build a full Series for every row just to read two fields from it.
    for user_id, title in zip(movies_df['userId'], movies_df['title']):
        baskets[user_id].append(title)
    return list(baskets.values())

In [49]:
# Load the two input tables from CSV files in the relative data/ directory.
# NOTE(review): the column layout is not visible here; later cells rely on
# 'userId', 'rating' and 'title' being present after the two are merged.
movies = pd.read_csv('data/movies.csv')
ratings = pd.read_csv('data/ratings.csv')

In [50]:
# Attach movie metadata to every rating. No join key is passed, so pandas
# joins on whatever column names the two frames have in common.
# Then keep only the three columns used below and order the rows by user.
movies_rating = (
    ratings
    .merge(movies)
    .loc[:, ['userId', 'rating', 'title']]
    .sort_values(by='userId')
)
movies_rating.head(10)

Unnamed: 0,userId,rating,title
0,1,2.5,Dangerous Minds (1995)
117,1,2.0,Escape from New York (1981)
165,1,4.0,Cinema Paradiso (Nuovo cinema Paradiso) (1989)
403,1,2.0,Cape Fear (1991)
211,1,2.0,"Deer Hunter, The (1978)"
259,1,2.0,Ben-Hur (1959)
305,1,2.0,Gandhi (1982)
849,1,3.0,Blazing Saddles (1974)
84,1,3.0,Sleepers (1996)
806,1,1.0,Time Bandits (1981)


In [51]:
def recommend_movies(baskets: List[List[str]], min_support: float, min_threshold: float, filter: Callable) -> pd.DataFrame:
    """Mine association rules from baskets and keep those selected by ``filter``.

    The baskets are one-hot encoded, frequent itemsets are mined with
    FP-Growth, and association rules are derived from them using confidence
    as the selection metric. The shapes of the itemset and rule tables are
    printed as a side effect.

    Args:
        baskets: One list of item names (movie titles) per transaction.
        min_support: Minimum support passed to ``fpgrowth``.
        min_threshold: Minimum confidence passed to ``association_rules``.
        filter: Predicate called with each rule's ``antecedents`` value; rules
            for which it returns a truthy value are kept. (The name shadows
            the ``filter`` builtin inside this function; it is kept because
            callers pass it by keyword.)

    Returns:
        The matching rules with the ``antecedents`` column removed. When no
        rule matches (or no rule was generated at all) the frame has zero
        rows but still carries the remaining rule columns.
    """
    encoder = TransactionEncoder()
    encoded_array = encoder.fit_transform(baskets)
    encoded_df = pd.DataFrame(encoded_array, columns=encoder.columns_)

    frequent_itemsets = fpgrowth(encoded_df, min_support=min_support, use_colnames=True)

    rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=min_threshold)

    print(f'Frequent itemsets: {frequent_itemsets.shape}')
    print(f'Rules: {rules.shape}')

    # Force a boolean mask. With zero rules, apply() on the empty
    # object-dtype column returns an empty object-dtype Series, which pandas
    # does not treat as a boolean indexer: rules[...] would then select zero
    # columns and the drop() below would raise KeyError.
    keep = rules['antecedents'].apply(filter).astype(bool)
    return rules[keep].drop('antecedents', axis=1)
    

## Bad movies

In [52]:
# Ratings of 2 or lower count as "bad"; each user's badly rated titles
# form one basket.
bad_movies = movies_rating.loc[movies_rating['rating'] <= 2]
bad_movies_baskets = transform_df_to_baskets(bad_movies)

# Keep only the rules whose antecedent contains the given title.
recommend_movies(
    baskets=bad_movies_baskets,
    min_support=0.01,
    min_threshold=0.5,
    filter=lambda antecedent: {'Mask, The (1994)'} <= antecedent,
)

Frequent itemsets: (1152, 2)
Rules: (371, 10)


Unnamed: 0,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
24,(Ace Ventura: Pet Detective (1994)),0.051839,0.090301,0.026756,0.516129,5.715651,0.022075,1.880045,0.87015
25,(Ace Ventura: Pet Detective (1994)),0.016722,0.090301,0.011706,0.7,7.751852,0.010196,3.03233,0.885811
26,(Ace Ventura: Pet Detective (1994)),0.013378,0.090301,0.010033,0.75,8.305556,0.008825,3.638796,0.891525
30,(Ace Ventura: Pet Detective (1994)),0.011706,0.090301,0.010033,0.857143,9.492063,0.008976,6.367893,0.905245
32,(Armageddon (1998)),0.016722,0.046823,0.010033,0.6,12.814286,0.00925,2.382943,0.937642
33,(Ace Ventura: When Nature Calls (1995)),0.011706,0.058528,0.010033,0.857143,14.644898,0.009348,6.590301,0.942752
293,(Ace Ventura: Pet Detective (1994)),0.011706,0.090301,0.010033,0.857143,9.492063,0.008976,6.367893,0.905245


## Good movies

In [53]:
# Ratings of 4 or higher count as "good"; each user's well rated titles
# form one basket.
good_movies = movies_rating.loc[movies_rating['rating'] >= 4]
good_movies_baskets = transform_df_to_baskets(good_movies)

# Keep only the rules whose antecedent contains both given titles.
recommend_movies(
    baskets=good_movies_baskets,
    min_support=0.08,
    min_threshold=0.5,
    filter=lambda antecedent: {'Pulp Fiction (1994)', 'Reservoir Dogs (1992)'} <= antecedent,
)

Frequent itemsets: (1635, 2)
Rules: (3914, 10)


Unnamed: 0,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
1946,"(Godfather, The (1972))",0.135618,0.265276,0.089419,0.659341,2.485492,0.053442,2.156771,0.691437
1950,"(Silence of the Lambs, The (1991))",0.135618,0.356185,0.09389,0.692308,1.943676,0.045584,2.092399,0.561686
1953,(American Beauty (1999)),0.135618,0.268256,0.087928,0.648352,2.416911,0.051548,2.080896,0.678229
1961,(Fargo (1996)),0.135618,0.274218,0.083458,0.615385,2.244147,0.046269,1.887034,0.641379
1965,"(Usual Suspects, The (1995))",0.135618,0.257824,0.080477,0.593407,2.301594,0.045511,1.825351,0.654246
1969,(Fight Club (1999)),0.135618,0.235469,0.081967,0.604396,2.566769,0.050033,1.932563,0.706176
1972,"(Shawshank Redemption, The (1994))",0.135618,0.408346,0.090909,0.67033,1.641574,0.03553,1.794685,0.452148
2039,(Goodfellas (1990)),0.135618,0.149031,0.083458,0.615385,4.129231,0.063246,2.212519,0.876724
