In [9]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [15]:
df = pd.read_csv('film_actor_basket.csv')
df

Unnamed: 0,film_id,title,actor_id,actor_name
0,735,Robbers Joon,142,Jada Ryder
1,738,Rocketeer Mother,119,Warren Jackman
2,31,Apache Divine,15,Cuba Olivier
3,489,Juggler Hardly,33,Milla Peck
4,1000,Zorro Ark,178,Lisa Monroe
...,...,...,...,...
79,103,Bucket Brotherhood,92,Kirsten Akroyd
80,369,Goodfellas Salute,110,Susan Davis
81,489,Juggler Hardly,122,Salma Nolte
82,730,Ridgemont Submarine,5,Johnny Lollobrigida


SELECT DISTINCT
    f.film_id,
    f.title,
    a.actor_id,
    a.first_name || ' ' || a.last_name AS actor_name
FROM film f
JOIN film_actor fa ON f.film_id = fa.film_id
JOIN actor a ON fa.actor_id = a.actor_id
JOIN (
    SELECT i.film_id
    FROM rental r
    JOIN inventory i ON r.inventory_id = i.inventory_id
    GROUP BY i.film_id
    HAVING COUNT(*) > 30
) popular_films ON f.film_id = popular_films.film_id;


In [11]:
transactions = df.groupby('film_id')['actor_name'].apply(list).tolist()

In [12]:
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_te = pd.DataFrame(te_ary, columns=te.columns_)

In [13]:
frequent_itemsets = apriori(df_te, min_support=0.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

In [None]:
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(5))

            antecedents           consequents  support  confidence  lift
0  (Angela Witherspoon)       (Alan Dreyfuss)   0.0625         1.0  16.0
1       (Alan Dreyfuss)  (Angela Witherspoon)   0.0625         1.0  16.0
2       (Alan Dreyfuss)       (Kirk Jovovich)   0.0625         1.0   8.0
3       (Kirk Jovovich)       (Alan Dreyfuss)   0.0625         0.5   8.0
4       (Alan Dreyfuss)         (Mae Hoffman)   0.0625         1.0  16.0
