In [21]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [22]:
netflix_df = pd.read_csv('../data/netflix-pre-processed.csv')
netflix_df.head()

Unnamed: 0.1,Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,day_added,year_added,month_added
0,0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,No Cast,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",25,2021,9
1,1,s2,TV Show,Blood & Water,No Director,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",24,2021,9
2,2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",No Country,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,24,2021,9
3,3,s4,TV Show,Jailbirds New Orleans,No Director,No Cast,No Country,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",24,2021,9
4,4,s5,TV Show,Kota Factory,No Director,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,24,2021,9


In [23]:
netflix_df.shape

(8790, 16)

In [24]:
netflix_df.columns

Index(['Unnamed: 0', 'show_id', 'type', 'title', 'director', 'cast', 'country',
       'date_added', 'release_year', 'rating', 'duration', 'listed_in',
       'description', 'day_added', 'year_added', 'month_added'],
      dtype='object')

In [25]:
# Filter the DataFrame to only include movies
movies_df = netflix_df[netflix_df['type'] == 'Movie']

In [26]:
# Split the director and cast columns into separate rows for each director and cast member
directors_df = movies_df.assign(director=movies_df['director'].str.split(',')).explode('director')
cast_df = directors_df.assign(cast=directors_df['cast'].str.split(',')).explode('cast')

In [27]:
# Convert the listed_in column into a list of genres
genres_df = cast_df.assign(listed_in=cast_df['listed_in'].str.split(',')).explode('listed_in')
genres_df['listed_in'] = genres_df['listed_in'].str.strip()

In [28]:
# Convert the DataFrame into a list of transactions
transactions = genres_df.groupby(['director', 'cast'])['listed_in'].apply(list).reset_index(name='genres')

In [29]:
# Convert the list of transactions into a one-hot encoded format
te = TransactionEncoder()
te_ary = te.fit(transactions['genres']).transform(transactions['genres'])
df = pd.DataFrame(te_ary, columns=te.columns_)

In [30]:
# Find frequent itemsets with a minimum support of 0.01
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

In [31]:
# Generate association rules with a minimum confidence of 0.5
association_rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)

In [32]:
# Print the association rules
print(association_rules)

                                   antecedents             consequents  \
0                             (Anime Features)    (Action & Adventure)   
1                           (Sci-Fi & Fantasy)    (Action & Adventure)   
2                             (Classic Movies)                (Dramas)   
3                         (Independent Movies)                (Dramas)   
4                                     (Dramas)  (International Movies)   
5                       (International Movies)                (Dramas)   
6                              (Sports Movies)                (Dramas)   
7                           (Music & Musicals)  (International Movies)   
8                            (Romantic Movies)  (International Movies)   
9                 (Action & Adventure, Dramas)  (International Movies)   
10              (Comedies, Independent Movies)                (Dramas)   
11                          (Comedies, Dramas)  (International Movies)   
12                (Comedies, Music & M