In [None]:
# Goal: mine frequent itemsets and association rules from user watch histories, irrespective of genre


In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
# Read the MovieLens movie catalogue CSV from the Kaggle input directory
movies_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/movielens-dataset/movies.csv',
)

In [5]:
# Preview the first rows of the catalogue to check which columns were loaded
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [6]:
# Step 2: First draft of the synthetic watch history (user id -> watched movieIds).
# NOTE: the next cell assigns `user_history` again, so this draft is replaced
# before anything reads it.
# The note above each entry is the author's intended taste profile for that user.
user_history = {
    # Mix of Animation, Comedy, Drama
    1: [1, 2, 3, 4, 5],
    # Focus on Comedy and Action
    2: [3, 4, 6, 7, 8],
    # Animation and Adventure
    3: [1, 5, 9, 10],
    # Drama and Romance
    4: [11, 12, 13, 14],
    # Action and Crime
    5: [15, 16, 17],
    # Classics and Thrillers
    6: [18, 19, 20],
    # Animation and Adventure
    7: [1, 4, 10],
    # Comedy and Drama
    8: [2, 5, 11],
}

In [7]:
# Revised synthetic viewing history (user id -> list of watched movieIds).
# Histories deliberately overlap, and users 9-10 were added, to give the
# pattern miner stronger co-occurrence signals.
# The note above each entry is the author's intended taste profile for that user.
user_history = {
    # Mix of Animation, Comedy, Drama, Action
    1: [1, 2, 3, 4, 5, 6, 10],
    # Focus on Comedy and Action with overlaps
    2: [3, 4, 5, 6, 7, 8, 10],
    # Animation and Adventure with some Drama
    3: [1, 2, 8, 9, 10, 11],
    # Strong preference for Comedy and Action
    4: [4, 5, 6, 10, 12],
    # Animation and Action with some Drama
    5: [1, 3, 6, 10, 13],
    # Comedy and Drama with overlaps
    6: [2, 4, 5, 10],
    # Animation and Comedy
    7: [1, 3, 4, 10],
    # Comedy and Drama
    8: [2, 4, 5],
    # Animation and Action
    9: [1, 5, 6],
    # Comedy and Drama
    10: [3, 4],
}

In [9]:
# Step 3: Create user-movie interaction matrix
# One row per synthetic user and one column per movieId in the catalogue; a cell
# is True when that user watched that movie.  The matrix is boolean rather than
# 0/1 integers because recent mlxtend versions warn on non-bool input to apriori
# and recommend a bool DataFrame (supports are unaffected).
user_movie_matrix = pd.DataFrame(
    False,
    index=list(user_history),
    columns=movies_df['movieId'],
)

for user_id, watched_movies in user_history.items():
    # Row-wise assignment marks every watched movie for this user in one step.
    user_movie_matrix.loc[user_id, watched_movies] = True

In [10]:

# Step 4: Mine frequent itemsets, keeping movie sets whose support is at least 0.25
# and labelling items by column name (movieId) rather than column position.
frequent_itemsets = apriori(
    df=user_movie_matrix,
    min_support=0.25,
    use_colnames=True,
)



In [11]:
# Show every frequent itemset alongside its support value
print(frequent_itemsets)

    support       itemsets
0       0.5            (1)
1       0.4            (2)
2       0.5            (3)
3       0.7            (4)
4       0.6            (5)
5       0.5            (6)
6       0.7           (10)
7       0.3         (1, 3)
8       0.3         (1, 6)
9       0.4        (1, 10)
10      0.3         (2, 4)
11      0.3         (2, 5)
12      0.3        (2, 10)
13      0.4         (3, 4)
14      0.3         (3, 6)
15      0.4        (10, 3)
16      0.5         (4, 5)
17      0.3         (4, 6)
18      0.5        (10, 4)
19      0.4         (5, 6)
20      0.4        (10, 5)
21      0.4        (10, 6)
22      0.3     (1, 10, 3)
23      0.3      (2, 4, 5)
24      0.3     (10, 3, 4)
25      0.3     (10, 3, 6)
26      0.3      (4, 5, 6)
27      0.4     (10, 4, 5)
28      0.3     (10, 4, 6)
29      0.3     (10, 5, 6)
30      0.3  (10, 4, 5, 6)


In [12]:
# Step 5: Derive association rules from the frequent itemsets,
# filtering on the lift metric with a threshold of 1.
rules = association_rules(
    df=frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

In [13]:
# Emit the heading and the rules table, one per line
print("Generated Association Rules:", rules, sep="\n")

Generated Association Rules:
   antecedents consequents  antecedent support  consequent support  support  \
0          (1)         (3)                 0.5                 0.5      0.3   
1          (3)         (1)                 0.5                 0.5      0.3   
2          (1)         (6)                 0.5                 0.5      0.3   
3          (6)         (1)                 0.5                 0.5      0.3   
4          (1)        (10)                 0.5                 0.7      0.4   
..         ...         ...                 ...                 ...      ...   
83      (5, 6)     (10, 4)                 0.4                 0.5      0.3   
84        (10)   (4, 5, 6)                 0.7                 0.3      0.3   
85         (4)  (10, 5, 6)                 0.7                 0.3      0.3   
86         (5)  (10, 4, 6)                 0.6                 0.3      0.3   
87         (6)  (10, 4, 5)                 0.5                 0.4      0.3   

    confidence      li

In [14]:
# Function to recommend movies based on watched movie IDs
def recommend_movies(watched_movie_ids, rules_df=None, catalog_df=None):
    """Recommend unseen movies implied by association rules.

    A rule contributes when its antecedent shares at least one movie with
    ``watched_movie_ids``; every movie in that rule's consequent becomes a
    candidate, and movies that were already watched are removed.

    Parameters
    ----------
    watched_movie_ids : iterable
        movieIds the user has already watched (hashable values).
    rules_df : pandas.DataFrame, optional
        Rules table with 'antecedents' and 'consequents' columns holding
        collections of movieIds. Defaults to the notebook-level ``rules``.
    catalog_df : pandas.DataFrame, optional
        Movie table with 'movieId', 'title' and 'genres' columns.
        Defaults to the notebook-level ``movies_df``.

    Returns
    -------
    pandas.DataFrame
        The 'movieId', 'title' and 'genres' columns of the catalogue rows for
        the recommended movies. The frame is empty (same three columns) when
        nothing can be recommended, so callers can always treat the result
        as a DataFrame.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if rules_df is None:
        rules_df = rules
    if catalog_df is None:
        catalog_df = movies_df

    # Materialise once: supports one-shot iterables and makes overlap checks cheap.
    watched = set(watched_movie_ids)

    # Collect the consequents of every rule triggered by at least one watched movie.
    matched_consequents = [
        consequent
        for antecedent, consequent in zip(rules_df['antecedents'], rules_df['consequents'])
        if not watched.isdisjoint(antecedent)
    ]
    if not matched_consequents:
        print("No recommendations found for the given watched movies.")

    # Union of all triggered consequents, minus what the user has already seen.
    recommended_movie_ids = set().union(*matched_consequents) - watched

    # With no candidates the mask is all False, which yields an empty frame
    # that still carries the expected columns.
    is_recommended = catalog_df['movieId'].isin(list(recommended_movie_ids))
    return catalog_df.loc[is_recommended, ['movieId', 'title', 'genres']]

In [15]:
# Example input for the recommender: the movieIds this user has already watched
watched_movies_of_user = [1]  # a single watched movie, movieId 1


In [16]:
# Look up the catalogue details of the movies the user has already seen
watched_movies_info = movies_df.loc[
    movies_df['movieId'].isin(watched_movies_of_user),
    ['movieId', 'title', 'genres'],
]
print("Watched Movies:", watched_movies_info.to_string(index=False), sep="\n")

# Ask the rule-based recommender what that history implies
recommended_movies_of_user = recommend_movies(watched_movies_of_user)

# Heading first, then the recommendation table without the index column
print("\nRecommended Movies:")
print(recommended_movies_of_user.to_string(index=False))



Watched Movies:
 movieId            title                                      genres
       1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy

Recommended Movies:
 movieId                   title                    genres
       3 Grumpier Old Men (1995)            Comedy|Romance
       6             Heat (1995)     Action|Crime|Thriller
      10        GoldenEye (1995) Action|Adventure|Thriller
