In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Sample Movie Data with metadata
movies_data = {
    'Movie': ['Movie1', 'Movie2', 'Movie3', 'Movie4', 'Movie5'],
    'Genres': ['Action|Adventure', 'Action|Thriller', 'Comedy|Romance', 'Action|Sci-Fi', 'Comedy|Drama'],
    'Director': ['Director1', 'Director2', 'Director1', 'Director3', 'Director2'],
    'Actors': ['Actor1|Actor2', 'Actor3|Actor4', 'Actor5|Actor6', 'Actor1|Actor4', 'Actor2|Actor5']
}

# Sample User Preferences (Genres, Directors, Actors liked by users)
users_data = {
    'User': ['Alice', 'Bob'],
    'Liked_Movies': [['Movie1', 'Movie2'], ['Movie3', 'Movie5']]  # Movies liked by Alice and Bob
}

# Create DataFrames for movies and users
movies_df = pd.DataFrame(movies_data)
users_df = pd.DataFrame(users_data)

# Step 1: Convert categorical features (Genres, Director, Actors) into numerical features
label_encoder = LabelEncoder()

# Encode 'Genres', 'Director', 'Actors' columns
movies_df['Genres'] = label_encoder.fit_transform(movies_df['Genres'])
movies_df['Director'] = label_encoder.fit_transform(movies_df['Director'])
movies_df['Actors'] = label_encoder.fit_transform(movies_df['Actors'])
movies_df['Actors'] = label_encoder.fit_transform(movies_df['Actors'])

# Step 2: Build Feature Vectors for Movies
def build_feature_vector(row):
    return np.array([row['Genres'], row['Director'], row['Actors']])

# Apply the function to each row to create feature vectors
movies_df['Feature_Vector'] = movies_df.apply(build_feature_vector, axis=1)

# Step 3: Build User Profiles based on liked movies
def build_user_profile(user_likes, movies_df):
    # Get the feature vectors of liked movies
    liked_movies = movies_df[movies_df['Movie'].isin(user_likes)]
    
    # Compute the average feature vector for the user, 
    # if user likes two movies take those two movies vectors and average them to get user profile
    user_profile = np.mean(liked_movies['Feature_Vector'].tolist(), axis=0)
    
    return user_profile

# Step 4: Calculate Similarity between User Profile and Movie Feature Vectors
def recommend_movies(user_profile, movies_df, top_n=3):
    # Get the feature vectors for all movies
    movie_feature_vectors = np.array(movies_df['Feature_Vector'].tolist())
    
    # Calculate cosine similarity between the user profile and all movie feature vectors
    cosine_sim = cosine_similarity([user_profile], movie_feature_vectors)[0]
    
    # Get the indices of the top N most similar movies (excluding the movie itself)
    top_n_indices = cosine_sim.argsort()[-top_n-1:-1][::-1]
    
    # Get the movie names
    recommended_movies = movies_df.iloc[top_n_indices]['Movie']
    
    return recommended_movies.tolist()

# Example: Recommend movies for Alice
user_profile_alice = build_user_profile(users_df.iloc[0]['Liked_Movies'], movies_df)
recommended_movies_alice = recommend_movies(user_profile_alice, movies_df, top_n=3)

print("Recommended Movies for Alice:")
print(recommended_movies_alice)

# Example: Recommend movies for Bob
user_profile_bob = build_user_profile(users_df.iloc[1]['Liked_Movies'], movies_df)
recommended_movies_bob = recommend_movies(user_profile_bob, movies_df, top_n=3)

print("\nRecommended Movies for Bob:")
print(recommended_movies_bob)

Recommended Movies for Alice:
['Movie3', 'Movie5', 'Movie4']

Recommended Movies for Bob:
['Movie5', 'Movie2', 'Movie4']


In [7]:
from sklearn.datasets import load_iris



h=load_iris()

print(h)

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     

In [10]:
h.DESCR



In [11]:
h.feature_names


['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [13]:
h.frame
print(h)

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     