In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
#
# EACH LINE BELOW STORES THE VALUE RETURNED BY kagglehub.dataset_download() FOR ONE DATASET.
# NOTE(review): THE LOADING CELL FURTHER DOWN READS anime.csv AND rating.csv FROM
# '/kaggle/input/anime-recommendations-database/' AND NONE OF THE *_path VARIABLES IS
# REFERENCED AGAIN IN THIS NOTEBOOK - CONFIRM THE FILES ARE AVAILABLE AT THAT LOCATION.
import kagglehub
organizations_cooperunion_anime_recommendations_database_path = kagglehub.dataset_download('organizations/CooperUnion/anime-recommendations-database')
hernan4444_anime_recommendation_database_2020_path = kagglehub.dataset_download('hernan4444/anime-recommendation-database-2020')
vishalmane10_anime_dataset_2022_path = kagglehub.dataset_download('vishalmane10/anime-dataset-2022')
dbdmobile_myanimelist_dataset_path = kagglehub.dataset_download('dbdmobile/myanimelist-dataset')

print('Data source import complete.')


# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:12px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:8px; text-align:center'>📺 Anime Recommendation System  💫</p>


# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:12px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:8px; text-align:center;'>📖 Table of Contents 🌟</p>


<div style="
    background-color:rgb(255, 221, 234); /* Warna lebih terang dari #FF80AB */
    border-radius: 12px;
    font-size: 16px;
    color: #8E005D; /* Warna teks lebih kontras */
    border: 4px solid #FF4081;
    padding: 12px 15px;
    box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2);
    font-family: 'Signika Negative', sans-serif;
    font-weight: bold;
">


1. Import Library <br>
2. Load Dataset  <br>
3. Preprocessing <br>
4. Content-Based Filtering  <br>
5. Craft Your Recommendation <br>

</div>

# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:15px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:12px; text-align:center;'>➡️ Import Library 📚</p>


In [None]:
import pandas as pd
import numpy as np
import copy
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:15px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:12px; text-align:center;'>📚 Load Dataset 📈</p>


In [None]:
# READ THE ANIME CATALOGUE AND THE USER RATINGS FROM THE SAME INPUT DIRECTORY
DATA_DIR = r'/kaggle/input/anime-recommendations-database'

anime_df = pd.read_csv(DATA_DIR + '/anime.csv')
rating_df = pd.read_csv(DATA_DIR + '/rating.csv')

# REPORT (ROWS, COLUMNS) OF BOTH TABLES
print(f'Anime data shape : {anime_df.shape}')
print(f'User data shape : {rating_df.shape}')

In [None]:
# RENDER BOTH RAW TABLES, ANIME FIRST AND RATINGS SECOND
display(anime_df, rating_df)

# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:15px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:12px; text-align:center;'>🛠️ Data Preprocessing: Time to Clean and Transform! 🔧</p>


In [None]:
# KEEP ONLY ANIME WHOSE GENRE AND TYPE ARE KNOWN, THEN REPLACE EVERY NaN THAT IS
# STILL LEFT (IN ANY COLUMN, NOT ONLY rating) WITH 0 AND RENUMBER THE ROWS
anime_df = (
    anime_df
    .dropna(subset=['genre', 'type'])
    .fillna(value=0)
    .reset_index(drop=True)
)

anime_df.info()

In [None]:
# REMOVE ANIME THAT SHARE THE SAME NAME AND RATING ROWS THAT ARE FULLY IDENTICAL;
# ignore_index=True RENUMBERS THE REMAINING ROWS FROM 0
anime_df = anime_df.drop_duplicates(subset='name', ignore_index=True)
rating_df = rating_df.drop_duplicates(ignore_index=True)

# SHAPES AFTER DE-DUPLICATION
(anime_df.shape, rating_df.shape)

In [None]:
# THE episodes COLUMN IS CAST TO A 32-BIT INTEGER HERE.
# THE PLACEHOLDER 'Unknown' IS SWAPPED FOR 0 FIRST SO THAT THE CAST CAN SUCCEED.
anime_df['episodes'] = (
    anime_df['episodes']
    .mask(anime_df['episodes'] == 'Unknown', 0)
    .astype('int32')
)

# COLUMN TYPES AFTER THE CONVERSION
anime_df.info()

<div style="
    background-color:rgb(255, 221, 234); /* Warna lebih terang dari #FF80AB */
    border-radius: 15px;
    font-size: 16px;
    color: #8E005D; /* Warna teks lebih kontras */
    border: 4px solid #FF4081;
    padding: 20px 25px;
    box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2);
    font-family: 'Signika Negative', sans-serif;
">
    <h4 style="
        font-size: 20px;
        color: #C2185B; /* Warna judul lebih gelap agar kontras */
        font-weight: bold;
        margin-bottom: 15px;
        text-align: center; /* Judul tetap di tengah */
    ">🧬 Explanation for Code Cell below: 💡
    </h4>

A rating of -1 means that the user has watched the anime but has not given it a rating (implicit feedback). Such rows could be kept as additional information by imputing a value, and the options for doing so are listed below. In this notebook, after trying several of them, the rows with a rating of -1 are removed in the code cell below. <br>

In the rating column, there is a rating with a value of -1. That means they have watched the anime but have not given a rating. In that case, we can consider it as implicit feedback. <br>

<h4> <strong>In this case, there are several ways to handle the rating value -1 : </strong></h4>

1. We can change the rating value -1 to its middle value, which is 5 (because the rating range is 0 - 10) <br>
2. replace it with the average value of the user.  <br>
3. Replace it with the anime's overall rating. For example, if the anime rating is 8.2, then the user rating is changed to 8.2 (this method applies if we want to consistently recommend the anime with the highest ratings). It is useful for new users who have never watched an anime or have watched only a few, because we can then recommend the anime with the highest rating.  <br>
4. If the rating -1 is less helpful but we don't want to lose information from the data, then we can replace it with a value of 0.  <br>
5. If the rating -1 is not helpful and does not have a significant effect on the model or we have a lot of data that is not worth -1 and a few ratings that are worth -1, then we can delete data that has a rating of -1. <br>

</div>

In [None]:
# SHOW THE RATINGS TABLE AS IT IS BEFORE THE -1 RATINGS ARE HANDLED
rating_df

In [None]:
# HANDLING OF RATING -1 (WATCHED BUT NOT RATED)
#
# ALTERNATIVES THAT WERE TRIED EARLIER AND ARE NO LONGER ACTIVE (THEY WERE WRITTEN
# AGAINST A MERGED FRAME NAMED combined_df THAT THIS NOTEBOOK DOES NOT BUILD):
#   - REPLACE -1 WITH THE RATING OF THAT ANIME, ROW BY ROW WITH iterrows()
#   - THE SAME REPLACEMENT AS ONE VECTORIZED .loc ASSIGNMENT (MUCH FASTER)
#   - REPLACE -1 WITH THE CONSTANT 5
#
# AFTER MANY ATTEMPTS, THE DECISION WAS TO REMOVE THE ROWS WHOSE RATING IS -1.
rating_df = rating_df.loc[rating_df['rating'].ne(-1)]

# RATINGS TABLE AFTER THE -1 ROWS WERE REMOVED
rating_df

# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:15px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:12px; text-align:center; letter-spacing:2px;'>📺 Content-Based Filtering 🍿</p>


![Content-Based Filtering.png](attachment:702c485a-c483-44a8-82bf-bd8afd81f448.png)

<div style="
    background-color:rgb(255, 221, 234); /* Warna lebih terang dari #FF80AB */
    border-radius: 15px;
    font-size: 16px;
    color: #8E005D; /* Warna teks lebih kontras */
    border: 4px solid #FF4081;
    padding: 20px 25px;
    box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2);
    font-family: 'Signika Negative', sans-serif;
    font-weight: 300;
    text-align: left; /* Rata kiri */
    letter-spacing: 1px;
">
    <h4 style="
        font-size: 20px;
        color: #C2185B; /* Warna judul lebih gelap agar kontras */
        font-weight: bold;
        margin-bottom: 15px;
        text-align: center; /* Judul tetap di tengah */
    ">🧬 Explanation: 💡
    </h4>

Content-Based Filtering is a technique in recommendation systems that is used to recommend an item/product based on the products that the user likes. In this system, items that are similar to items that a user has previously liked or rated highly will be recommended to that user. <br> <br>

- <strong>Advantages of Content-Based Filtering: </strong> <br>
  1. <strong>Personalization:</strong> Each user receives recommendations that are highly tailored to their preferences. <br>
  2. <strong>No Need for Other User Data:</strong> This system does not rely on the behavior of other users, so it can still be effective even if the number of users or other data is limited. <br>

<br>
- <strong>Disadvantages: </strong> <br>

  1. <strong>Over-Specialization:</strong> The system might focus too much on items that are similar to what the user already knows, which can sometimes prevent it from introducing new and potentially interesting variations to the user.
  2. <strong>New Item Problem:</strong> If a new item lacks sufficient data or features, it becomes difficult to recommend it to users.
<br><br>

- <strong>Example:</strong> <br>

  1. <strong>Anime Recommendation:</strong> For instance, if a user likes Anime with the genres "action" and "adventure," the system will analyze other Anime with similar genres and recommend those Anime. <br>
  2. <strong>Book Recommendation:</strong> If a user frequently reads science fiction books, the system will recommend other books in the same genre.
  3. There are many more...<br>

<br><br>
<strong>In this method,
we will use cosine similarity to find similarities for each anime and recommend anime that have similarities to the ones the user has watched.</strong>
</div>

In [None]:
# FEATURES TAKEN FROM anime_df: genre (TF-IDF), type (ONE-HOT), rating (AS IS) AND members (STANDARDIZED)

def preprocessing(anime_df):
    """Turn the anime table into a numeric feature matrix.

    Parameters
    ----------
    anime_df : pandas.DataFrame
        Must contain the columns 'genre' (comma + space separated string),
        'type', 'rating' and 'members'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in the same order, with a 0..n-1 index.
        Columns: one TF-IDF column per genre, one indicator column per value
        of 'type', then 'rating' and the standardized 'members'.

    Side effects
    ------------
    Displays the array of TF-IDF feature names.
    """

    # WORK ON A NEW FRAME WITH A 0..n-1 INDEX: THE concat AND THE COLUMN ASSIGNMENTS
    # BELOW ALIGN ON INDEX LABELS, SO A NON-DEFAULT INDEX WOULD CREATE NaN / EXTRA ROWS
    anime_df = anime_df.reset_index(drop = True)

    # TURN EVERY GENRE INTO ONE SINGLE TOKEN
    def preprocess_genre(genre):
        # THE DEFAULT TfidfVectorizer TOKEN PATTERN BREAKS WORDS AT SPACES AND HYPHENS,
        # SO 'Slice of Life' AND 'Sci-Fi' MUST BE JOINED WITH UNDERSCORES TO STAY ONE FEATURE
        return ', '.join(
            g.strip().replace(' ', '_').replace('-', '_') for g in genre.split(', ')
        )

    # APPLY PREPROCESS FOR GENRE
    anime_df['genre_preprocessed'] = anime_df['genre'].apply(preprocess_genre)

    # -------------------------------------------------------------------------------------------------------

    # SECOND, ENCODE THE GENRE TEXT AS NUMBERS WITH TF-IDF
    tfidf = TfidfVectorizer(encoding='utf-8', lowercase = True, stop_words= None, ngram_range= (1,1))
    genre_encoded = tfidf.fit_transform(anime_df['genre_preprocessed'])

    # DISPLAY THE LEARNED GENRE FEATURES
    features = tfidf.get_feature_names_out()
    display(features)

    # CONVERT ENCODED GENRE TO DATAFRAME
    genre_encoded_df = pd.DataFrame(data = genre_encoded.toarray(), columns = features)

    # THIRD, ONE-HOT ENCODE THE TYPE COLUMN
    type_encoded = pd.get_dummies(data = anime_df['type'])

    # FOURTH, PUT GENRE AND TYPE FEATURES SIDE BY SIDE
    df_encode = pd.concat((genre_encoded_df, type_encoded), axis=1)

    # FIFTH, ADD THE ANIME RATING COLUMN
    # NOTE(review): rating IS KEPT ON ITS RAW SCALE WHILE THE OTHER FEATURES ARE SMALL,
    # SO IT PROBABLY WEIGHS HEAVILY IN COSINE SIMILARITY - CONFIRM THIS IS INTENDED
    df_encode['rating'] = anime_df['rating'].to_numpy()

    # LAST, ADD members STANDARDIZED (ZERO MEAN, UNIT VARIANCE) BECAUSE THE RAW VALUES ARE LARGE
    norm = StandardScaler()
    df_encode['members'] = norm.fit_transform(anime_df[['members']]).ravel()

    return df_encode

In [None]:
# BUILD THE NUMERIC FEATURE MATRIX FROM THE CLEANED ANIME TABLE AND SHOW IT
df_encoded = preprocessing(anime_df = anime_df)
df_encoded

In [None]:
# PAIRWISE COSINE SIMILARITY BETWEEN ALL ROWS OF THE FEATURE MATRIX
# (WITH ONE ARGUMENT, cosine_similarity COMPARES THE ROWS WITH THEMSELVES)
similarity = cosine_similarity(df_encoded)

# LOOKUP TABLE: anime_id -> ROW LABEL OF THAT ANIME IN anime_df
anime_idx = pd.Series(data = anime_df.index, index = anime_df['anime_id'])

# SANITY CHECK OF THE SIZES
(similarity.shape, len(anime_idx))

In [None]:
# CONTENT-BASED SCORE OF ONE CANDIDATE ANIME FOR ONE USER
def predict_cb(user_id, anime_id):
    """Score a candidate anime against the anime a user rated highest.

    The score is the mean of the `similarity` entries between the candidate
    and the user's (at most) 20 highest-rated anime.

    Parameters
    ----------
    user_id : value matched against rating_df['user_id'].
    anime_id : id of the candidate anime; must be a key of `anime_idx`.

    Returns
    -------
    float
        Mean similarity, or NaN when the user has no rated anime that is
        present in `anime_idx`.

    Raises
    ------
    KeyError
        If `anime_id` is not in `anime_idx`.

    Uses the notebook-level objects `rating_df`, `anime_idx` and `similarity`.
    """

    # ROW OF THE CANDIDATE IN THE SIMILARITY MATRIX (KeyError FOR AN UNKNOWN ID)
    target_anime = anime_idx[anime_id]

    # ALL ANIME RATED BY THAT USER
    watched_anime = rating_df.loc[rating_df['user_id'] == user_id, ['anime_id', 'rating']]

    # RATINGS CAN POINT TO ANIME THAT WERE REMOVED FROM anime_df DURING CLEANING;
    # LOOKING THOSE UP WOULD RAISE KeyError, SO THEY ARE LEFT OUT
    watched_anime = watched_anime[watched_anime['anime_id'].isin(anime_idx.index)]

    # KEEP THE 20 HIGHEST RATED ONES
    watched_anime = watched_anime.sort_values(by = 'rating', ascending = False).head(20)

    # NOTHING TO COMPARE WITH: RETURN NaN EXPLICITLY INSTEAD OF np.mean([]) AND ITS WARNING
    if watched_anime.empty:
        return np.nan

    # ONE VECTORIZED LOOKUP INSTEAD OF A PYTHON LOOP
    watched_idx = anime_idx.loc[watched_anime['anime_id'].to_numpy()].to_numpy()
    return similarity[target_anime, watched_idx].mean()


In [None]:

# RANK EVERY UNWATCHED ANIME FOR A USER AND RETURN THE BEST SCORING ONES
def get_recommendation(user_id, top_n = 5, anime_df = anime_df):
    """Return the `top_n` unwatched anime with the highest content-based score.

    The score of a candidate is the mean of the `similarity` entries between
    the candidate and the user's (at most) 20 highest-rated anime, i.e. the
    same quantity computed for a single candidate by predict_cb(), up to
    floating-point rounding.

    Parameters
    ----------
    user_id : value matched against rating_df['user_id'].
    top_n : int, number of recommendations to return.
    anime_df : pandas.DataFrame with an 'anime_id' column; the candidates.

    Returns
    -------
    list of (anime_id, score) tuples sorted by descending score. The list is
    empty when the user has no rated anime that is present in `anime_idx`,
    or when there is no unwatched anime.

    Raises
    ------
    KeyError
        If a candidate id is not in `anime_idx`.

    Uses the notebook-level objects `rating_df`, `anime_idx` and `similarity`.
    """

    # FILTER THE RATINGS TABLE ONCE (NOT ONCE PER CANDIDATE, WHICH IS VERY SLOW)
    user_ratings = rating_df.loc[rating_df['user_id'] == user_id, ['anime_id', 'rating']]

    # UNWATCHED ANIME = ALL ANIME MINUS THE ONES THE USER RATED
    candidate_ids = list(set(anime_df['anime_id']) - set(user_ratings['anime_id']))

    # TOP 20 MOST LIKED ANIME; RATED IDS MISSING FROM anime_idx CANNOT BE LOOKED UP, SO SKIP THEM
    profile = user_ratings[user_ratings['anime_id'].isin(anime_idx.index)]
    profile = profile.sort_values(by = 'rating', ascending = False).head(20)

    # WITHOUT A USABLE HISTORY (OR WITHOUT CANDIDATES) THERE IS NOTHING TO RANK
    if profile.empty or not candidate_ids:
        return []

    # POSITIONS IN THE SIMILARITY MATRIX
    profile_idx = anime_idx.loc[profile['anime_id'].to_numpy()].to_numpy()
    candidate_idx = anime_idx.loc[candidate_ids].to_numpy()

    # (CANDIDATES x LIKED ANIME) BLOCK OF THE MATRIX, AVERAGED PER CANDIDATE
    scores = similarity[np.ix_(candidate_idx, profile_idx)].mean(axis = 1)

    # SORT SCORES BY DESCENDING
    recommendation = sorted(zip(candidate_ids, scores), key = lambda pair : pair[1], reverse = True)

    return recommendation[: top_n]   # --->  RETURN LIST OF TUPLE

In [None]:
# RECOMMEND ANIME FOR USER 1

user_id = 1
n = 10
predictions = get_recommendation(user_id = user_id, top_n = n)

print(f'Top {n} Most Rated Anime by User {user_id} : ')
# THE n HIGHEST RATED ANIME OF THE USER, JOINED WITH THE ANIME DETAILS
liked_anime = rating_df[rating_df['user_id'] == user_id].sort_values(by = 'rating', ascending = False)[:n]
liked_anime = pd.merge(left = anime_df, right = liked_anime, how = 'right', on = 'anime_id')

display(liked_anime)


# BUILD THE FRAME STRAIGHT FROM THE (anime_id, score) TUPLES; THIS AVOIDS A VARIABLE
# NAMED dict, WHICH WOULD HIDE THE BUILT-IN dict FOR THE REST OF THE NOTEBOOK.
# 'predicted_rating' HOLDS THE SCORE RETURNED BY get_recommendation()
predictions_df = pd.DataFrame(predictions, columns = ['anime_id', 'predicted_rating'])

# ADD THE ANIME DETAILS, KEEPING THE ORDER OF THE PREDICTIONS
recommendation = pd.merge(left = anime_df, right = predictions_df, how = 'right', on = 'anime_id')

print(f'\nTop {n} Most Recommendation Anime to User {user_id}')
recommendation

In [None]:
# RECOMMEND ANIME FOR USER 1000

user_id = 1000
n = 10
predictions = get_recommendation(user_id = user_id, top_n = n)

# ------------------------------------------------------------------------

print(f'Top {n} Most Rated Anime by User {user_id} : ')
# THE n HIGHEST RATED ANIME OF THE USER, JOINED WITH THE ANIME DETAILS
liked_anime = rating_df[rating_df['user_id'] == user_id].sort_values(by = 'rating', ascending = False)[:n]
liked_anime = pd.merge(left = anime_df, right = liked_anime, how = 'right', on = 'anime_id')

display(liked_anime)


# BUILD THE FRAME STRAIGHT FROM THE (anime_id, score) TUPLES; THIS AVOIDS A VARIABLE
# NAMED dict, WHICH WOULD HIDE THE BUILT-IN dict FOR THE REST OF THE NOTEBOOK.
# 'predicted_rating' HOLDS THE SCORE RETURNED BY get_recommendation()
predictions_df = pd.DataFrame(predictions, columns = ['anime_id', 'predicted_rating'])

# ADD THE ANIME DETAILS, KEEPING THE ORDER OF THE PREDICTIONS
recommendation = pd.merge(left = anime_df, right = predictions_df, how = 'right', on = 'anime_id')

print(f'\nTop {n} Most Recommendation Anime to User {user_id}')
recommendation

In [None]:
# RECOMMEND ANIME FOR USER 23

user_id = 23
n = 10
predictions = get_recommendation(user_id = user_id, top_n = n)

# ------------------------------------------------------------------------

print(f'Top {n} Most Rated Anime by User {user_id} : ')
# THE n HIGHEST RATED ANIME OF THE USER, JOINED WITH THE ANIME DETAILS
liked_anime = rating_df[rating_df['user_id'] == user_id].sort_values(by = 'rating', ascending = False)[:n]
liked_anime = pd.merge(left = anime_df, right = liked_anime, how = 'right', on = 'anime_id')

display(liked_anime)


# BUILD THE FRAME STRAIGHT FROM THE (anime_id, score) TUPLES; THIS AVOIDS A VARIABLE
# NAMED dict, WHICH WOULD HIDE THE BUILT-IN dict FOR THE REST OF THE NOTEBOOK.
# 'predicted_rating' HOLDS THE SCORE RETURNED BY get_recommendation()
predictions_df = pd.DataFrame(predictions, columns = ['anime_id', 'predicted_rating'])

# ADD THE ANIME DETAILS, KEEPING THE ORDER OF THE PREDICTIONS
recommendation = pd.merge(left = anime_df, right = predictions_df, how = 'right', on = 'anime_id')

print(f'\nTop {n} Most Recommendation Anime to User {user_id}')
recommendation

# <p style='font-family: Signika+Negative; background-color:#FF80AB; font-weight:bold; color:#FFFFFF; border:4px solid #FF4081; border-radius:15px; box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2); padding:12px; text-align:center;'>🎧 Craft Your Recommendation 🎵</p>

<div style="
    background-color:rgb(255, 229, 236); /* Warna lebih terang dari #FF80AB */
    border-radius: 15px;
    font-size: 16px;
    color: #8E005D; /* Warna teks lebih kontras */
    border: 4px solid #FF4081;
    padding: 20px 25px;
    box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2);
    font-family: 'Signika Negative', sans-serif;
    font-weight: bold;
    text-align: center;
">
    
<h4 style="
        font-size: 20px;
        color: #C2185B; /* Warna judul lebih gelap agar kontras */
        font-weight: bold;
        margin-bottom: 10px;">🧬 Explanation: 💡</h4>
    
The code cells below build and display anime recommendations based on your favorite genres. You are free to choose any genres you like.
</div>

In [None]:

# KEEP ONLY THE ANIME THAT HAVE AT LEAST ONE OF THE SELECTED GENRES
def filter_anime_by_genre(selected_genre, df):
    """Return the rows of `df` that carry at least one selected genre.

    Genres are compared as whole names (case-sensitive), so selecting
    'Shounen' does not match an anime whose only genre is 'Shounen Ai'.

    Parameters
    ----------
    selected_genre : iterable of str, or a single str
        Genre names to look for. A single string is treated as one genre.
    df : pandas.DataFrame
        Must contain a 'genre' column of comma-separated genre names.

    Returns
    -------
    pandas.DataFrame
        New frame with the matching rows in their original order and a
        0..n-1 index.
    """

    # A BARE STRING WOULD OTHERWISE BE ITERATED CHARACTER BY CHARACTER
    if isinstance(selected_genre, str):
        selected_genre = [selected_genre]

    wanted = {genre.strip() for genre in selected_genre}

    # TRUE WHEN THE ANIME SHARES AT LEAST ONE WHOLE GENRE NAME WITH THE SELECTION
    def has_wanted_genre(genre_string):
        return not wanted.isdisjoint(g.strip() for g in genre_string.split(','))

    # astype(bool) KEEPS THE MASK BOOLEAN EVEN WHEN df HAS NO ROWS
    mask = df['genre'].apply(has_wanted_genre).astype(bool)

    # RETURN DATA WITH AT-LEAST 1 SELECTED GENRE
    return df[mask].reset_index(drop = True)


In [None]:

# BUILD THE FEATURE MATRIX (GENRE TF-IDF + members) FOR AN ALREADY FILTERED ANIME TABLE
def special_preprocessing(df):
    """Encode the genres of `df` with TF-IDF and append the raw member count.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'genre' (comma + space separated string)
        and 'members'. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in the same order, with a 0..n-1 index.
        Columns: one TF-IDF column per genre, then 'members' (unscaled).
    """

    # WORK ON A NEW FRAME WITH A 0..n-1 INDEX SO EVERY ROW LINES UP WITH THE TF-IDF MATRIX
    df = df.reset_index(drop = True)

    # TURN EVERY GENRE INTO ONE SINGLE TOKEN
    def preprocess_genre(genre):
        # THE DEFAULT TfidfVectorizer TOKEN PATTERN BREAKS WORDS AT SPACES AND HYPHENS,
        # SO 'Slice of Life' AND 'Sci-Fi' MUST BE JOINED WITH UNDERSCORES TO STAY ONE FEATURE
        return ', '.join(
            g.strip().replace(' ', '_').replace('-', '_') for g in genre.split(', ')
        )

    # APPLY PREPROCESS FOR GENRE
    df['genre_preprocessed'] = df['genre'].apply(preprocess_genre)

    # -------------------------------------------------------------------------------------------------------

    # SECOND, ENCODE THE GENRE TEXT AS NUMBERS WITH TF-IDF
    tfidf = TfidfVectorizer(encoding='utf-8', lowercase = True, stop_words= None, ngram_range= (1,1))
    genre_encoded = tfidf.fit_transform(df['genre_preprocessed'])

    # CONVERT ENCODED GENRE TO DATAFRAME
    df_encode = pd.DataFrame(data = genre_encoded.toarray(), columns = tfidf.get_feature_names_out())

    # LAST, ADD THE members COLUMN BY POSITION. STANDARDIZING IT WITH StandardScaler WAS
    # TRIED AND SWITCHED OFF, SO THE RAW COUNT IS USED.
    # NOTE(review): ON ITS RAW SCALE members IS MUCH LARGER THAN THE TF-IDF VALUES AND
    # PROBABLY DOMINATES ANY COSINE SIMILARITY BUILT ON THIS MATRIX - CONFIRM THIS IS INTENDED
    df_encode['members'] = df['members'].to_numpy()

    return df_encode

In [None]:
# RANK THE ANIME THAT MATCH THE SELECTED GENRES, BY POPULARITY AND BY RELEVANCE
def recommendation_by_genre(selected_genre, type, anime_df):
    """Build two rankings of the anime that share a genre with the selection.

    Parameters
    ----------
    selected_genre : iterable of str, or a single str
        Favorite genres. A single string is treated as one genre.
    type : str
        Anime type to keep (compared case-insensitively with the 'type'
        column), or 'all' to keep every type.
    anime_df : pandas.DataFrame
        Anime table with at least 'genre', 'type' and 'members' columns.

    Returns
    -------
    tuple (popular_df, relevant_df)
        Both hold the matching anime plus the helper columns
        'irrelevant_genre_count', 'relevant_genre' and 'avg_similarity'.
        popular_df is sorted by relevant_genre, members, avg_similarity;
        relevant_df by relevant_genre, avg_similarity, members (all descending).

    Raises
    ------
    ValueError
        If `type` is None, if no anime has any of the selected genres, or
        if no matching anime is left after filtering by `type`.
    """

    if type is None:
        raise ValueError("Type Can't be Null!")

    # NORMALIZE THE TYPE FIRST SO A BAD VALUE FAILS BEFORE THE EXPENSIVE SIMILARITY STEP
    type = type.lower()

    # A BARE STRING WOULD OTHERWISE BECOME A SET OF SINGLE CHARACTERS BELOW
    if isinstance(selected_genre, str):
        selected_genre = [selected_genre]

    # SELECT ANIME WITH GENRE SAME AS selected_genre (AT LEAST 1 GENRE ARE SAME)
    filtered_df = filter_anime_by_genre(selected_genre, anime_df)

    # FAIL WITH A CLEAR MESSAGE INSTEAD OF AN ERROR FROM DEEP INSIDE THE TF-IDF STEP
    if filtered_df.empty:
        raise ValueError(f"No anime found for the selected genre(s): {selected_genre}")

    # DO PREPROCESS FOR FILTERED ANIME
    df_encoded = special_preprocessing(filtered_df)

    # CALCULATE COSINE TO GET SIMILARITY BETWEEN EACH ANIME
    similarity_matrix = cosine_similarity(df_encoded)

    # CONVERT SELECTED GENRE TO SET
    relevant_genres = set(selected_genre)

    # -----------------------------------------------------------------------------------------------------------

    # NUMBER OF GENRES OF AN ANIME THAT ARE NOT IN THE SELECTION
    def count_irrelevant_genres(genre_string):
        return len(set(genre_string.split(', ')) - relevant_genres)

    # NUMBER OF GENRES OF AN ANIME THAT ARE IN THE SELECTION
    def count_relevant_genres(genre_string):
        return len(set(genre_string.split(', ')) & relevant_genres)

    # ------------------------------------------------------------------------------------------------------------

    filtered_df["irrelevant_genre_count"] = filtered_df["genre"].apply(count_irrelevant_genres)
    filtered_df['relevant_genre'] = filtered_df['genre'].apply(count_relevant_genres)

    # AVERAGE SIMILARITY OF EACH ANIME TO ALL FILTERED ANIME, DIVIDED BY ITS NUMBER OF
    # IRRELEVANT GENRES (DIVISOR 1 WHEN THERE IS NONE, SO THE VALUE STAYS UNCHANGED)
    irrelevant_count = filtered_df["irrelevant_genre_count"].to_numpy()
    avg_similarity = similarity_matrix.mean(axis=0) / np.where(irrelevant_count > 0, irrelevant_count, 1)

    filtered_df['avg_similarity'] = avg_similarity   # ---> STORE avg_similarity TO DATAFRAME

    # RANKING 1: MOST RELEVANT GENRES FIRST, TIES BROKEN BY SIMILARITY THEN POPULARITY
    relevant_df = filtered_df.sort_values(by = ['relevant_genre', 'avg_similarity', 'members'], ascending = [False, False, False])

    # RANKING 2: MOST RELEVANT GENRES FIRST, TIES BROKEN BY POPULARITY THEN SIMILARITY
    popular_df = filtered_df.sort_values(by = ['relevant_genre', 'members', 'avg_similarity'], ascending = [False, False, False])

    # -----------------------------------------------------------------------------------------------------------------------------

    # IF THEY SELECT ALL TYPE, THEN JUST RETURN IT!
    if type == 'all':
        return (popular_df, relevant_df)

    # FILTER A CHOSEN TYPE OF ANIME
    relevant_df = relevant_df[relevant_df['type'].str.lower() == type]
    popular_df = popular_df[popular_df['type'].str.lower() == type]

    # NOTHING LEFT: THE TYPE IS MISSPELLED OR NO MATCHING ANIME HAS THAT TYPE
    if relevant_df.empty :
        raise ValueError(f"There's no Type named {type}! Check Your spelled!")    # ----> RETURN AN ERROR

    return (popular_df, relevant_df)  # ----> RETURN 2 DATAFRAMES IN TUPLE FORM

In [None]:
def display_recommendation(selected_genre, type='all', top_n = 5):
    """Print and display the popular and the relevant anime for the chosen genres.

    Parameters
    ----------
    selected_genre : list of str, favorite genres.
    type : str, anime type to keep, or 'all' (default) for every type.
    top_n : int, number of rows shown in each table.

    Returns
    -------
    None. Two styled tables are displayed: first the popularity ranking,
    then the relevance ranking returned by recommendation_by_genre(), which
    is called with the notebook-level `anime_df`.
    """

    # UNPACK THE TWO RANKINGS (NO LOCAL VARIABLE NAMED tuple, WHICH WOULD HIDE THE BUILT-IN)
    popular_df, relevant_df = recommendation_by_genre(selected_genre, type, anime_df)

    shown_columns = ['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']

    # FUNCTION TO MAKE BACKGROUND COLOR
    def highlight_cells(val):
        color = 'lightblue'  # BACKGROUND COLOR
        return f'background-color: {color}'

    # TOP ROWS OF ONE RANKING AS A STYLED TABLE (SHARED BY BOTH TABLES BELOW)
    def styled_top(frame):
        # APPLY HIGHLIGHT FOR name AND genre COLUMNS
        styler = frame[shown_columns].head(top_n).style.map(highlight_cells, subset = ['name', 'genre'])

        # HIGHLIGHT AND CREATE BORDER
        styler.set_table_styles(
            [{'selector': 'th.col1', 'props': [('background-color', 'lightgray'), ('border', '1px solid black')]},  # Header 'name'
             {'selector': 'th.col2', 'props': [('background-color', 'lightgray'), ('border', '1px solid black')]},  # Header 'genre'
             {'selector': 'td', 'props': [('border', '1px solid black')]},  # OTHER CELL
             ],
            axis=1  # ---> DECLARE TO MODIFY THE COLUMN
        )
        return styler

    # DISPLAY POPULAR ANIME BASED ON YOUR FAVORITE GENRE
    print(f'\nPopular Anime Based on Your Favorite genre :\nYour favorite genre : {selected_genre}\n')
    display(styled_top(popular_df))

    # ---------------------------------------------------------------------------

    # DISPLAY ANIME BASED ON YOUR SPECIFIC GENRE
    print(f'\n\nAnime Recommendation Based on Your favorite genre :\nYour favorite genre : {selected_genre}')
    display(styled_top(relevant_df))


## Craft Here  

In [None]:
# CRAFT YOUR OWN RECOMMENDATION: PICK THE GENRES, THE ANIME TYPE AND THE LIST LENGTH

selected_genre = ['Action', 'Adventure', 'Shounen']         # ---> YOUR FAVORITE GENRES
type_anime = 'all'    # ---> ANIME TYPE: TV, MOVIE, OVA, ONA, SPECIAL, MUSIC, OR ALL FOR EVERY TYPE
top_n = 10      # ---> HOW MANY RECOMMENDED ANIME TO SHOW

display_recommendation(selected_genre = selected_genre, type = type_anime, top_n = top_n)

In [None]:
# CRAFT YOUR OWN RECOMMENDATION: PICK THE GENRES, THE ANIME TYPE AND THE LIST LENGTH

selected_genre = ['Action', 'Shounen']         # ---> YOUR FAVORITE GENRES
type_anime = 'special'    # ---> ANIME TYPE: TV, MOVIE, OVA, ONA, SPECIAL, MUSIC, OR ALL FOR EVERY TYPE
top_n = 10      # ---> HOW MANY RECOMMENDED ANIME TO SHOW

display_recommendation(selected_genre = selected_genre, type = type_anime, top_n = top_n)

In [None]:
# CREATE YOUR OWN RECOMMENDATION: PICK THE GENRES, THE ANIME TYPE AND THE LIST LENGTH

selected_genre = ['Action', 'Adventure', 'Shounen', 'Super Power']         # ---> YOUR FAVORITE GENRES
type_anime = 'tv'     # ---> ANIME TYPE: TV, MOVIE, OVA, ONA, SPECIAL, MUSIC, OR ALL FOR EVERY TYPE
top_n = 10      # ---> HOW MANY RECOMMENDED ANIME TO SHOW

# SHOW BOTH RANKINGS FOR THIS SELECTION
display_recommendation(selected_genre = selected_genre, type = type_anime, top_n = top_n)

In [None]:
# CREATE YOUR OWN RECOMMENDATION: PICK THE GENRES, THE ANIME TYPE AND THE LIST LENGTH

selected_genre = ['Mystery', 'Psychological']         # ---> YOUR FAVORITE GENRES
type_anime = 'tv'     # ---> ANIME TYPE: TV, MOVIE, OVA, ONA, SPECIAL, MUSIC, OR ALL FOR EVERY TYPE
top_n = 10      # ---> HOW MANY RECOMMENDED ANIME TO SHOW

# SHOW BOTH RANKINGS FOR THIS SELECTION
display_recommendation(selected_genre = selected_genre, type = type_anime, top_n = top_n)

In [None]:
# CREATE YOUR OWN RECOMMENDATION: PICK THE GENRES, THE ANIME TYPE AND THE LIST LENGTH

selected_genre = ['Adventure', 'Fantasy', 'Kids']         # ---> YOUR FAVORITE GENRES
type_anime = 'tv'     # ---> ANIME TYPE: TV, MOVIE, OVA, ONA, SPECIAL, MUSIC, OR ALL FOR EVERY TYPE
top_n = 10      # ---> HOW MANY RECOMMENDED ANIME TO SHOW

# SHOW BOTH RANKINGS FOR THIS SELECTION
display_recommendation(selected_genre = selected_genre, type = type_anime, top_n = top_n)

<div style="
    background-color:rgb(255, 229, 236); /* Warna lebih terang dari #FF80AB */
    border-radius: 15px;
    font-size: 16px;
    color: #8E005D; /* Warna teks lebih kontras */
    border: 4px solid #FF4081;
    padding: 20px 25px;
    box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.2);
    font-family: 'Signika Negative', sans-serif;
    font-weight: bold;
    text-align: center;
">
    <h4 style="
        font-size: 20px;
        color: #C2185B; /* Warna judul lebih gelap agar kontras */
        font-weight: bold;
        margin-bottom: 15px;
    ">🧬 Congratulations!: 🥳</h4>
    
🎉 Congratulations! 🎉 You did it! 💪 Now you can craft your own recommendation system! 🤩 Go ahead and have some fun with it! 🚀


**Pls Upvote if u Like this 🤩 . Your feedback will be valuable for me 🌸**

</div>