In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
anime = pd.read_csv("anime.csv")
rating = pd.read_csv("rating.csv")

anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
# Ganti genre kosong
anime['genre'] = anime['genre'].fillna('')

# TF-IDF untuk genre
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(anime['genre'])

# Cosine similarity antar anime
content_similarity = cosine_similarity(tfidf_matrix)

print("Content-based similarity matrix shape:", content_similarity.shape)

Content-based similarity matrix shape: (12294, 12294)


In [4]:
# Buang rating -1 (belum memberi rating)
rating_clean = rating[rating['rating'] != -1]

# User-Item Matrix
user_item_matrix = rating_clean.pivot_table(
    index='user_id',
    columns='anime_id',
    values='rating'
).fillna(0)

# Similarity antar anime
cf_similarity = cosine_similarity(user_item_matrix.T)

print("Collaborative similarity matrix shape:", cf_similarity.shape)

Collaborative similarity matrix shape: (9927, 9927)


In [5]:
anime_index = pd.Series(anime.index, index=anime['anime_id']).drop_duplicates()

In [6]:
def content_based_recommendation(anime_id, top_n=10):
    idx = anime_index[anime_id]
    sim_scores = list(enumerate(content_similarity[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]

    anime_indices = [i[0] for i in sim_scores]
    scores = [i[1] for i in sim_scores]

    result = anime.iloc[anime_indices][['name', 'genre']]
    result['content_score'] = scores

    return result

In [7]:
def collaborative_recommendation(anime_id, top_n=10):
    if anime_id not in user_item_matrix.columns:
        return pd.DataFrame()

    idx = user_item_matrix.columns.get_loc(anime_id)
    sim_scores = list(enumerate(cf_similarity[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]

    anime_ids = user_item_matrix.columns[[i[0] for i in sim_scores]]
    scores = [i[1] for i in sim_scores]

    result = anime[anime['anime_id'].isin(anime_ids)][['anime_id', 'name']]
    result['cf_score'] = scores

    return result

In [8]:
def hybrid_recommendation(anime_name, alpha=0.6, top_n=10):
    # Cari anime_id dari nama
    anime_row = anime[anime['name'].str.lower() == anime_name.lower()]
    
    if anime_row.empty:
        print("Anime tidak ditemukan.")
        return
    
    anime_id = anime_row.iloc[0]['anime_id']

    # Content-based
    cb = content_based_recommendation(anime_id, top_n=20)

    # Collaborative
    cf = collaborative_recommendation(anime_id, top_n=20)

    if cf.empty:
        cb['final_score'] = cb['content_score']
        return cb.sort_values('final_score', ascending=False).head(top_n)

    # Gabungkan
    hybrid = pd.merge(
        cb,
        cf,
        on='name',
        how='inner'
    )

    # Hitung skor hybrid
    hybrid['final_score'] = (
        alpha * hybrid['cf_score'] +
        (1 - alpha) * hybrid['content_score']
    )

    return hybrid.sort_values('final_score', ascending=False).head(top_n)

In [9]:
result = hybrid_recommendation("Naruto", alpha=0.6, top_n=10)
result

Unnamed: 0,name,genre,content_score,anime_id,cf_score,final_score
0,Dragon Ball Z,"Action, Adventure, Comedy, Fantasy, Martial Ar...",0.936843,813,0.463703,0.652959
