<a href="https://colab.research.google.com/github/rayhanfaris17/recommender_system_for_anime/blob/main/Recommender_System_For_Anime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Modul 6 Recommender System**

# **Import Dataset**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Location of the dataset file, relative to the notebook's working directory.
ANIME_CSV_PATH = "anime_with_synopsis.csv"

# Load the dataset and preview its first rows.
df = pd.read_csv(ANIME_CSV_PATH)
df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


# **Preprocessing Data**

In [None]:
# Correct the misspelled 'sypnopsis' column name that comes from the CSV header.
column_renames = {'sypnopsis': 'synopsis'}
df = df.rename(columns=column_renames)

In [None]:
# Show the data type and non-null count of every column,
# together with the total number of rows and columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16214 entries, 0 to 16213
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   MAL_ID    16214 non-null  int64 
 1   Name      16214 non-null  object
 2   Score     16214 non-null  object
 3   Genres    16214 non-null  object
 4   synopsis  16206 non-null  object
dtypes: int64(1), object(4)
memory usage: 633.5+ KB


In [None]:
# Count the missing values in each column.
df.isnull().sum()

MAL_ID      0
Name        0
Score       0
Genres      0
synopsis    8
dtype: int64

In [None]:
# Count the rows that are exact duplicates of an earlier row.
df.duplicated().sum()

0

In [None]:
# Drop every row that contains a missing value, then rebuild a contiguous
# 0..n-1 index. Without the reset, dropna() leaves gaps in the index, so index
# labels no longer equal row positions -- and the TF-IDF / cosine-similarity
# matrices built from this frame further down are addressed by row position.
df = df.dropna().reset_index(drop=True)

# Confirm that no missing values remain.
df.isnull().sum()

MAL_ID      0
Name        0
Score       0
Genres      0
synopsis    0
dtype: int64

In [None]:
# Re-inspect the dataset after the rows with missing values were removed.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16206 entries, 0 to 16213
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   MAL_ID    16206 non-null  int64 
 1   Name      16206 non-null  object
 2   Score     16206 non-null  object
 3   Genres    16206 non-null  object
 4   synopsis  16206 non-null  object
dtypes: int64(1), object(4)
memory usage: 759.7+ KB


# **Feature Extraction "Genres" Menggunakan TF-IDF**

In [None]:
def split_genres(genres):
    """Split a comma-separated genre string into one token per genre."""
    return [genre.strip() for genre in genres.split(',') if genre.strip()]


# Treat every comma-separated genre as a single term. The default word-level
# token pattern would break hyphenated or multi-word genres into pieces
# (e.g. "Sci-Fi" -> "sci", "fi"), so unrelated genres could end up sharing
# terms and inflate each other's similarity.
# token_pattern=None states explicitly that the default pattern is unused,
# because the custom tokenizer replaces it.
vectorizer = TfidfVectorizer(tokenizer=split_genres, token_pattern=None)
tfidf_matrix = vectorizer.fit_transform(df['Genres'])
tfidf_matrix.shape

(16206, 46)

# **Menghitung Cosine Similarity**

In [None]:
# Cosine similarity between every pair of rows of the TF-IDF matrix.
# Row/column i of the result corresponds to the i-th row (by position) of
# tfidf_matrix.
cosine_sim = cosine_similarity(tfidf_matrix)
cosine_sim.shape

(16206, 16206)

# **Pembuatan Content Based Recommender**

In [None]:
def get_recommendations(title, cosine_sim, df):
    """Return the names of all other anime, most similar to ``title`` first.

    Parameters
    ----------
    title : str
        Exact value to look up in ``df['Name']``. If several rows carry this
        name, the first one (in row order) is used as the query.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix. Row/column ``i`` must correspond to the
        ``i``-th row of ``df`` *by position*.
    df : pandas.DataFrame
        Frame with a ``'Name'`` column, in the same row order as ``cosine_sim``.

    Returns
    -------
    pandas.Series
        ``df['Name']`` reordered by descending similarity to the query, with
        the query row itself removed. Ties keep their original row order.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``df['Name']``.
    """
    # Locate the query by row *position*, not by index label. The similarity
    # matrix is positional, while the frame's index may have gaps (for example
    # after dropna()), so a label would point at the wrong row or out of range.
    matches = (df['Name'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"Anime title not found in df['Name']: {title!r}")
    query_pos = int(matches[0])

    # Similarity of the query to every row, ranked from highest to lowest.
    scores = cosine_sim[query_pos]
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)

    # Remove the query explicitly. It is not guaranteed to be ranked first:
    # any other row with identical features also scores 1.0 and may sort
    # ahead of it, so blindly dropping the first entry would be wrong.
    ranked = [pos for pos in ranked if pos != query_pos]

    return df['Name'].iloc[ranked]

# **Contoh Penggunaan**

In [None]:
# Query title; must match a value in df['Name'] exactly.
anime_title = 'Paprika'
recommendations = get_recommendations(title=anime_title, cosine_sim=cosine_sim, df=df)

print(f"Anime yang direkomendasikan untuk {anime_title}:")
# Wrap the ranked names in a frame and show the five best matches.
table_recommendation = pd.DataFrame(recommendations)
table_recommendation.head(5)

Anime yang direkomendasikan untuk Paprika:


Unnamed: 0,Name
824,Higurashi no Naku Koro ni
15171,Higurashi no Naku Koro ni Gou
16211,Higurashi no Naku Koro ni Sotsu
13265,Yakusoku no Neverland
6598,Higurashi no Naku Koro ni Kaku: Outbreak


In [None]:
# Query title; must match a value in df['Name'] exactly.
anime_title = 'Naruto'
recommendations = get_recommendations(title=anime_title, cosine_sim=cosine_sim, df=df)

print(f"Anime yang direkomendasikan untuk {anime_title}:")
# Wrap the ranked names in a frame and show the five best matches.
table_recommendation = pd.DataFrame(recommendations)
table_recommendation.head(5)

Anime yang direkomendasikan untuk Naruto:


Unnamed: 0,Name
1508,Naruto: Shippuuden
11640,Boruto: Jump Festa 2016 Special
206,Rekka no Honoo
5518,Naruto: Honoo no Chuunin Shiken! Naruto vs. Ko...
6158,Naruto: Shippuuden Movie 6 - Road to Ninja


In [None]:
# Query title; must match a value in df['Name'] exactly.
anime_title = 'Monster'
recommendations = get_recommendations(title=anime_title, cosine_sim=cosine_sim, df=df)

print(f"Anime yang direkomendasikan untuk {anime_title}:")
# Wrap the ranked names in a frame and show the five best matches.
table_recommendation = pd.DataFrame(recommendations)
table_recommendation.head(5)

Anime yang direkomendasikan untuk Monster:


Unnamed: 0,Name
6598,Higurashi no Naku Koro ni Kaku: Outbreak
2495,Death Note: Rewrite
10449,B: The Beginning
13404,B: The Beginning Succession
289,Mousou Dairinin
