# Import Library and Data

In [88]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity

In [89]:
df = pd.read_csv('data/anime.csv')
df.dropna(inplace=True)
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Military, Shounen",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen",TV,51,9.16,151266


In [90]:
df.columns

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')

In [91]:
df['genre'].value_counts()

Hentai                                                   816
Comedy                                                   521
Music                                                    297
Kids                                                     197
Comedy, Slice of Life                                    174
                                                        ... 
Adventure, Comedy, Horror, Shounen, Supernatural           1
Comedy, Harem, Romance, School, Seinen, Slice of Life      1
Comedy, Ecchi, Sci-Fi, Shounen                             1
Adventure, Shounen, Sports                                 1
Hentai, Slice of Life                                      1
Name: genre, Length: 3229, dtype: int64

In [92]:
df['genre']

0                                Drama, Romance, School, Supernatural
1         Action, Adventure, Drama, Fantasy, Magic, Military, Shounen
2        Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen
3                                                    Sci-Fi, Thriller
4        Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen
                                     ...                             
12289                                                          Hentai
12290                                                          Hentai
12291                                                          Hentai
12292                                                          Hentai
12293                                                          Hentai
Name: genre, Length: 12017, dtype: object

In [93]:
# Filter data agar tidak mengandung genre yang tidak diinginkan
unwanted_genres = ['Hentai', 'Ecchi', 'Harem', 'Yuri', 'Yaoi']
df = df[df['genre'].apply(lambda x: all(genre not in x for genre in unwanted_genres))]


In [94]:
clean_df = df.loc[:, ['name', 'genre', 'type', 'episodes', 'rating']]

In [95]:
clean_df = clean_df.loc[clean_df['episodes'] !='Unknown', :]
clean_df['episodes'] = clean_df['episodes'].astype('float64')


In [96]:
clean_df['genre'] = clean_df['genre'].apply(lambda x: x.split(', '))

In [97]:
# iterate over the list of Genres column and create new column named base on the list values
for index, row in clean_df.iterrows():
    for genre in row['genre']:
        clean_df.at[index, genre] = 1

clean_df.fillna(0, inplace=True)

In [98]:
clean_df

Unnamed: 0,name,genre,type,episodes,rating,Drama,Romance,School,Supernatural,Action,...,Police,Psychological,Demons,Josei,Shounen Ai,Game,Dementia,Cars,Kids,Shoujo Ai
0,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]",Movie,1.0,9.37,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic, Military, Shounen]",TV,64.0,9.26,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Gintama°,"[Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen]",TV,51.0,9.25,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Steins;Gate,"[Sci-Fi, Thriller]",TV,24.0,9.17,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen]",TV,51.0,9.16,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10891,Zouressha ga Yatte Kita,[Adventure],Movie,1.0,6.06,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10892,Zukkoke Knight: Don De La Mancha,"[Adventure, Comedy, Historical, Romance]",TV,23.0,6.47,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10893,Zukkoke Sannin-gumi no Hi Asobi Boushi Daisakusen,"[Drama, Kids]",OVA,1.0,5.83,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10894,Zukkoke Sannin-gumi: Zukkoke Jikuu Bouken,"[Comedy, Historical, Sci-Fi]",OVA,1.0,6.13,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [99]:
clean_df[clean_df['name'] == 'Doraemon (1979)']

Unnamed: 0,name,genre,type,episodes,rating,Drama,Romance,School,Supernatural,Action,...,Police,Psychological,Demons,Josei,Shounen Ai,Game,Dementia,Cars,Kids,Shoujo Ai
929,Doraemon (1979),"[Adventure, Comedy, Fantasy, Kids, Sci-Fi, Shounen]",TV,1787.0,7.76,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [100]:
anime_length = ['Short', 'Medium', 'Long']
clean_df['episodes'] = pd.cut(clean_df['episodes'], bins=[0, 12, 26, 2000], labels=anime_length)

In [101]:
# onehotencoding the episodes column but without using pd.get_dummies
for length in anime_length:
    clean_df['episode' + length] = clean_df['episodes'].apply(lambda x: 1 if x == length else 0)
clean_df.drop('episodes', axis=1, inplace=True)

In [102]:
# onehotencoding the type column but without using pd.get_dummies
for t in clean_df['type'].unique():
    clean_df[t] = clean_df['type'].apply(lambda x: 1 if x == t else 0)

In [103]:
clean_df.drop('type', axis=1, inplace=True)

In [104]:
clean_df.drop(['genre', 'rating'] ,axis=1, inplace=True)


In [105]:
clean_df = clean_df.set_index('name')

In [106]:
clean_df

Unnamed: 0_level_0,Drama,Romance,School,Supernatural,Action,Adventure,Fantasy,Magic,Military,Shounen,...,Kids,Shoujo Ai,episodeShort,episodeMedium,episodeLong,Movie,TV,OVA,Special,ONA
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kimi no Na wa.,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,1,0,0,0,0
Fullmetal Alchemist: Brotherhood,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0,0,1,0,1,0,0,0
Gintama°,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0,0,1,0,1,0,0,0
Steins;Gate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,1,0,0,1,0,0,0
Gintama&#039;,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zouressha ga Yatte Kita,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,1,0,0,0,0
Zukkoke Knight: Don De La Mancha,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,1,0,0,1,0,0,0
Zukkoke Sannin-gumi no Hi Asobi Boushi Daisakusen,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1,0,0,0,0,1,0,0
Zukkoke Sannin-gumi: Zukkoke Jikuu Bouken,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0,0,1,0,0


calculate the cosine

In [54]:
from sklearn.metrics.pairwise import cosine_similarity

# Menghitung cosine similarity
cosine_sim = cosine_similarity(clean_df, clean_df)

In [55]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Mendapatkan indeks dari anime yang dicari
    idx = clean_df[clean_df['name'] == title].index[0]

    # Mendapatkan skor similarity dari semua anime dengan anime yang dicari
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Mengurutkan anime berdasarkan similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Mendapatkan 10 anime teratas yang paling mirip
    sim_scores = sim_scores[1:11]

    # Mendapatkan nama anime dan skor similarity dari indeks
    anime_indices = [i[0] for i in sim_scores]
    similarity_scores = [i[1] for i in sim_scores]

    # Menggabungkan nama anime dan skor similarity dalam satu DataFrame
    recommendations = pd.DataFrame({
        'Anime': clean_df['name'].iloc[anime_indices].values,
        'Similarity Score': similarity_scores
    })

    return recommendations

In [46]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Mengecek apakah judul ada dalam indeks
    if title not in clean_df.index:
        return f"Anime dengan judul '{title}' tidak ditemukan."

    # Mendapatkan indeks dari anime yang dicari
    idx = clean_df.index.get_loc(title)

    # Mendapatkan skor similarity dari semua anime dengan anime yang dicari
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Mengurutkan anime berdasarkan similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Mendapatkan 10 anime teratas yang paling mirip
    sim_scores = sim_scores[1:11]

    # Mendapatkan nama anime dan skor similarity dari indeks
    anime_indices = [i[0] for i in sim_scores]
    similarity_scores = [i[1] for i in sim_scores]

    # Menggabungkan nama anime dan skor similarity dalam satu DataFrame
    recommendations = pd.DataFrame({
        'Anime': clean_df.iloc[anime_indices].index,
        'Similarity Score': similarity_scores
    })

    return recommendations

NameError: name 'cosine_sim' is not defined

In [65]:
# Contoh penggunaan
print(get_recommendations('Doraemon'))

                                                          Anime  \
0                                           Saru Getchu: On Air   
1                                           Pokemon Housoukyoku   
2                                       Fushigi na Koala Blinky   
3                            Samurai Girl Real Bout High School   
4                                     Grimm Masterpiece Theater   
5                                  Grimm Masterpiece Theater II   
6  Digimon Xros Wars: Aku no Death General to Nanatsu no Oukoku   
7                                               Ou Dorobou Jing   
8                                            Mahou Senshi Louie   
9                      Pokemon Best Wishes! Season 2: Episode N   

   Similarity Score  
0          0.935414  
1          0.925820  
2          0.925820  
3          0.925820  
4          0.857143  
5          0.857143  
6          0.857143  
7          0.857143  
8          0.857143  
9          0.857143  


In [70]:
# getting iloc of the anime titled 'Doraemon'
clean_df.index.get_loc('Doraemon')

1493

In [71]:
clean_df.index.get_loc('Saru Getchu: On Air')

4593

In [75]:
df[df['name'] == 'Saru Getchu: On Air']

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
5118,1872,Saru Getchu: On Air,"Adventure, Comedy, Fantasy, Kids, Romance, Shounen",TV,26,6.55,632


In [76]:
df[df['name'] == 'Doraemon']

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
1587,501,Doraemon,"Adventure, Comedy, Fantasy, Kids, Shounen",TV,26,7.49,9366


testing dummy data

In [108]:
# Data dummy baru
new_anime_data = {
    'name': ['Detective Conan'],
    'genre': ["Adventure, Comedy, Mystery"],
    'type': ['TV'],
    'episodes': [1190],
}

new_anime_df = pd.DataFrame(new_anime_data)

# Preprocessing data baru
new_anime_df['genre'] = new_anime_df['genre'].apply(lambda x: x.split(', '))
for index, row in new_anime_df.iterrows():
    for genre in row['genre']:
        new_anime_df.at[index, genre] = 1

new_anime_df.fillna(0, inplace=True)

anime_length = ['Short', 'Medium', 'Long']
new_anime_df['episodes'] = pd.cut(new_anime_df['episodes'], bins=[0, 12, 26, 2000], labels=anime_length)

for length in anime_length:
    new_anime_df['episode' + length] = new_anime_df['episodes'].apply(lambda x: 1 if x == length else 0)

new_anime_df.drop('episodes', axis=1, inplace=True)

for t in new_anime_df['type'].unique():
    new_anime_df[t] = new_anime_df['type'].apply(lambda x: 1 if x == t else 0)

new_anime_df.drop('type', axis=1, inplace=True)
new_anime_df.drop(['genre'], axis=1, inplace=True)

new_anime_df = new_anime_df.set_index('name')

In [115]:
new_anime_df

Unnamed: 0_level_0,Adventure,Comedy,Mystery,episodeShort,episodeMedium,episodeLong,TV
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Detective Conan,1.0,1.0,1.0,0,0,1,1


In [109]:
# Tambahkan data baru ke DataFrame utama
clean_df = pd.concat([clean_df, new_anime_df], axis=0)

In [112]:
clean_df

Unnamed: 0_level_0,Drama,Romance,School,Supernatural,Action,Adventure,Fantasy,Magic,Military,Shounen,...,Kids,Shoujo Ai,episodeShort,episodeMedium,episodeLong,Movie,TV,OVA,Special,ONA
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kimi no Na wa.,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,1.0,0,0.0,0.0,0.0
Fullmetal Alchemist: Brotherhood,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0,0,1,0.0,1,0.0,0.0,0.0
Gintama°,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0,0,1,0.0,1,0.0,0.0,0.0
Steins;Gate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,1,0,0.0,1,0.0,0.0,0.0
Gintama&#039;,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0,0,1,0.0,1,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zukkoke Knight: Don De La Mancha,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,1,0,0.0,1,0.0,0.0,0.0
Zukkoke Sannin-gumi no Hi Asobi Boushi Daisakusen,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1,0,0,0.0,0,1.0,0.0,0.0
Zukkoke Sannin-gumi: Zukkoke Jikuu Bouken,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1,0,0,0.0,0,1.0,0.0,0.0
Zumomo to Nupepe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0,1,0.0,1,0.0,0.0,0.0


In [116]:
clean_df.fillna(0, inplace=True)

In [118]:
clean_df.index.get_loc('Detective Conan')

9934

In [121]:
clean_df.iloc[9934, :]

Drama            0.0
Romance          0.0
School           0.0
Supernatural     0.0
Action           0.0
Adventure        1.0
Fantasy          0.0
Magic            0.0
Military         0.0
Shounen          0.0
Comedy           1.0
Historical       0.0
Parody           0.0
Samurai          0.0
Sci-Fi           0.0
Thriller         0.0
Sports           0.0
Super Power      0.0
Space            0.0
Slice of Life    0.0
Mecha            0.0
Music            0.0
Mystery          1.0
Seinen           0.0
Martial Arts     0.0
Vampire          0.0
Shoujo           0.0
Horror           0.0
Police           0.0
Psychological    0.0
Demons           0.0
Josei            0.0
Shounen Ai       0.0
Game             0.0
Dementia         0.0
Cars             0.0
Kids             0.0
Shoujo Ai        0.0
episodeShort     0.0
episodeMedium    0.0
episodeLong      1.0
Movie            0.0
TV               1.0
OVA              0.0
Special          0.0
ONA              0.0
Name: Detective Conan, dtype: floa

In [122]:
cosine_sim = cosine_similarity(clean_df, clean_df)

In [124]:
# Hitung ulang cosine similarity
cosine_sim = cosine_similarity(clean_df, clean_df)

def get_recommendations(title, cosine_sim=cosine_sim):
    # Mengecek apakah judul ada dalam indeks
    if title not in clean_df.index:
        return f"Anime dengan judul '{title}' tidak ditemukan."

    # Mendapatkan indeks dari anime yang dicari
    idx = clean_df.index.get_loc(title)

    # Mendapatkan skor similarity dari semua anime dengan anime yang dicari
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Mengurutkan anime berdasarkan similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Mendapatkan 10 anime teratas yang paling mirip
    sim_scores = sim_scores[1:11]

    # Mendapatkan nama anime dan skor similarity dari indeks
    anime_indices = [i[0] for i in sim_scores]
    similarity_scores = [i[1] for i in sim_scores]

    # Menggabungkan nama anime dan skor similarity dalam satu DataFrame
    recommendations = pd.DataFrame({
        'Anime': clean_df.iloc[anime_indices].index,
        'Similarity Score': similarity_scores
    })

    return recommendations

# Contoh penggunaan
print(get_recommendations('Detective Conan'))

                            Anime  Similarity Score
0                 Kaiketsu Zorori          0.894427
1                   Montana Jones          0.894427
2       Heisei Inu Monogatari Bow          0.894427
3              Muka Muka Paradise          0.894427
4      Salad Juu Yuushi Tomatoman          0.894427
5  Shin Mitsubachi Maya no Bouken          0.894427
6                          Perman          0.894427
7                  Reporter Blues          0.894427
8       Barbapapa Sekai wo Mawaru          0.894427
9                   Bug tte Honey          0.894427


# taking from database

In [104]:
df = pd.read_csv('./data/anime.csv')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [10]:
import os
from pymongo import MongoClient
from pymongo.server_api import ServerApi
from dotenv import load_dotenv

load_dotenv()
mongo_uri = os.environ['MONGO_URI']

client = MongoClient(mongo_uri, server_api=ServerApi('1'))

db = client['SPK']

# retrieve all data from anime_meta collection
anime_meta = db.anime_meta.find()

print(anime_meta)



<pymongo.cursor.Cursor object at 0x000001E844B0D1D0>


In [11]:
df_anime_meta = pd.DataFrame(list(anime_meta))
df_anime_meta.head()

Unnamed: 0,_id,title,type,episodes,score,aired_from,aired_to,synopsis,genre,poster,streaming_link
0,6659f90740359a27911f4f9b,Angel Beats!,TV,13.0,8.05,2010-04-03T00:00:00+00:00,2010-06-26T00:00:00+00:00,Death is one of many mysteries that has left h...,"[Drama, Supernatural]",https://cdn.myanimelist.net/images/anime/1244/...,https://kuronime.vip/?s=Angel+Beats!
1,6659f90b40359a27911f4f9c,Blue Lock,TV,24.0,8.26,2022-10-09T00:00:00+00:00,2023-03-26T00:00:00+00:00,Yoichi Isagi was mere moments away from scorin...,[Sports],https://cdn.myanimelist.net/images/anime/1258/...,https://kuronime.vip/?s=Blue+Lock
2,6659f90c40359a27911f4f9d,Boku no Hero Academia,TV,13.0,7.86,2016-04-03T00:00:00+00:00,2016-06-26T00:00:00+00:00,"The appearance of ""quirks,"" newly discovered s...",[Action],https://cdn.myanimelist.net/images/anime/10/78...,https://kuronime.vip/?s=Boku+no+Hero+Academia
3,6659f90f40359a27911f4f9e,Bungou Stray Dogs,TV,12.0,7.81,2016-04-07T00:00:00+00:00,2016-06-23T00:00:00+00:00,"For weeks, Atsushi Nakajima's orphanage has be...","[Action, Mystery, Supernatural]",https://cdn.myanimelist.net/images/anime/3/794...,https://kuronime.vip/?s=Bungou+Stray+Dogs
4,6659f91040359a27911f4f9f,Code Geass: Hangyaku no Lelouch,TV,25.0,8.7,2006-10-06T00:00:00+00:00,2007-07-29T00:00:00+00:00,"In the year 2010, the Holy Empire of Britannia...","[Action, Award Winning, Drama, Sci-Fi]",https://cdn.myanimelist.net/images/anime/1032/...,https://kuronime.vip/?s=Code+Geass:+Hangyaku+n...


In [14]:
df_anime_meta.dropna(inplace=True)

In [15]:
df_anime_meta.isna().sum()

_id               0
title             0
type              0
episodes          0
score             0
aired_from        0
aired_to          0
synopsis          0
genre             0
poster            0
streaming_link    0
dtype: int64

In [16]:
# df_anime_meta = df_anime_meta.loc[:, ['title', 'genre', 'type', 'episodes']]
# df_anime_meta.rename(columns={'title': 'name'}, inplace=True)
# df_anime_meta.to_csv('db.csv', index=False)

load exported csv

In [83]:
import pandas as pd
import ast
df_meta = pd.read_csv('db.csv')
df_meta['genre'] = df_meta['genre'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
df_meta['genre'] = df_meta['genre'].apply(lambda x: ', '.join(x))
df_meta['episodes'] = df_meta['episodes'].astype(int)
df_meta.head()

Unnamed: 0,name,genre,type,episodes
0,Angel Beats!,"Drama, Supernatural",TV,13
1,Blue Lock,Sports,TV,24
2,Boku no Hero Academia,Action,TV,13
3,Bungou Stray Dogs,"Action, Mystery, Supernatural",TV,12
4,Code Geass: Hangyaku no Lelouch,"Action, Award Winning, Drama, Sci-Fi",TV,25


In [84]:
df_meta['genre'] = df_meta['genre'].apply(lambda x: x.split(', '))
# iterate over the list of Genres column and create new column named base on the list values
for index, row in df_meta.iterrows():
    for genre in row['genre']:
        df_meta.at[index, genre] = 1

df_meta.fillna(0, inplace=True)
anime_length = ['Short', 'Medium', 'Long']
df_meta['episodes'] = pd.cut(df_meta['episodes'], bins=[0, 12, 26, 2000], labels=anime_length)
# onehotencoding the episodes column but without using pd.get_dummies
for length in anime_length:
    df_meta['episode' + length] = df_meta['episodes'].apply(lambda x: 1 if x == length else 0)
df_meta.drop('episodes', axis=1, inplace=True)
# onehotencoding the type column but without using pd.get_dummies
for t in df_meta['type'].unique():
    df_meta[t] = df_meta['type'].apply(lambda x: 1 if x == t else 0)
df_meta.drop(['genre', 'type'], axis=1, inplace=True)


In [86]:
df_meta.to_csv('db_processed.csv', index=False)

In [71]:
df_meta

Unnamed: 0,name,Drama,Supernatural,Sports,Action,Mystery,Award Winning,Sci-Fi,Adventure,Comedy,...,Gourmet,Slice of Life,Suspense,Romance,Ecchi,Horror,episodeShort,episodeMedium,episodeLong,TV
0,Angel Beats!,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
1,Blue Lock,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
2,Boku no Hero Academia,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
3,Bungou Stray Dogs,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
4,Code Geass: Hangyaku no Lelouch,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,Bungou Stray Dogs 3rd Season,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
57,Fugou Keiji: Balance:Unlimited,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
58,Bungou Stray Dogs 4th Season,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
59,Kamisama no Memochou,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1


In [89]:
new_data = pd.DataFrame({
    'name': ['Banana Fish'],
    'genre': ["Action, Adventure, Drama, Suspense"],
    'type': ['TV'],
    'episodes': [24],
})


new_data['genre'] = new_data['genre'].apply(lambda x: x.split(', '))
for index, row in new_data.iterrows():
    for genre in row['genre']:
        new_data.at[index, genre] = 1
        
new_data.fillna(0, inplace=True)

anime_length = ['Short', 'Medium', 'Long']
new_data['episodes'] = pd.cut(new_data['episodes'], bins=[0, 12, 26, 2000], labels=anime_length)

for length in anime_length:
    new_data['episode' + length] = new_data['episodes'].apply(lambda x: 1 if x == length else 0)

new_data.drop('episodes', axis=1, inplace=True)

for t in new_data['type'].unique():
    new_data[t] = new_data['type'].apply(lambda x: 1 if x == t else 0)
    
new_data.drop('type', axis=1, inplace=True)
new_data.drop(['genre'], axis=1, inplace=True)

# new_data = new_data.set_index('name')
# new_data

In [90]:
df_meta = pd.concat([df_meta, new_data], axis=0)
df_meta

Unnamed: 0,name,Drama,Supernatural,Sports,Action,Mystery,Award Winning,Sci-Fi,Adventure,Comedy,...,Gourmet,Slice of Life,Suspense,Romance,Ecchi,Horror,episodeShort,episodeMedium,episodeLong,TV
0,Angel Beats!,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
1,Blue Lock,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
2,Boku no Hero Academia,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
3,Bungou Stray Dogs,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
4,Code Geass: Hangyaku no Lelouch,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Fugou Keiji: Balance:Unlimited,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
58,Bungou Stray Dogs 4th Season,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1
59,Kamisama no Memochou,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1
60,Bungou Stray Dogs 5th Season,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1


In [91]:
nan_index = df_meta[df_meta.isna()].index
nan_index

Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60,  0],
      dtype='int64')

In [92]:
df_meta.fillna(0, inplace=True)
df_meta.set_index('name', inplace=True)

In [93]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim = cosine_similarity(df_meta, df_meta)


In [94]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Mendapatkan indeks dari anime yang dicari
    idx = df_meta.index.get_loc(title)

    # Mendapatkan skor similarity dari semua anime dengan anime yang dicari
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Mengubah skor similarity menjadi scalar jika diperlukan
    sim_scores = [(i, score.item() if hasattr(score, 'item') else score) for i, score in sim_scores]

    # Mengurutkan anime berdasarkan similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Mendapatkan 10 anime teratas yang paling mirip
    sim_scores = sim_scores[1:11]

    # Mendapatkan nama anime dan skor similarity dari indeks
    anime_indices = [i[0] for i in sim_scores]
    similarity_scores = [i[1] for i in sim_scores]

    # Menggabungkan nama anime dan skor similarity dalam satu DataFrame
    recommendations = pd.DataFrame({
        'Anime': df_meta.index[anime_indices],
        'Similarity Score': similarity_scores
    })

    return recommendations

In [98]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # Mengecek apakah judul ada dalam indeks
    if title not in df_meta.index:
        return f"Anime dengan judul '{title}' tidak ditemukan."

    # Mendapatkan indeks dari anime yang dicari
    idx = df_meta.index.get_loc(title)

    # Mendapatkan skor similarity dari semua anime dengan anime yang dicari
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Mengurutkan anime berdasarkan similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Mendapatkan 10 anime teratas yang paling mirip
    sim_scores = sim_scores[1:]

    # Mendapatkan nama anime dan skor similarity dari indeks
    anime_indices = [i[0] for i in sim_scores]
    similarity_scores = [i[1] for i in sim_scores]

    # Menggabungkan nama anime dan skor similarity dalam satu DataFrame
    recommendations = pd.DataFrame({
        'Anime': df_meta.iloc[anime_indices].index,
        'Similarity Score': similarity_scores
    })

    return recommendations

In [99]:
print(get_recommendations('Banana Fish'))

                                 Anime  Similarity Score
0                          Steins;Gate          0.730297
1                          Log Horizon          0.730297
2                Boku no Hero Academia          0.707107
3      Code Geass: Hangyaku no Lelouch          0.666667
4   Code Geass: Hangyaku no Lelouch R2          0.666667
..                                 ...               ...
56                            Nekopara          0.204124
57      Chuunibyou demo Koi ga Shitai!          0.204124
58                Kanojo, Okarishimasu          0.204124
59             Masamune-kun no Revenge          0.204124
60                     No Game No Life          0.182574

[61 rows x 2 columns]


In [104]:
df_meta.index.get_loc('Banana Fish')

61

In [103]:
df_meta.iloc[18, :]

Drama            0.0
Supernatural     0.0
Sports           0.0
Action           0.0
Mystery          0.0
Award Winning    0.0
Sci-Fi           0.0
Adventure        0.0
Comedy           1.0
Fantasy          1.0
Gourmet          0.0
Slice of Life    0.0
Suspense         0.0
Romance          0.0
Ecchi            1.0
Horror           0.0
episodeShort     1.0
episodeMedium    0.0
episodeLong      0.0
TV               1.0
Name: No Game No Life, dtype: float64

In [105]:
df_meta.iloc[61, :]

Drama            1.0
Supernatural     0.0
Sports           0.0
Action           1.0
Mystery          0.0
Award Winning    0.0
Sci-Fi           0.0
Adventure        1.0
Comedy           0.0
Fantasy          0.0
Gourmet          0.0
Slice of Life    0.0
Suspense         1.0
Romance          0.0
Ecchi            0.0
Horror           0.0
episodeShort     0.0
episodeMedium    1.0
episodeLong      0.0
TV               1.0
Name: Banana Fish, dtype: float64

### checking dataset

In [106]:
df = pd.read_csv('data/anime.csv')

In [107]:
df[df['name'] == 'A Kite']

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
11380,320,A Kite,"Action, Drama, Hentai, Police",OVA,2,6.72,27941
