In [None]:
# This project is about recommendation system, the basic objective is to recommends products to user based on input
# There are two kinds of recommendation system 1. Content base Filtering and 2. Collaberative Filtering
# Content filter based on the product contents for ex. different books of history
# Collaberative filter based on the preference from other simillar users for ex. books read by other users who also read history book
# In the recommendation systems, we convert the datasets into embedding and store those embeddings in vector database
# The prompt received from User input will be compare against those embeddings to find the cosine similarity which drives the recommendations 


# In this project we are using;
# 1. Anime Recommendations Database from Kaggel : https://www.kaggle.com/datasets/CooperUnion/anime-recommendations-database/data
#    This is animated content datasets of verious generation (Drama, Romance, Supernatural etc) of kinds Movie, TV serial, web-series etc
# 2. Hugging face transfomer librarys for creating embeddings: This allows apis to access pre-train models that support task such as text_processing, generation etc

In [1]:
# Lets import the key libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as plt
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

In [2]:
# Lets import the datasets
df = pd.read_csv('anime.csv')

In [3]:
# lets explore the dataset 
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [5]:
# Lets check for the null values if-any
df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [None]:
# sns.heatmap(df.isnull()) kernal is dying due to huge data :(

In [6]:
# lets drop the na values
df.dropna(inplace = True)

In [11]:
# lets add new column "description" which will combines the columns name, genre, episodes, this column act as a foundation data-source for recommendation system
df['description'] =  df['name'] + ' ' + df['genre'] + ' ' + df['type'] + 'episodes:' + df['episodes']

In [13]:
# lets print the dataframe
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,description
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,"Kimi no Na wa. Drama, Romance, School, Superna..."
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,"Fullmetal Alchemist: Brotherhood Action, Adven..."
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,"Gintama° Action, Comedy, Historical, Parody, S..."
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,"Steins;Gate Sci-Fi, Thriller TVepisodes:24"
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,"Gintama&#039; Action, Comedy, Historical, Paro..."
...,...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211,Toushindai My Lover: Minami tai Mecha-Minami H...
12290,5543,Under World,Hentai,OVA,1,4.28,183,Under World Hentai OVAepisodes:1
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219,Violence Gekiga David no Hoshi Hentai OVAepiso...
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175,Violence Gekiga Shin David no Hoshi: Inma Dens...


In [19]:
# Now we will use the Hugging face transformer model to convert the text (description column) into embeddings (numerical representation)
# We will be using sentense-embeddings
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
anime_embeddings = model.encode(df['description'].tolist())

In [25]:
# Now lets create a function to handle user inputs and perform cosine similarity as recommendation system
from sklearn.metrics.pairwise import cosine_similarity

def user_promt(prompt, embeddings, df, top_n=5):
    query_embedding = model.encode([prompt])
    similarities = cosine_similarity(query_embedding, embeddings)
    top_indices = similarities[0].argsort()[-top_n:][::-1]
    return df.iloc[top_indices]

In [28]:
# Now that everything is ready, we can try the recommendation system. 
# Here is an example of acquiring the top five anime recommendations from the user input query.

prompt = " Funney anime that I can watch with friends"
recommendations = user_promt(prompt, anime_embeddings, df)
print(recommendations[['name', 'genre']])

                                                   name  \
4294               SKET Dance: SD Character Flash Anime   
2660                                       Sakura Trick   
4269                      Anime de Wakaru Shinryounaika   
8567                    Funassyi no Funafunafuna Biyori   
1105  Dragon Ball Kai: Mirai ni Heiwa wo! Goku no Ta...   

                                                  genre  
4294                            Comedy, School, Shounen  
2660  Comedy, Romance, School, Seinen, Shoujo Ai, Sl...  
4269                                     Comedy, Seinen  
8567                        Comedy, Kids, Slice of Life  
1105  Action, Comedy, Fantasy, Sci-Fi, Shounen, Supe...  


In [29]:
# Now lets try something different 

prompt = "Horror animer that I cant watch alone"
recommendations = user_promt(prompt, anime_embeddings, df)
print(recommendations[['name', 'genre']])

                                    name                            genre
1249   Ayakashi: Japanese Classic Horror      Fantasy, Historical, Horror
3622                        Kaibutsu-kun                   Comedy, Horror
7805  Nouryou Anime: Denkyuu Ika Matsuri  Dementia, Horror, Psychological
9081          Kaibutsu-kun: Demon no Ken    Comedy, Horror, Kids, Shounen
4152                 Kaibutsu-kun (1980)             Comedy, Horror, Kids
