In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation, BatchNormalization, Input, Embedding, Dot, Dense, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

from wordcloud import wordcloud
%matplotlib inline


In [4]:
pip install matplotlib wordcloud

Collecting matplotlib
  Downloading matplotlib-3.10.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting wordcloud
  Downloading wordcloud-1.9.4-cp312-cp312-win_amd64.whl.metadata (3.5 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.2-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.57.0-cp312-cp312-win_amd64.whl.metadata (104 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.8-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-11.2.1-cp312-cp312-win_amd64.whl.metadata (9.1 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.1-cp312-cp312-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.

### Reading Animelist.csv File

In [6]:
### scpecifying input directory####
import os

In [7]:
### ".." places you in root directory
INPUT_DIR = os.path.join("..","artifacts","raw")

In [16]:
rating_df = pd.read_csv(INPUT_DIR + "/animelist.csv", low_memory=True, usecols=["user_id", "anime_id", "rating"])

In [17]:
rating_df.head(10)

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,4898,0
4,0,21,10
5,0,24,9
6,0,2104,0
7,0,4722,8
8,0,6098,6
9,0,3125,9


In [18]:
len(rating_df)

5000000

### Data Preprocessing

In [None]:
####Finding users who have rated the most animes, and filtering out users with less than 400 ratings ####
n_ratings = rating_df["user_id"].value_counts()

rating_df = rating_df[rating_df["user_id"].isin(n_ratings[n_ratings >= 400].index)].copy()

In [20]:
len(rating_df)

3246641

In [40]:
min_rating= min(rating_df["rating"]) 
max_rating= max(rating_df["rating"])

avg_rating = np.mean(rating_df["rating"])

In [42]:
avg_rating

np.float64(0.4122732695114729)

In [39]:
##### min/max scaling of rating variable ####
rating_df["rating"] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

In [43]:
### Checking for duplicates ####
rating_df.duplicated().sum()

np.int64(0)

In [44]:
rating_df.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

In [45]:
#### extracting all unique user IDs into a list ####
user_ids = rating_df["user_id"].unique().tolist()

In [None]:
#### Encoding user IDs INTO NUMBERS and stroing into 2 dictionaries, also makes another col "user with encoded user_id in df####
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user2user_decoded = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)


In [61]:
rating_df.head(10)

Unnamed: 0,user_id,anime_id,rating,user
213,2,24833,0.0,0
214,2,235,1.0,0
215,2,36721,0.0,0
216,2,40956,0.0,0
217,2,31933,0.0,0
218,2,5042,0.8,0
219,2,7593,0.8,0
220,2,21,0.9,0
221,2,35446,0.0,0
222,2,24,0.0,0


In [62]:
n_users=len(user2user_decoded)

In [63]:
n_users

4203

In [64]:
#### extracting all unique anime IDs into a list ####
anime_ids = rating_df["anime_id"].unique().tolist()

In [None]:
#### Encoding anime IDs INTO NUMBERS and storing into 2 dictionaries, also makes another col "user with encoded anime_id in df####
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime2anime_decoded = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)

In [73]:
rating_df.head(10)

Unnamed: 0,user_id,anime_id,rating,user,anime
0,13338,30,0.7,3405,688
1,15967,15583,0.6,4060,1518
2,4967,39586,0.0,1213,4963
3,4055,8039,0.0,997,4097
4,6170,30205,0.0,1552,1690
5,1694,8917,1.0,440,4074
6,16009,33489,0.0,4074,1191
7,4213,461,0.7,1033,4861
8,7215,16009,0.0,1815,1362
9,5964,34103,0.0,1495,1752


In [71]:
n_anime=len(anime2anime_decoded)

In [72]:
#### shuffling data to make sure model doesn't just learn order of dataset, by introducing randomness ####
rating_df = rating_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [74]:
### Split data into input and output features ####

X = rating_df[["user", "anime"]].values  ### these are input cols
y = rating_df["rating"].values  ### this is output col

In [75]:
#### Setting how many rows of data used for test and train #####
test_size = 1000
train_indices = rating_df.shape[0] - test_size

In [76]:
X_train , X_test , y_train, y_test = (
   X[:train_indices], 
   X[train_indices:], 
   y[:train_indices], 
   y[train_indices:]
)

In [80]:
len(X_test)

1000

In [81]:
type(X_train)

numpy.ndarray

In [82]:
X_train_array = [X_train[:, 0], X_train[:, 1]]  ### this is a list of 2 arrays, first array is user_id, second is anime_id
X_test_array = [X_test[:, 0], X_test[:, 1]]     ### same for X_test, also the two arrays are stored in a list

In [83]:
X_train_array

[array([3405, 4060, 1213, ...,  560, 3815, 2122]),
 array([ 688, 1518, 4963, ..., 1461, 1512, 1153])]

### Model Architecture

In [89]:
def RecommenderNet():
    embedding_size = 128  ### size of embedding layer, this is the size of the vector that will represent each user and anime

    user_input = Input(name="user" , shape=[1])  ### input layer for user

    user_embedding = Embedding(name="user_embedding", input_dim=n_users, output_dim=embedding_size)(user_input)  ### embedding layer for user

    anime_input = Input(name="anime" , shape=[1])  ### input layer for anime

    anime_embedding = Embedding(name="anime_embedding", input_dim=n_anime, output_dim=embedding_size)(anime_input)  ### embedding layer for anime

    x = Dot(name="dot_product", normalize=True, axes=2)([user_embedding, anime_embedding])  ### dot product of user and anime embeddings, calculates similarity between user and anime

    x = Flatten()(x)  ### flatten the output of the dot product

    x = Dense(1, kernel_initializer='he_normal')(x)  ### dense layer
    x = BatchNormalization()(x)  ### batch normalization
    x = Activation("sigmoid")(x)  ### activation function

    model = Model([user_input, anime_input], outputs=x)  ### create the model

    model.compile(loss="binary_crossentropy", optimizer='Adam', metrics=["mae" , "mse"])  ### compile the model
    return model  ### return the model

In [90]:
model = RecommenderNet()

In [91]:
model.summary()

In [92]:
start_lr = 0.00001  ### starting learning rate
min_lr = 0.00001  ### minimum learning rate
max_lr = 0.00005  ### maximum learning rate
batch_size = 10000  ### batch size

ramup_epochs = 5  ### number of epochs to ramp up learning rate
sustain_epochs = 0  ### number of epochs to sustain learning rate
exp_decay = 0.8  ### exponential decay rate

### This function will be used to calculate the learning rate for each epoch, and find best learning rate for the model ###
def lrfn(epoch):
    if epoch < ramup_epochs:
        return (max_lr - start_lr) / ramup_epochs * epoch + start_lr
    elif epoch < ramup_epochs + sustain_epochs:
        return max_lr
    else:
        return (max_lr - min_lr) * exp_decay ** (epoch - ramup_epochs - sustain_epochs) + min_lr

In [93]:
lr_callback = LearningRateScheduler(lambda epoch: lrfn(epoch) , verbose=0)  ### learning rate callback
checkpoint_filepath = './weights.weights.h5'  ### checkpoint filepath

ModelCheckpoint = ModelCheckpoint(filepath=checkpoint_filepath, save_weights_only=True, save_best_only=True, monitor='val_loss', mode='min', verbose=1)  ### checkpoint callback

early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min', restore_best_weights=True)  ### early stopping callback, to stop model if no improvement in val_loss for 3 epochs

In [94]:
my_callbacks = [lr_callback, ModelCheckpoint, early_stopping]  ### list of callbacks

In [95]:
###Model training####

history = model.fit(
    x=X_train_array, 
    y=y_train, 
    batch_size=batch_size, 
    epochs=20, 
    verbose=1,
    validation_data=(X_test_array, y_test),
    callbacks=my_callbacks
)

Epoch 1/20
[1m324/325[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 25ms/step - loss: 0.8026 - mae: 0.3824 - mse: 0.1980
Epoch 1: val_loss improved from inf to 0.75908, saving model to ./weights.weights.h5
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - loss: 0.8026 - mae: 0.3824 - mse: 0.1980 - val_loss: 0.7591 - val_mae: 0.3842 - val_mse: 0.1860 - learning_rate: 1.0000e-05
Epoch 2/20
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.7997 - mae: 0.3814 - mse: 0.1968
Epoch 2: val_loss did not improve from 0.75908
[1m325/325[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - loss: 0.7997 - mae: 0.3814 - mse: 0.1968 - val_loss: 0.8138 - val_mae: 0.3950 - val_mse: 0.2050 - learning_rate: 1.8000e-05
Epoch 3/20
[1m324/325[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - loss: 0.7937 - mae: 0.3790 - mse: 0.1944
Epoch 3: val_loss did not improve from 0.75908
[1m325/325[0m [32m━━━━━

In [97]:
###Extract weights from the model####

def extract_weights(name, model):
    weight_layer = model.get_layer(name)  ### get weights from the model
    weights = weight_layer.get_weights()[0]  ### get weights from the layer
    weights = weights / np.linalg.norm(weights, axis=1).reshape((-1,1))  ### normalize the weights
    return weights  ### return the weights

In [98]:
anime_weights = extract_weights("anime_embedding", model)  ### extract anime weights
user_weights = extract_weights("user_embedding", model)  ### extract user weights

### Reading anime.csv

In [99]:
df = pd.read_csv(INPUT_DIR + "/anime.csv", low_memory=True)  ### read anime data

In [101]:
df.head(2)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0


In [103]:
df = df.replace("Unknown", np.nan)  ### replace unknown with null values

In [104]:
def get_anime_name(anime_id):  ### function to get anime name from anime id
    try:
        name = df[df.anime_id == anime_id].eng_version.values[0]  ### get anime name from anime id
        if name is np.nan:
            name = df[df.anime_id == anime_id].Name.values[0]  ### if name is null, get name from original name
    
    except:
        print("Anime not found")  ### if anime not found, print error message
    return name

In [105]:
df["anime_id"] = df["MAL_ID"]  ### rename anime_id to MAL_ID
df["eng_version"] = df["English name"]  ### rename eng_version to English name
df["eng_version"] = df.anime_id.apply(lambda x: get_anime_name(x))  ### get anime name from anime id

In [106]:
df.head(2)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,anime_id,eng_version
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",,...,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0,5,Cowboy Bebop:The Movie


In [108]:
get_anime_name(62)

'D.C.~Da Capo~'

In [110]:
df.sort_values(by=["Score"],
               inplace=True,
               kind='quicksort',
               na_position='last',
               ascending=False)  ### sort anime by score

In [113]:
df.head(2)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
3971,5114,Fullmetal Alchemist:Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...",64,TV,Spring 2009,2248456
15926,40028,Attack on Titan Final Season,9.17,"Action, Military, Mystery, Super Power, Drama,...",16,TV,Winter 2021,733260


In [112]:
df = df[["anime_id", "eng_version", "Score", "Genres", "Episodes", "Type", "Premiered", "Members"]]  ### select only anime_id, eng_version and score columns

In [None]:
##Function lets you search by anime id or name

def get_anime_frame(anime,df):
    if isinstance(anime, int):
        return df[df.anime_id == anime]
    if isinstance(anime, str):
        return df[df.eng_version == anime]

In [119]:

get_anime_frame(25,df)

Unnamed: 0,anime_id,eng_version,Score,Genres,Episodes,Type,Premiered,Members
15,25,Desert Punk,7.42,"Action, Adventure, Comedy, Ecchi, Sci-Fi, Shounen",24,TV,Fall 2004,111734


### Anime_with_synopsis.csv

In [122]:
cols = ["MAL_ID", "Name", "Genres", "sypnopsis"]

In [123]:
synopsis_df = pd.read_csv(INPUT_DIR + "/anime_with_synopsis.csv", low_memory=True, usecols=cols)  ### read anime data

In [124]:
synopsis_df.head(2)

Unnamed: 0,MAL_ID,Name,Genres,sypnopsis
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."


In [125]:
def get_synopsis(anime, synopsis_df):  ### function to get synopsis from anime id
    if isinstance(anime, int):
        return synopsis_df[synopsis_df.MAL_ID == anime].sypnopsis.values[0]  ### get synopsis from anime id
    if isinstance(anime, str):
        return synopsis_df[synopsis_df.Name == anime].sypnopsis.values[0]  ### get synopsis from anime name
    return None  ### if anime not found, return null

In [127]:
get_synopsis('Cowboy Bebop', synopsis_df)  ### get synopsis from anime id

'In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as "Cowboys." The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member\'s dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebop is a space Western classic an

### Content based Recommender System

In [128]:
pd.set_option('display.max_colwidth', None)  ### set display option to show full synopsis

In [None]:
###

def find_similair_animes(name, anime_weights, anime2anime_encoded, 
                         anime2anime_decoded, df, synopsis_df, n=10, return_dist=False, neg=False):
    
    try:
       index = get_anime_frame(name, df).anime_id.values[0]  ### get anime id from anime name
       encoded_index = anime2anime_encoded.get(index)  ### get encoded anime id from anime id

       weights = anime_weights

       dists = np.dot(weights, weights[encoded_index])  ### calculate distance between anime weights and encoded anime id
       sorted_dists = np.argsort(dists)

       n=n+1

       if neg:
            closest = sorted_dists[:n]  ### get closest anime ids
       else:
            closest = sorted_dists[-n:]
            

       if return_dist:
          return dists, closest
       
       SimilarityArr = []

       for close in closest:
           decoded_id = anime2anime_decoded.get(close)  ### get decoded anime id from encoded anime id

           synopsis = get_synopsis(decoded_id, synopsis_df)
           anime_frame = get_anime_frame(decoded_id, df)

           anime_name = anime_frame.eng_version.values[0]  ### get anime name from anime id
           genre = anime_frame.Genres.values[0]  ### get anime genre from anime id
           similarity = dists[close]

           SimilarityArr.append({
                "anime_id": decoded_id,
                "name": anime_name,
                "similarity": similarity,
                "synopsis": synopsis,
                "genre": genre
           })
           

       Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", ascending=False)  ### create dataframe from similarity array
       return Frame[Frame.anime_id != index].drop(["anime_id"], axis=1)  ### return dataframe without the original anime id
       
    except:
        print("Error:Anime not found")


       

In [152]:
find_similair_animes("Cowboy Bebop", anime_weights, anime2anime_encoded,anime2anime_decoded, df, synopsis_df)  ### find similar animes to Cowboy Bebop

Unnamed: 0,name,similarity,synopsis,genre
9,Kino no Tabi: The Beautiful World - The Animated Series - Haikyo no Kuni - On Your Way,0.417019,No synopsis information has been added to this title. Help improve our database by adding a synopsis here .,Adventure
8,Mobile Suit Gundam: Hathaway's Flash 3,0.333748,No synopsis information has been added to this title. Help improve our database by adding a synopsis here .,"Action, Military, Sci-Fi, Space, Drama, Mecha"
7,Jungle De Ikou!,0.316927,"fter meeting a dancing old man in what she thinks is just a crazy dream, a preteen schoolgirl embarks on a series of fanservice-filled magical girl adventures with a little creature named Ongo who might be evil and a shy girl with mysterious powers named Nami. (Source: ANN)","Action, Ecchi, Adventure, Magic, Comedy"
6,BIGOTRE Capture Mission,0.303764,commercials for Tokyo handbag company BIGOTRE that were posted on their official Instagram account. The commercials simulate mission briefings for a fictitious video game where players capture specific handbags in the SS20 (Spring/Summer 2020) BIGOTRE lookbook lineup.,Game
5,Is It Wrong to Try to Pick Up Girls in a Dungeon? II,0.298232,"It is business as usual in the massive city of Orario, where legions of adventurers gather to explore the monster-infested ""Dungeon."" Among them is the easily flustered yet brave Bell Cranel, the sole member of the Hestia Familia. With the help of his demi-human supporter Liliruca Arde and competent blacksmith Welf Crozzo, Bell has earned the title of Little Rookie by becoming Orario's fastest-growing adventurer thanks to his endeavors within the deeper levels of the Dungeon. Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka II continues Bell's adventures as he tries to bring glory to his goddess and protect those he cares about. However, various familias and gods across the city begin to take notice of his achievements and attempt to add him to their ranks.","Action, Adventure, Comedy, Romance, Fantasy"
4,Sengoku Musou,0.297803,"Toyotomi Hideyoshi is on the verge of unifying the realm, and all that remains is the Odawara Castle protected by the ""Lion of Sagami"" Ujiyasu Houjou. Hideyoshi's personally trained generals—Ishida Mitsunari, Katou Kiyomasa, and Fukushima Masanori—as well as Ootani Yoshitsugu, Shima Sakon, and Naoe Kanetsugu are all on the frontline of the battle. Amidst the battle formation are the two young warriors of the Sanada household: older brother Nobuyuki and younger brother Yukimura. As the stalemate begins to break down, Sanada Yukimura rides on a lone horse into the fray. His brother follows him right after. (Source: ANN)","Action, Historical, Samurai"
3,Attractive shop,0.294201,Kyoto Animation commercial for the Kyoto Animation Shop.,Sci-Fi
2,Rainbow Days,0.29084,"Nijiiro Days follows the colorful lives and romantic relationships of four high school boys—Natsuki Hashiba, a dreamer with delusions of love; Tomoya Matsunaga, a narcissistic playboy who has multiple girlfriends; Keiichi Katakura, a kinky sadist who always carries a whip; and Tsuyoshi Naoe, an otaku who has a cosplaying girlfriend. When his girlfriend unceremoniously dumps him on Christmas Eve, Natsuki breaks down in tears in the middle of the street and is offered tissues by a girl in a Santa Claus suit. He instantly falls in love with this girl, Anna Kobayakawa, who fortunately attends the same school as him. Natsuki's pursuit of Anna should have been simple and uneventful; however, much to his dismay, his nosy friends constantly meddle in his relationship, as they strive to succeed in their own endeavors of love.","Comedy, Romance, School, Shoujo, Slice of Life"
1,Gintama.:Slip Arc,0.28806,"Following the grim events of Iga, Kokujou Island, Rakuyou, and multiple fruitless confrontations with the Tenshouin Naraku and Tendoshuu, Gintama.: Porori-hen takes its viewers on a trip down memory lane to when Yorozuya were mostly doing what they did best—odd jobs. The great space hunter Umibouzu has returned to Edo and is livid when he finds out that his daughter Kagura has a boyfriend. He blames Gintoki for being an incompetent guardian, but has the time finally come for him to let go of his daughter? Back with shameless parodies, risqué humor, and lively camaraderie, Gintoki, Kagura, and Shinpachi are faced with unforeseen situations that manage to be both hilarious and emotionally stirring.","Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen"
0,Koutetsujou no Kabaneri: Ran - Hajimaru Michiato,0.287039,"The opening movie/promotional video to the Koutetsujou no Kabaneri: Ran - Hajimaru Michiato mobile game released in December, 2018.",Action


### User Based Recommender System

In [177]:
###These funstions are just to find similar users to a given user###

def find_similair_users(item_input , user_weights , user2user_encoded , user2user_decoded , n=10, return_dist=False, neg=False):
    try:
        index = item_input
        encoded_index = user2user_encoded.get(index)  ### get encoded user id from user id
        
        weights = user_weights

        dists = np.dot(weights, weights[encoded_index])  ### calculate distance between user weights and encoded user id
        sorted_dists = np.argsort(dists)

        n=n+1
        if neg:
            closest = sorted_dists[:n]  ### get closest user ids
        else:
            closest = sorted_dists[-n:]
        
        if return_dist:
            return dists, closest
        
        SimilarityArr = []

        for close in closest:
            similarity = dists[close]  ### get similarity from distance array

            if isinstance(item_input, int):
                decoded_id = user2user_decoded.get(close)
                SimilarityArr.append({
                    "similair_users": decoded_id,
                    "similarity": similarity
                })

        similair_users = pd.DataFrame(SimilarityArr).sort_values(by="similarity", ascending=False)  ### create dataframe from similarity array
        similair_users = similair_users[similair_users.similair_users != index]  ### remove original user id from dataframe
        return similair_users  ### return dataframe without the original user id
    
    except Exception as e:
        print("Error:", e)  ### print error message

In [178]:
find_similair_users(int(11880), user_weights, user2user_encoded, user2user_decoded)  ### find similar users to user id 1

Unnamed: 0,similair_users,similarity
9,11027,0.337425
8,8517,0.280019
7,12611,0.271538
6,7850,0.268681
5,9967,0.260721
4,8037,0.258393
3,9362,0.258349
2,1710,0.255176
1,5163,0.254223
0,10365,0.253166


In [None]:
#### wordcloud functions for visualisation ####

def showWordCloud(all_genres):
    genres_cloud = wordcloud(width=800, height=400, background_color='white', colormap='gnuplot').generate_from_frequencies(all_genres)  ### generate word cloud from genres
    plt.figure(figsize=(10, 8))  ### set figure size
    plt.imshow(genres_cloud, interpolation='bilinear')  ### show word cloud
    plt.axis('off')  ### turn off axis
    plt.show()  ### show plot

In [161]:
from collections import defaultdict

In [167]:
def getFavGenre(frame, plot=False):
    frame.dropna(inplace=False)
    all_genres = defaultdict(int)  ### create empty dictionary for genres

    genres_list=[]

    for genres in frame["Genres"]:
        if isinstance(genres, str):
            for genre in genres.split(","):
                genres_list.append(genre)  ### split genres by comma and strip whitespace
                all_genres[genre.strip()] += 1

    if plot:
        showWordCloud(all_genres)

    return genres_list  ### return dictionary of genres and list of genres

    

In [168]:
z = get_anime_frame(25,df)

In [171]:
getFavGenre(z)

['Action', ' Adventure', ' Comedy', ' Ecchi', ' Sci-Fi', ' Shounen']

In [173]:
### find user preferences for a specific user ####

def get_user_preferences(user_id, df, rating_df, plot=False):

    animes_watched_by_user = rating_df[rating_df.user_id == user_id]  ### get animes watched by user

    user_rating_percentile = np.percentile(animes_watched_by_user.rating , 75)  ### get top 75th percentile rated anime by user

    animes_watched_by_user = animes_watched_by_user[animes_watched_by_user.rating >= user_rating_percentile]  ### filter animes watched by user by rating

    top_animes_by_user = (
        animes_watched_by_user.sort_values(by="rating", ascending=False).anime_id.values
    )

    anime_df_rows = df[df["anime_id"].isin(top_animes_by_user)]  ### get anime dataframe rows for top animes watched by user 
    anime_df_rows = anime_df_rows[["anime_id", "eng_version", "Genres"]]  ### select only anime_id, eng_version and Genres columns

    if plot:
        getFavGenre(anime_df_rows, plot)

    return anime_df_rows  ### return dataframe of top animes watched by user

In [174]:
get_user_preferences(11880, df, rating_df)  ### get user preferences for user id 1

Unnamed: 0,anime_id,eng_version,Genres
3971,5114,Fullmetal Alchemist:Brotherhood,"Action, Military, Adventure, Comedy, Drama, Magic, Fantasy, Shounen"
9913,28977,Gintama Season 4,"Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen"
6474,11061,Hunter x Hunter,"Action, Adventure, Fantasy, Shounen, Super Power"
6006,9969,Gintama Season 2,"Action, Sci-Fi, Comedy, Historical, Parody, Samurai, Shounen"
741,820,Legend of the Galactic Heroes,"Military, Sci-Fi, Space, Drama"
...,...,...,...
2575,2797,Shootfighter Tekken,"Action, Martial Arts, Shounen"
8064,19365,Samurai Flamenco,"Action, Comedy, Parody, Super Power"
510,544,Venus Wars,"Action, Sci-Fi, Adventure, Space"
6864,12929,Saint Seiya Omega,"Action, Adventure, Fantasy, Shounen"


In [None]:
### beginning actual user based recommendation functions ###

def get_user_recommendations(similair_users, user_pref, df, rating_df, synopsis_df, n=10):

    recommended_animes = []  ### create empty list for recommended animes
    anime_list = []  ### create empty list for anime ids

    for user_id in similair_users.similair_users.values:  ### loop through similar users
        pref_list = get_user_preferences(int(user_id), df, rating_df)  ### get user preferences for similar user

        pref_list = pref_list[~pref_list.eng_version.isin(user_pref.eng_version.values)]  ### remove animes that the user has already watched from similar user preferences

        if not pref_list.empty:
            anime_list.append(pref_list.eng_version.values)  ### append anime names to anime list
        
    if anime_list:
            anime_list = pd.DataFrame(anime_list)  ### convert anime list to dataframe

            sorted_list = pd.DataFrame(pd.Series(anime_list.values.ravel()).value_counts()).head(n)  ##ravel flattens results to 1d array

            for i,anime_name in enumerate(sorted_list.index):
                anime_id = df[df.eng_version == anime_name].anime_id.values[0]
                n_user_pref = sorted_list[sorted_list.index == anime_name].values[0][0]

                if isinstance(anime_name, str):
                    frame = get_anime_frame(anime_name, df)  ### get anime frame from anime name
                    anime_id = frame.anime_id.values[0]  ### get anime id from anime frame
                    genre = frame.Genres.values[0]  ### get genre from anime frame
                    synopsis = get_synopsis(int(anime_id), synopsis_df)  ### get synopsis from anime id

                    recommended_animes.append({
                        "n": n_user_pref,
                        "anime_name": anime_name,                   
                        "synopsis": synopsis,
                        "genre": genre
                    })
    
    return pd.DataFrame(recommended_animes).head(n)  ### return dataframe of recommended animes






In [179]:
sim_users = find_similair_users(int(11880), user_weights, user2user_encoded, user2user_decoded)

In [180]:
user_prefs = get_user_preferences(11880, df, rating_df)

In [None]:
get_user_recommendations(sim_users,user_prefs, df, rating_df, synopsis_df)  ### get user recommendations for user id 

Unnamed: 0,n,anime_name,synopsis,genre
0,9,Toradora!,"uuji Takasu is a gentle high school student with a love for housework; but in contrast to his kind nature, he has an intimidating face that often gets him labeled as a delinquent. On the other hand is Taiga Aisaka, a small, doll-like student, who is anything but a cute and fragile girl. Equipped with a wooden katana and feisty personality, Taiga is known throughout the school as the ""Palmtop Tiger."" One day, an embarrassing mistake causes the two students to cross paths. Ryuuji discovers that Taiga actually has a sweet side: she has a crush on the popular vice president, Yuusaku Kitamura, who happens to be his best friend. But things only get crazier when Ryuuji reveals that he has a crush on Minori Kushieda—Taiga's best friend! Toradora! is a romantic comedy that follows this odd duo as they embark on a quest to help each other with their respective crushes, forming an unlikely alliance in the process.","Slice of Life, Comedy, Romance, School"
1,8,Steins;Gate,"The self-proclaimed mad scientist Rintarou Okabe rents out a room in a rickety old building in Akihabara, where he indulges himself in his hobby of inventing prospective ""future gadgets"" with fellow lab members: Mayuri Shiina, his air-headed childhood friend, and Hashida Itaru, a perverted hacker nicknamed ""Daru."" The three pass the time by tinkering with their most promising contraption yet, a machine dubbed the ""Phone Microwave,"" which performs the strange function of morphing bananas into piles of green gel. Though miraculous in itself, the phenomenon doesn't provide anything concrete in Okabe's search for a scientific breakthrough; that is, until the lab members are spurred into action by a string of mysterious happenings before stumbling upon an unexpected success—the Phone Microwave can send emails to the past, altering the flow of history. Adapted from the critically acclaimed visual novel by 5pb. and Nitroplus, Steins;Gate takes Okabe through the depths of scientific theory and practicality. Forced across the diverging threads of past and present, Okabe must shoulder the burdens that come with holding the key to the realm of time.","Thriller, Sci-Fi"
2,8,The Devil is a Part-Timer!,"Striking fear into the hearts of mortals, the Demon Lord Satan begins to conquer the land of Ente Isla with his vast demon armies. However, while embarking on this brutal quest to take over the continent, his efforts are foiled by the hero Emilia, forcing Satan to make his swift retreat through a dimensional portal only to land in the human world. Along with his loyal general Alsiel, the demon finds himself stranded in modern-day Tokyo and vows to return and complete his subjugation of Ente Isla—that is, if they can find a way back! Powerless in a world without magic, Satan assumes the guise of a human named Sadao Maou and begins working at MgRonald's—a local fast-food restaurant—to make ends meet. He soon realizes that his goal of conquering Ente Isla is just not enough as he grows determined to climb the corporate ladder and become the ruler of Earth, one satisfied customer at a time! Whether it's part-time work, household chores, or simply trying to pay the rent on time, Hataraku Maou-sama! presents a hilarious view of the most mundane aspects of everyday life, all through the eyes of a hapless demon lord.","Comedy, Demons, Supernatural, Romance, Fantasy"
3,8,Attack on Titan,"Centuries ago, mankind was slaughtered to near extinction by monstrous humanoid creatures called titans, forcing humans to hide in fear behind enormous concentric walls. What makes these giants truly terrifying is that their taste for human flesh is not born out of hunger but what appears to be out of pleasure. To ensure their survival, the remnants of humanity began living within defensive barriers, resulting in one hundred years without a single titan encounter. However, that fragile calm is soon shattered when a colossal titan manages to breach the supposedly impregnable outer wall, reigniting the fight for survival against the man-eating abominations. After witnessing a horrific personal loss at the hands of the invading creatures, Eren Yeager dedicates his life to their eradication by enlisting into the Survey Corps, an elite military unit that combats the merciless humanoids outside the protection of the walls. Based on Hajime Isayama's award-winning manga, Shingeki no Kyojin follows Eren, along with his adopted sister Mikasa Ackerman and his childhood friend Armin Arlert, as they join the brutal war against the titans and race to discover a way of defeating them before the last walls are breached.","Action, Military, Mystery, Super Power, Drama, Fantasy, Shounen"
4,8,Death Note,"shinigami, as a god of death, can kill any person—provided they see their victim's face and write their victim's name in a notebook called a Death Note. One day, Ryuk, bored by the shinigami lifestyle and interested in seeing how a human would use a Death Note, drops one into the human realm. High school student and prodigy Light Yagami stumbles upon the Death Note and—since he deplores the state of the world—tests the deadly notebook by writing a criminal's name in it. When the criminal dies immediately following his experiment with the Death Note, Light is greatly surprised and quickly recognizes how devastating the power that has fallen into his hands could be. With this divine capability, Light decides to extinguish all criminals in order to build a new world where crime does not exist and people worship him as a god. Police, however, quickly discover that a serial killer is targeting criminals and, consequently, try to apprehend the culprit. To do this, the Japanese investigators count on the assistance of the best detective in the world: a young and eccentric man known only by the name of L.","Mystery, Police, Psychological, Supernatural, Thriller, Shounen"
5,8,"No Game, No Life","No Game No Life is a surreal comedy that follows Sora and Shiro, shut-in NEET siblings and the online gamer duo behind the legendary username ""Blank."" They view the real world as just another lousy game; however, a strange e-mail challenging them to a chess match changes everything—the brother and sister are plunged into an otherworldly realm where they meet Tet, the God of Games. The mysterious god welcomes Sora and Shiro to Disboard, a world where all forms of conflict—from petty squabbles to the fate of whole countries—are settled not through war, but by way of high-stake games. This system works thanks to a fundamental rule wherein each party must wager something they deem to be of equal value to the other party's wager. In this strange land where the very idea of humanity is reduced to child's play, the indifferent genius gamer duo of Sora and Shiro have finally found a real reason to keep playing games: to unite the sixteen races of Disboard, defeat Tet, and become the gods of this new, gaming-is-everything world.","Game, Adventure, Comedy, Supernatural, Ecchi, Fantasy"
6,8,Angel Beats!,"Otonashi awakens only to learn he is dead. A rifle-toting girl named Yuri explains that they are in the afterlife, and Otonashi realizes the only thing he can remember about himself is his name. Yuri tells him that she leads the Shinda Sekai Sensen (Afterlife Battlefront) and wages war against a girl named Tenshi. Unable to believe Yuri's claims that Tenshi is evil, Otonashi attempts to speak with her, but the encounter doesn't go as he intended. Otonashi decides to join the SSS and battle Tenshi, but he finds himself oddly drawn to her. While trying to regain his memories and understand Tenshi, he gradually unravels the mysteries of the afterlife.","Action, Comedy, Drama, School, Supernatural"
7,7,Code Geass:Lelouch of the Rebellion R2,"One year has passed since the Black Rebellion, a failed uprising against the Holy Britannian Empire led by the masked vigilante Zero, who is now missing. At a loss without their revolutionary leader, Area 11's resistance group—the Black Knights—find themselves too powerless to combat the brutality inflicted upon the Elevens by Britannia, which has increased significantly in order to crush any hope of a future revolt. Lelouch Lamperouge, having lost all memory of his double life, is living peacefully alongside his friends as a high school student at Ashford Academy. His former partner C.C., unable to accept this turn of events, takes it upon herself to remind him of his past purpose, hoping that the mastermind Zero will rise once again to finish what he started, in this thrilling conclusion to the series.","Action, Military, Sci-Fi, Super Power, Drama, Mecha"
8,7,Nisekoi:False Love,"aku Ichijou, a first-year student at Bonyari High School, is the sole heir to an intimidating yakuza family. Ten years ago, Raku made a promise to his childhood friend. Now, all he has to go on is a pendant with a lock, which can only be unlocked with the key which the girl took with her when they parted. Now, years later, Raku has grown into a typical teenager, and all he wants is to remain as uninvolved in his yakuza background as possible while spending his school days alongside his middle school crush Kosaki Onodera. However, when the American Bee Hive Gang invades his family's turf, Raku's idyllic romantic dreams are sent for a toss as he is dragged into a frustrating conflict: Raku is to pretend that he is in a romantic relationship with Chitoge Kirisaki, the beautiful daughter of the Bee Hive's chief, so as to reduce the friction between the two groups. Unfortunately, reality could not be farther from this whopping lie—Raku and Chitoge fall in hate at first sight, as the girl is convinced he is a pathetic pushover, and in Raku's eyes, Chitoge is about as attractive as a savage gorilla. Nisekoi follows the daily antics of this mismatched couple who have been forced to get along for the sake of maintaining the city's peace. With many more girls popping up his life, all involved with Raku's past somehow, his search for the girl who holds his heart and his promise leads him in more unexpected directions than he expects.","Harem, Comedy, Romance, School, Shounen"
9,7,High School of the Dead,"It happened suddenly: The dead began to rise and Japan was thrown into total chaos. As these monsters begin terrorizing a high school, Takashi Kimuro is forced to kill his best friend when he gets bitten and joins the ranks of the walking dead. Vowing to protect Rei Miyamoto, the girlfriend of the man he just executed, they narrowly escape their death trap of a school, only to be greeted with a society that has already fallen. Soon, Takashi and Rei band together with other students on a journey to find their family members and uncover what caused this overwhelming pandemic. Joining them is Saeko Busujima, the beautiful president of the Kendo Club; Kouta Hirano, an otaku with a fetish for firearms; Saya Takagi, the daughter of an influential politician; and Shizuka Marikawa, their hot school nurse. But will the combined strength of these individuals be enough to conquer this undead apocalypse?","Action, Horror, Supernatural, Ecchi, Shounen"


### Hybrid Recommendation System

In [185]:
def hybrid_recommendation(user_id, user_weight=0.5, content_weight=0.5):  ##user recommendation section
    sim_users = find_similair_users(user_id, user_weights, user2user_encoded, user2user_decoded)  ### find similar users to user id
    user_prefs = get_user_preferences(user_id, df, rating_df)  ### get user preferences for user id
    user_recommended_animes = get_user_recommendations(sim_users,user_prefs, df, rating_df, synopsis_df)  ### get user recommendations for user id

    user_recommended_anime_list = user_recommended_animes["anime_name"].tolist()  ### get anime names from user recommended animes

    ##content recommendation section
    content_recommended_animes = []  ### create empty list for content recommended animes

    for anime in user_recommended_anime_list:  ### loop through user recommended animes
        sim_animes = find_similair_animes(anime, anime_weights, anime2anime_encoded, anime2anime_decoded, df, synopsis_df)

        if sim_animes is not None and not sim_animes.empty:
            content_recommended_animes.extend(sim_animes["name"].tolist())  ### append anime names to content recommended animes
        else:
            print(f"No similar animes found {anime}")

    combined_scores = {}

    for anime in user_recommended_anime_list:
        combined_scores[anime] = combined_scores.get(anime, 0) + user_weight  ### get combined score for user recommended animes
    for anime in content_recommended_animes:
        combined_scores[anime] = combined_scores.get(anime, 0) + content_weight


    sorted_recommendations = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)  ### sort recommendations by score

    return [anime for anime , score in sorted_recommendations[:10]]  ### return sorted recommendations


In [188]:
hybrid_recommendation(13994)

Error:Anime not found
No similar animes found Bakuman.
Error:Anime not found
No similar animes found Elfen Lied
Error:Anime not found
No similar animes found Your Lie in April
Error:Anime not found
No similar animes found Mob Psycho 100
Error:Anime not found
No similar animes found Terror in Resonance


['Nisekoi:False Love',
 'GATE',
 'Bakuman.',
 'Elfen Lied',
 'No Game, No Life',
 'Angel Beats!',
 'Your Lie in April',
 'Mob Psycho 100',
 'Demon Slayer:Kimetsu no Yaiba',
 'Terror in Resonance']