<a href="https://colab.research.google.com/github/sdr999/MangaSensei/blob/main/MangaSensei.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Loading Dataset**

In [None]:
import pandas as pd
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Read the manga dataset CSV (path is relative to the working directory)
# into a DataFrame.
DATASET_CSV = "manga_dataset_with_genres.csv"
df = pd.read_csv(DATASET_CSV)


# **Checking for null values**

In [None]:
# Count missing values per column (`isna` is the canonical spelling of `isnull`).
df.isna().sum()

title          0
description    0
link           0
genres         0
dtype: int64

# **Creating tags to identify similarity**

In [None]:
# Build one text field per manga from its description and genres.
# A space is inserted between the two parts so the last word of the
# description and the first genre are not fused into a single token.
df["tags"] = df["description"] + " " + df["genres"]

In [None]:
# Keep only the columns the recommender uses; `drop` returns a new frame,
# so `df` itself is left untouched.
unused_columns = ["description", "genres", "link"]
new = df.drop(columns=unused_columns)

In [None]:
# Preview the first five rows of the trimmed frame.
new.head(n=5)

Unnamed: 0,title,tags
0,I Want To See You Embarassed,"Akito Shirasawa, a high school boy whose fathe..."
1,Lazy Dungeon Master,"""Come on, kill all those bandits for me alread..."
2,"Bind, Connect",The manga centers on Kanoko and her friend Tsu...
3,Story of a Small Senior In My Company,"-""My senpai from work... is tiny and cute.""\nS..."
4,Sissy,.Paranoid scum attack x gentle and weak. Li Ch...


# **Model Building**

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer: vocabulary capped at 500 terms, with
# scikit-learn's built-in English stop-word list removed.
cv = CountVectorizer(
    stop_words="english",
    max_features=500,
)

In [None]:
# Learn the vocabulary from the tags and count term occurrences per manga,
# then densify the sparse result into a plain array.
tag_counts = cv.fit_transform(new["tags"])
vector = tag_counts.toarray()

In [None]:
# Dimensions of the count matrix: (rows in `new`, vocabulary terms).
vector.shape

(216, 500)

In [None]:
import nltk


In [None]:
from nltk.stem.porter import PorterStemmer
# Single Porter stemmer instance, used by the `stem` helper.
ps = PorterStemmer()

In [None]:
def stem(text):
    """Return `text` with every whitespace-separated token Porter-stemmed.

    Tokens come from `str.split()` (so runs of whitespace collapse), each is
    passed through the module-level stemmer `ps`, and the stemmed tokens are
    re-joined with single spaces.
    """
    return " ".join(ps.stem(token) for token in text.split())

In [None]:
# Stem from the untouched `df['tags']` rather than from `new['tags']` itself,
# so re-running this cell does not stem already-stemmed text a second time.
# NOTE(review): `vector` is built from `new['tags']` in an earlier cell, before
# this stemming runs, so the stemmed text only reaches `vector`/`similarity`
# if the vectorizer is re-fitted after this cell — confirm the intended order.
new['tags'] = df['tags'].apply(stem)

In [None]:
# Vocabulary terms learned by `cv`, in feature-column order.
cv.get_feature_names_out()

array(['10', '66', '666', '66666', 'abandoned', 'abilities', 'ability',
       'able', 'accident', 'accidentally', 'action', 'actually',
       'adventure', 'age', 'ago', 'ai', 'akito', 'anette', 'anime',
       'anonymous', 'appeared', 'appears', 'art', 'arts', 'attack',
       'attention', 'awakened', 'away', 'bad', 'battle', 'beautiful',
       'beauty', 'begins', 'best', 'better', 'big', 'bit', 'black',
       'blood', 'body', 'bom', 'born', 'boy', 'break', 'bring', 'broken',
       'brother', 'building', 'called', 'came', 'castle', 'ce', 'chan',
       'chance', 'change', 'changed', 'character', 'chat', 'chen',
       'cheon', 'childhood', 'choice', 'chosen', 'city', 'class', 'clear',
       'coffee', 'cold', 'come', 'comedy', 'comic', 'company',
       'concubine', 'couldn', 'country', 'couple', 'course', 'crazy',
       'creatures', 'cruel', 'cult', 'cultivation', 'curse', 'cute',
       'cutting', 'daichi', 'dance', 'dao', 'dark', 'dating', 'daughter',
       'day', 'days', 'de

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Pairwise cosine similarity between all rows of `vector`;
# entry [i, j] compares row i with row j.
similarity = cosine_similarity(vector)

In [None]:
# Display the similarity matrix (NumPy abbreviates large arrays with `...`).
similarity

array([[1.        , 0.14285714, 0.2567763 , ..., 0.1028689 , 0.02969569,
        0.09639254],
       [0.14285714, 1.        , 0.1711842 , ..., 0.3086067 , 0.17817416,
        0.16868694],
       [0.2567763 , 0.1711842 , 1.        , ..., 0.09245003, 0.05337605,
        0.17325923],
       ...,
       [0.1028689 , 0.3086067 , 0.09245003, ..., 1.        , 0.25660012,
        0.31234752],
       [0.02969569, 0.17817416, 0.05337605, ..., 0.25660012, 1.        ,
        0.39072351],
       [0.09639254, 0.16868694, 0.17325923, ..., 0.31234752, 0.39072351,
        1.        ]])

In [None]:
# Row label of the first entry whose title matches exactly.
title_matches = new['title'] == 'Apex Future Martial Arts'
new.loc[title_matches].index[0]

14

In [None]:
def recommend_manga(manga, top_n=5):
    """Print the titles of the manga most similar to `manga`.

    Looks `manga` up by exact title in the module-level `new` frame and ranks
    every other row by its score in the module-level `similarity` matrix
    (highest first; ties keep their row order).

    Args:
        manga: Title to find, compared with `==` against `new['title']`.
            If several rows share the title, the first one is used.
        top_n: Maximum number of recommendations to print (default 5).

    Returns:
        None. The recommended titles are printed, one per line.

    Raises:
        IndexError: If no row of `new` has the given title.
    """
    is_match = (new['title'] == manga).to_numpy()
    if not is_match.any():
        # Same exception type a failed `.index[0]` lookup raises, but with a
        # message that names the missing title.
        raise IndexError(f"manga title not found: {manga!r}")

    # Work with the row *position*: `similarity` is addressed by position,
    # which only coincides with the index label for a default index.
    position = int(is_match.argmax())
    scores = similarity[position]

    ranked = sorted(range(len(scores)), key=lambda row: scores[row], reverse=True)
    # Drop the queried row explicitly instead of assuming it sorts first; a
    # row tied with it would otherwise push it into its own recommendations.
    recommendations = [row for row in ranked if row != position][:max(top_n, 0)]

    for row in recommendations:
        print(new['title'].iloc[row])


In [None]:
# Print the five titles ranked most similar to this one.
recommend_manga('Apex Future Martial Arts')

Reincarnated Into A Warlock 66,666 Years Later
Gangho Apocalypse
Martial Arts Alone
The Dark Magician Transmigrates After 66666 Years
I’m Trapped In This Day For One Thousand Years


# **Exporting data for transfer**

In [None]:
import pickle

In [None]:
# Persist the title/tags frame and the similarity matrix so they can be
# loaded elsewhere. Context managers ensure each file is flushed and closed
# as soon as it has been written.
with open('manga_list.pkl', 'wb') as manga_file:
    pickle.dump(new, manga_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)