In [1]:
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib as plt
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the anime CSV from its local path into `df`; the bare `df` on the
# last line makes the notebook render a preview of the loaded frame.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\priya\Downloads\Recommendation System\Recommendation System\anime.csv"
)
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [3]:
# Report how many entries are missing in each column.
# (`isna` is the pandas alias of `isnull`, so the printed counts are the same.)
print(df.isna().sum())

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


In [4]:
# Handle missing values
# Rows containing any missing value are dropped for simplicity.
# dropna() keeps the original index labels, which leaves gaps where rows were
# removed. Later cells build the feature matrix with a fresh 0..n-1 index and
# address rows by position, so the index is rebuilt here to keep row labels
# and row positions identical.
df = df.dropna().reset_index(drop=True)

# Feature Extraction

In [5]:
# --- Genre features -------------------------------------------------------
# Turn each comma-separated genre string into a list of genre names ...
df['genre'] = df['genre'].map(lambda genre_text: genre_text.split(', '))

# ... and expand those lists into one 0/1 indicator column per genre.
mlb = MultiLabelBinarizer()
genres_encoded = mlb.fit_transform(df['genre'])

# --- Rating feature -------------------------------------------------------
# Standardise the rating column (StandardScaler: zero mean, unit variance).
scaler = StandardScaler()
ratings_scaled = scaler.fit_transform(df[['rating']])

# --- Feature matrix -------------------------------------------------------
# Genre indicator columns followed by the scaled rating; rows are in the
# same order as the rows of `df`.
features = pd.DataFrame(genres_encoded, columns=mlb.classes_).assign(rating=ratings_scaled)

# The raw 'genre' and 'rating' columns are now represented in `features`,
# so they are removed from `df`.
df = df.drop(columns=['genre', 'rating'])

In [6]:
# Display the feature matrix and the remaining `df` columns together as a tuple.
(features, df)

(       Action  Adventure  Cars  Comedy  Dementia  Demons  Drama  Ecchi  \
 0           0          0     0       0         0       0      1      0   
 1           1          1     0       0         0       0      1      0   
 2           1          0     0       1         0       0      0      0   
 3           0          0     0       0         0       0      0      0   
 4           1          0     0       1         0       0      0      0   
 ...       ...        ...   ...     ...       ...     ...    ...    ...   
 12012       0          0     0       0         0       0      0      0   
 12013       0          0     0       0         0       0      0      0   
 12014       0          0     0       0         0       0      0      0   
 12015       0          0     0       0         0       0      0      0   
 12016       0          0     0       0         0       0      0      0   
 
        Fantasy  Game  ...  Slice of Life  Space  Sports  Super Power  \
 0            0     0  ..

In [7]:
# List the distinct values of the 'name' column (useful for picking a valid
# title to query).
df.loc[:, 'name'].unique()

array(['Kimi no Na wa.', 'Fullmetal Alchemist: Brotherhood', 'Gintama°',
       ..., 'Violence Gekiga David no Hoshi',
       'Violence Gekiga Shin David no Hoshi: Inma Densetsu',
       'Yasuji no Pornorama: Yacchimae!!'], dtype=object)

In [8]:
def recommend_anime(target_anime, features, df, threshold=0.5, top_n=None):
    """Return the names of anime whose feature vectors resemble the target's.

    Parameters
    ----------
    target_anime : str
        Value looked up in ``df['name']``; the first matching row is used.
    features : pandas.DataFrame or array-like of shape (n_rows, n_features)
        Feature matrix whose i-th row describes the i-th row of ``df``.
        Alignment is positional; index labels are ignored.
    df : pandas.DataFrame
        Frame with a ``'name'`` column, row-aligned with ``features``.
    threshold : float, default 0.5
        Only rows whose cosine similarity to the target is strictly greater
        than this value are returned.
    top_n : int or None, default None
        If given, keep at most this many of the best matches.

    Returns
    -------
    list
        Names ordered from most to least similar, excluding the target row.

    Raises
    ------
    IndexError
        If ``target_anime`` does not occur in ``df['name']``.
    """
    # Locate the target by *position*. The similarity scores and `.iloc`
    # below are positional, while `df.index` labels may contain gaps (for
    # example after `dropna()`), so a label must not be used as a row number.
    hits = np.flatnonzero((df['name'] == target_anime).to_numpy())
    if hits.size == 0:
        raise IndexError(f"anime {target_anime!r} not found in df['name']")
    target_pos = int(hits[0])

    # Only the target's row of the similarity matrix is needed; computing
    # one row against all rows avoids building the full n x n matrix.
    if hasattr(features, 'iloc'):
        target_row = features.iloc[[target_pos]]
    else:
        target_row = features[target_pos:target_pos + 1]
    scores = np.asarray(cosine_similarity(target_row, features))[0]

    # Stable descending sort: ties keep their original row order.
    order = np.argsort(-scores, kind='stable')

    # Keep rows above the threshold, skipping the target itself.
    picked = [int(i) for i in order if scores[i] > threshold and i != target_pos]
    if top_n is not None:
        picked = picked[:top_n]

    return df['name'].iloc[picked].tolist()

# Example: query the recommender for one title and print the matches.
# `target_anime` must be a value that occurs in df['name'].
target_anime = 'Kimi no Na wa.'
similar_anime = recommend_anime(
    target_anime=target_anime,
    features=features,
    df=df,
    threshold=0.5,
)
print(f"Anime similar to {target_anime}:")
print(similar_anime)




Anime similar to Kimi no Na wa.:
['Kokoro ga Sakebitagatterunda.', 'Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen', 'Hotarubi no Mori e', 'Clannad: After Story', 'Little Busters!: Refrain', 'Yahari Ore no Seishun Love Comedy wa Machigatteiru. Zoku', 'Mirai ni Mukete: Bousai wo Kangaeru', 'Angel Beats!: Another Epilogue', 'Clannad', 'Koe no Katachi', 'Shigatsu wa Kimi no Uso', 'Sen to Chihiro no Kamikakushi', 'ef: A Tale of Melodies.', 'Kanon (2006)', 'Tsumiki no Ie', 'Clannad: Mou Hitotsu no Sekai, Tomoyo-hen', 'True Tears', 'Kokoro Connect: Michi Random', 'Momo e no Tegami', 'Tokyo Magnitude 8.0', 'Yahari Ore no Seishun Love Comedy wa Machigatteiru.', '&quot;Bungaku Shoujo&quot; Memoire', 'Harmonie', 'Grisaia no Meikyuu: Caprice no Mayu 0', 'Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai.', 'Hourou Musuko Specials', 'Douwa Mondai to Jinken: Anata wa Dou Kangaemasuka', 'Michi', 'Kimikiss Pure Rouge', 'Oshin', 'Aura: Maryuuin Kouga Saigo no Tatakai', 'Cross Game', 'Blac