In [34]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler
import warnings

# Suppress deprecation warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Load the dataset
anime_df = pd.read_csv('D:/Data science ass/11/Recommendation System/anime.csv')


In [35]:
# Handling missing values (if any)
anime_df = anime_df.dropna()

# Inspect the data
print(anime_df.head())
print(anime_df.info())


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
--- 

In [36]:
anime_df['genre_str'] = anime_df['genre'].apply(lambda x: ' '.join(x.lower().split(',')))

# Vectorize the genre strings
vectorizer = CountVectorizer()
genre_matrix = vectorizer.fit_transform(anime_df['genre_str'])

# Normalize the ratings
# Replace 'rating' with the correct column name for average rating
scaler = StandardScaler()
normalized_ratings = scaler.fit_transform(anime_df[['rating']])

# Combine the genre and normalized rating features
features = pd.concat([pd.DataFrame(genre_matrix.toarray()), pd.DataFrame(normalized_ratings)], axis=1)

# Compute cosine similarity
cosine_sim = cosine_similarity(features)

In [38]:
# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the anime that matches the title
    idx = anime_df[anime_df['name'] == title].index[0]

    # Get the pairwise similarity scores of all anime with that anime
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the anime based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the 10 most similar anime
    anime_indices = [i[0] for i in sim_scores[1:11]]

    # Return the top 10 most similar anime
    return anime_df['name'].iloc[anime_indices]

# Example usage
recommendations = get_recommendations('Naruto')
print(recommendations)

615                                    Naruto: Shippuuden
1103    Boruto: Naruto the Movie - Naruto ga Hokage ni...
486                              Boruto: Naruto the Movie
1343                                          Naruto x UT
1472          Naruto: Shippuuden Movie 4 - The Lost Tower
1573    Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...
2458                 Naruto Shippuuden: Sunny Side Battle
2997    Naruto Soyokazeden Movie: Naruto to Mashin to ...
206                                         Dragon Ball Z
515                                Dragon Ball Kai (2014)
Name: name, dtype: object
