In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
data_path = "/content/anime.csv"
try:
    df = pd.read_csv(data_path)
except FileNotFoundError:
    # Stop here: nothing below can run without the data. Raising SystemExit
    # with a message writes it to stderr and ends the run with a non-zero
    # status; the `exit()` helper comes from the `site` module, is meant for
    # interactive sessions, and would have reported success (status 0).
    raise SystemExit("Dataset not found. Please ensure the file path is correct.") from None

# Data Preprocessing
df.fillna({'genre': 'Unknown', 'rating': 0, 'episodes': 'Unknown'}, inplace=True)
# Combine features for similarity calculation
df['combined_features'] = df['genre'] + " " + df['type'] + " " + df['episodes'].astype(str)
# 'type' is not filled above, so a row with a missing type concatenates to NaN
# and TfidfVectorizer cannot tokenize NaN. Assign the filled Series back to the
# column: calling fillna(..., inplace=True) on df['combined_features'] acts on
# an intermediate object and stops updating df under pandas copy-on-write.
df['combined_features'] = df['combined_features'].fillna('Unknown')
# Normalize the rating feature
scaler = MinMaxScaler()
df['normalized_rating'] = scaler.fit_transform(df[['rating']])
# Cosine Similarity
tfidf = TfidfVectorizer(stop_words='english')
# Fit the TF-IDF vectorizer on the combined features
try:
    tfidf_matrix = tfidf.fit_transform(df['combined_features'])
except ValueError as e:
    # Abort with a non-zero status and the message on stderr instead of the
    # interactive-only `exit()` helper, which would exit with status 0.
    raise SystemExit(f"Error in TF-IDF vectorization: {e}") from e
# Compute cosine similarity matrix (row/column i corresponds to the i-th row of df)
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Function to recommend anime based on cosine similarity
def recommend_anime(anime_title, top_n=5):
    """Return the ``top_n`` titles most similar to ``anime_title``.

    Similarity scores are read from the module-level ``cosine_sim`` matrix,
    whose rows follow the positional row order of the module-level ``df``.

    Args:
        anime_title: Exact value to look up in ``df['name']``. If several
            rows share the name, the first one is used.
        top_n: Maximum number of recommendations to return. Values below 1
            give a result with no rows.

    Returns:
        A DataFrame holding the ``name``, ``genre`` and ``rating`` columns of
        the recommended rows, most similar first. If the title is not in the
        dataset, a message is printed and an empty DataFrame is returned.
    """
    # Check if the anime title exists in the dataset
    matches = (df['name'] == anime_title).to_numpy()
    if not matches.any():
        print(f"Anime '{anime_title}' not found in the dataset.")
        return pd.DataFrame()

    # Use the positional row number rather than the index label: cosine_sim
    # and df.iloc below are both addressed by position, and labels only
    # coincide with positions when df has a default RangeIndex.
    idx = int(matches.argmax())

    # Exclude the query row explicitly. Dropping "whatever sorts first" is
    # wrong when another row has identical features (it also scores 1.0 and
    # may sort ahead of the query) or when the query's vector is all zeros.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]
    # Stable sort: rows with equal scores keep their dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative top_n cannot turn into a from-the-end slice.
    anime_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]

    # Return the top-N recommendations
    return df.iloc[anime_indices][['name', 'genre', 'rating']]

# Demo run: print the five closest matches for a sample title
print("\nRecommendations for 'Naruto':")
recommendations = recommend_anime(anime_title='Naruto', top_n=5)
print(recommendations)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['combined_features'].fillna('Unknown', inplace=True)



Recommendations for 'Naruto':
                                                   name  \
1343                                        Naruto x UT   
486                            Boruto: Naruto the Movie   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   
1573  Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...   
2997  Naruto Soyokazeden Movie: Naruto to Mashin to ...   

                                                  genre  rating  
1343  Action, Comedy, Martial Arts, Shounen, Super P...    7.58  
486   Action, Comedy, Martial Arts, Shounen, Super P...    8.03  
1472  Action, Comedy, Martial Arts, Shounen, Super P...    7.53  
1573  Action, Comedy, Martial Arts, Shounen, Super P...    7.50  
2997  Action, Comedy, Martial Arts, Shounen, Super P...    7.11  
