In [1]:
import pandas as pd

anime_df = pd.read_csv('/content/anime.csv')


In [2]:
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
anime_df.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [4]:
#filling missig values
anime_df['genre'].fillna('Unknown', inplace=True)
anime_df['type'].fillna('Unknown', inplace=True)
mean_rating = anime_df['rating'].mean()
anime_df['rating'].fillna(mean_rating, inplace=True)


In [5]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Convert 'genre' into numerical format using CountVectorizer
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '))
genre_matrix = vectorizer.fit_transform(anime_df['genre'])

# Scaling the ratings between 0 and 1
scaler = MinMaxScaler()
ratings_scaled = scaler.fit_transform(anime_df[['rating']])

# Combine the genre matrix and the scaled ratings into a single feature matrix
features = np.hstack([genre_matrix.toarray(), ratings_scaled])
features




array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.92436975],
       [1.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.91116447],
       [1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.90996399],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.38535414],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.39735894],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.45498199]])

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate the cosine similarity matrix
cosine_sim = cosine_similarity(features)

# Function to get anime recommendations based on cosine similarity
def recommend_anime(anime_title, cosine_sim=cosine_sim, df=anime_df, num_recommendations=10):

    # Get the index of the anime that matches the title
    idx = df[df['name'].str.lower() == anime_title.lower()].index[0]

    # Get the pairwise similarity scores of all anime with that anime
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the anime based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the most similar anime
    sim_scores = sim_scores[1:num_recommendations+1]

    # Get the anime indices
    anime_indices = [i[0] for i in sim_scores]

    # Return the top most similar anime
    return df[['name', 'genre', 'rating']].iloc[anime_indices]

In [7]:
# Example: Recommend anime similar to "Steins;Gate"
recommendations = recommend_anime("Steins;Gate")
recommendations

Unnamed: 0,name,genre,rating
59,Steins;Gate Movie: Fuka Ryouiki no Déjà vu,"Sci-Fi, Thriller",8.61
126,Steins;Gate: Oukoubakko no Poriomania,"Sci-Fi, Thriller",8.46
196,Steins;Gate: Kyoukaimenjou no Missing Link - D...,"Sci-Fi, Thriller",8.34
10898,Steins;Gate 0,"Sci-Fi, Thriller",6.473902
5126,Under the Dog,"Action, Sci-Fi, Thriller",6.55
5525,Loups=Garous,"Mystery, Sci-Fi, Thriller",6.43
6889,Loups=Garous Pilot,"Mystery, Sci-Fi, Thriller",5.87
9091,Kaitei Toshi no Dekiru made,Sci-Fi,8.0
10414,Subarashii Sekai Ryokou: New York Tabi &quot;C...,Sci-Fi,8.0
1578,Sakasama no Patema: Beginning of the Day,Sci-Fi,7.5


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

train_df, test_df = train_test_split(anime_df, test_size=0.2, random_state=42)

# Function to evaluate the recommendation system
def evaluate_recommendation_system(test_df, cosine_sim=cosine_sim, df=anime_df, top_n=10):
    precision_scores = []
    recall_scores = []

    for _, row in test_df.iterrows():
        # Get the actual anime and its recommended animes
        actual_anime = row['name']
        recommended_animes = recommend_anime(actual_anime, cosine_sim, df, top_n)

        # Check if the actual anime appears in the recommended animes
        recommended_anime_names = recommended_animes['name'].tolist()
        actual_genres = set(row['genre'].split(', '))

        # Precision and Recall
        matched_genres = sum([1 for anime in recommended_anime_names
                              if set(df[df['name'] == anime]['genre'].iloc[0].split(', ')).intersection(actual_genres)])

        precision = matched_genres / top_n
        recall = matched_genres / len(actual_genres)

        precision_scores.append(precision)
        recall_scores.append(recall)

    # Calculate average precision and recall
    avg_precision = np.mean(precision_scores)
    avg_recall = np.mean(recall_scores)
    f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)

    return avg_precision, avg_recall, f1

# Evaluate the system on the test data
precision, recall, f1 = evaluate_recommendation_system(test_df)
print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")



Precision: 0.9998373322488816, Recall: 4.794661012025795, F1-score: 1.6546319596547112


In [None]:
''' High Precision: If the precision score is close to 1, it indicates that many of the recommended anime are also present in the test set,
suggesting the recommendations are relevant.

moderate Precision: A precision score around 0.5 indicates that about half of the recommendations match anime in the test set,
which may be acceptable depending on the application.

Low Precision: A low precision score indicates that the recommended anime do not frequently match those in the test set,
suggesting the recommendation system might need adjustments.'''

In [None]:
''' if the precision score is not satisfactory, consider the following improvements:

Feature Tuning:

Genres: Ensure that the genre feature is well represented.
Ratings: Experiment with different ways of normalizing or weighting the ratings.

Increase Data Quality:
Look into refining the dataset by handling more similAR cases, such as better imputing missing data or excluding outlier ratings.'''

In [None]:
''' Q.1. User-Based vs. Item-Based Collaborative Filtering
User-Based Collaborative Filtering:

Concept: This approach finds users who are similar to the target user (the one for whom recommendations are being made)
 based on their past behavior or preferences.
Working: If User A is similar to User B (based on their rating patterns), then items liked by User B are recommended to User A.

Concept: This approach focuses on finding similarities between items rather than users.
It uses the patterns of item ratings across users to make recommendations.
Working: If Item X is similar to Item Y based on the ratings given by users, and the user likes Item X, the system will recommend Item Y.'''

In [None]:
''' Q.2. How Collaborative Filtering Works

Data Collection: Gather data on users' ratings or interactions with items (e.g., movies, books, products).

Prediction: For user-based, predict the rating a user might give to an item based on the ratings given by similar users.
 For item-based, predict the rating for an item based on the ratings of similar items.

Recommendation: Suggest items to users based on the predicted ratings or preferences.
