# **Recommendation System**


# Data Preprocessing:

In [149]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score


In [150]:
# Load the dataset
anime_df = pd.read_csv('/content/anime.csv')


In [123]:
anime_df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [151]:
# Handle missing values
anime_df.fillna('', inplace=True)


In [152]:
# Explore the dataset
print(anime_df.head())


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes rating  \
0               Drama, Romance, School, Supernatural  Movie        1   9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64   9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51   9.25   
3                                   Sci-Fi, Thriller     TV       24   9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51   9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  


# Feature Extraction

In [153]:
# Combine relevant features into a single string
anime_df['combined_features'] = anime_df['genre'] + ' ' + anime_df['type']

In [154]:
# Convert categorical features into numerical representations
vectorizer = CountVectorizer()
feature_matrix = vectorizer.fit_transform(anime_df['combined_features'])

# Recommendation System

In [155]:
# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(feature_matrix)

In [156]:
# Function to get recommendations
def get_recommendations(anime_name, cosine_sim=cosine_sim):
    # Get the index of the anime that matches the name
    idx = anime_df[anime_df['name'] == anime_name].index[0]

    # Get the pairwise similarity scores of all animes with that anime
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the animes based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar animes
    sim_scores = sim_scores[1:11]

    # Get the anime indices
    anime_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar animes
    return anime_df['name'].iloc[anime_indices]


In [157]:
print(get_recommendations('Naruto'))

841                                                Naruto
206                                         Dragon Ball Z
515                                Dragon Ball Kai (2014)
588                                       Dragon Ball Kai
1209                                  Medaka Box Abnormal
1930                                    Dragon Ball Super
2615                                           Medaka Box
3038                                         Tenjou Tenge
486                              Boruto: Naruto the Movie
1103    Boruto: Naruto the Movie - Naruto ga Hokage ni...
Name: name, dtype: object


# Evaluation
Split the dataset into training and testing sets

In [158]:
train_df, test_df = train_test_split(anime_df, test_size=0.2, random_state=42)

In [159]:
# Function to evaluate the recommendation system
def evaluate_recommendations(train_df, test_df, cosine_sim=cosine_sim):
    y_true = []
    y_pred = []

    for anime_name in test_df['name']:
        if anime_name in train_df['name'].values:
            y_true.append(1)
            recommendations = get_recommendations(anime_name, cosine_sim)
            if anime_name in recommendations.values:
                y_pred.append(1)
            else:
                y_pred.append(0)

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    return precision, recall, f1


In [160]:
# Evaluate the recommendation system
precision, recall, f1 = evaluate_recommendations(train_df, test_df)
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

Precision: 0.0
Recall: 0.0
F1-Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
