In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns

In [3]:
df = pd.read_csv('anime.csv')

In [4]:
df.dropna(inplace=True)

In [5]:
print(df.head())
print(df.info())

   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
Int64Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  

In [21]:
print(df.isnull().sum())

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64


In [22]:
print(df.describe())

           anime_id         genre        rating       members
count  12017.000000  12017.000000  12017.000000  1.201700e+04
mean   13638.001165   1935.640426      0.577223  1.834888e+04
std    11231.076675    910.157494      0.122912  5.537250e+04
min        1.000000      0.000000      0.000000  1.200000e+01
25%     3391.000000   1252.000000      0.506603  2.250000e+02
50%     9959.000000   2092.000000      0.588235  1.552000e+03
75%    23729.000000   2784.000000      0.661465  9.588000e+03
max    34519.000000   3228.000000      1.000000  1.013917e+06


In [6]:
features = ['genre', 'rating']

In [7]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['genre'] = le.fit_transform(df['genre'])

In [8]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[['rating']] = scaler.fit_transform(df[['rating']])

### recommend anime based on cosine similarity:

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_anime(anime_id, num_recommendations=5):
    anime_features = df.loc[df['anime_id'] == anime_id, features].values
    similarity_scores = cosine_similarity(anime_features, df[features])
    similar_anime_indices = similarity_scores.argsort()[0][-num_recommendations-1:-1][::-1]
    similar_anime = df.iloc[similar_anime_indices][['anime_id', 'name']]
    return similar_anime

In [10]:
def recommend_anime_threshold(anime_id, similarity_threshold=0.7):
    anime_features = df.loc[df['anime_id'] == anime_id, features].values
    similarity_scores = cosine_similarity(anime_features, df[features])
    similar_anime = df[similarity_scores[0] > similarity_threshold][['anime_id', 'name']]
    return similar_anime

In [13]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [15]:
y_pred = recommended_anime['anime_id'].values[:len(y_true)]

In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Assuming you have ground truth data for testing
test_anime_id = test_data['anime_id'].values[0]  # Select a test anime ID
recommended_anime = recommend_anime(test_anime_id, num_recommendations=len(test_data))  # Get recommendations for the test anime

# Prepare the ground truth labels and predicted labels
y_true = test_data[test_data['anime_id'].isin(recommended_anime['anime_id'])]['anime_id'].values
y_pred = recommended_anime['anime_id'].values[:len(y_true)]  # Ensure y_pred has the same length as y_true

precision = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Precision: 0.00
Recall: 0.00
F1-score: 0.00


In [27]:
# Example usage
target_anime_id = 32281
recommendations = recommend_anime_threshold(target_anime_id)

print("Recommendations for anime ID:", target_anime_id)
for _, row in recommendations.iterrows():
    print(f"Anime ID: {row['anime_id']}, Title: {row['name']}")

Recommendations for anime ID: 32281
Anime ID: 32281, Title: Kimi no Na wa.
Anime ID: 5114, Title: Fullmetal Alchemist: Brotherhood
Anime ID: 28977, Title: Gintama°
Anime ID: 9253, Title: Steins;Gate
Anime ID: 9969, Title: Gintama&#039;
Anime ID: 32935, Title: Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou
Anime ID: 11061, Title: Hunter x Hunter (2011)
Anime ID: 820, Title: Ginga Eiyuu Densetsu
Anime ID: 15335, Title: Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare
Anime ID: 15417, Title: Gintama&#039;: Enchousen
Anime ID: 4181, Title: Clannad: After Story
Anime ID: 28851, Title: Koe no Katachi
Anime ID: 918, Title: Gintama
Anime ID: 2904, Title: Code Geass: Hangyaku no Lelouch R2
Anime ID: 28891, Title: Haikyuu!! Second Season
Anime ID: 199, Title: Sen to Chihiro no Kamikakushi
Anime ID: 23273, Title: Shigatsu wa Kimi no Uso
Anime ID: 24701, Title: Mushishi Zoku Shou 2nd Season
Anime ID: 12355, Title: Ookami Kodomo no Ame to Yuki
Anime ID: 1575, Title: Code Geass: Hangya