In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_score, recall_score, f1_score

In [3]:
# Load the dataset
file_path = '/content/anime.csv'
anime_data = pd.read_csv(file_path)

In [4]:
# Step 1: Explore the dataset structure
dataset_info = anime_data.info()
head_data = anime_data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [5]:
# Step 2: Check for missing values
missing_values = anime_data.isnull().sum()

# Display results to understand the dataset structure, attributes, and missing values
print("Dataset Preview:")
print(anime_data.head())  # Display the head of the dataframe
print("\nMissing Values Summary:")
print(missing_values)  # Display the missing values summary


Dataset Preview:
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  

Missing Values Summary:
anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members 

In [6]:
# Step 3: Feature Extraction
anime_data['genre'] = anime_data['genre'].fillna('Unknown')  # Handle missing genres
genre_dummies = anime_data['genre'].str.get_dummies(sep=', ')

scaler = MinMaxScaler()
anime_data[['rating', 'members']] = scaler.fit_transform(anime_data[['rating', 'members']].fillna(0))
processed_features = pd.concat([anime_data[['rating', 'members']], genre_dummies], axis=1)


In [7]:
# Display the processed features to the user
print("Processed Features:")
print(processed_features.head())


Processed Features:
   rating   members  Action  Adventure  Cars  Comedy  Dementia  Demons  Drama  \
0   0.937  0.197872       0          0     0       0         0       0      1   
1   0.926  0.782770       1          1     0       0         0       0      1   
2   0.925  0.112689       1          0     0       1         0       0      0   
3   0.917  0.664325       0          0     0       0         0       0      0   
4   0.916  0.149186       1          0     0       1         0       0      0   

   Ecchi  ...  Slice of Life  Space  Sports  Super Power  Supernatural  \
0      0  ...              0      0       0            0             1   
1      0  ...              0      0       0            0             0   
2      0  ...              0      0       0            0             0   
3      0  ...              0      0       0            0             0   
4      0  ...              0      0       0            0             0   

   Thriller  Unknown  Vampire  Yaoi  Yuri  
0   

In [8]:
# Step 4: Recommendation Function
def recommend_anime(target_anime, data, anime_titles, similarity_matrix, top_n=5):
    if target_anime not in anime_titles.values:
        return f"Anime '{target_anime}' not found in the dataset."
    target_index = anime_titles[anime_titles == target_anime].index[0]
    similarity_scores = similarity_matrix[target_index]
    similar_indices = np.argsort(similarity_scores)[::-1][1:top_n+1]  # Exclude itself
    recommendations = [(anime_titles.iloc[idx], similarity_scores[idx]) for idx in similar_indices]
    return recommendations


In [9]:
# Precompute similarity matrix
similarity_matrix = cosine_similarity(processed_features)


In [10]:
# Test the function
anime_titles = anime_data['name']  # Assuming 'name' column contains anime titles
target_anime = anime_titles.iloc[0]  # Replace with any anime title
recommendations = recommend_anime(target_anime, processed_features.values, anime_titles, similarity_matrix, top_n=5)


In [11]:
# Display Recommendations
print(f"Recommendations for '{target_anime}':")
for anime, score in recommendations:
    print(f"{anime}: {score:.2f}")


Recommendations for 'Kimi no Na wa.':
Wind: A Breath of Heart OVA: 0.99
Wind: A Breath of Heart (TV): 0.99
Aura: Maryuuin Kouga Saigo no Tatakai: 0.90
Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen: 0.89
Kokoro ga Sakebitagatterunda.: 0.89


In [12]:
# Step 5: Evaluation of System
def evaluate_recommendation_system(train_data, test_data, train_titles, test_titles, top_n=5):
    similarity_matrix = cosine_similarity(train_data)

    y_true = []
    y_pred = []

    for test_index in range(len(test_data)):
        true_title = test_titles.iloc[test_index]
        recommendations = recommend_anime(test_index, train_data, train_titles, similarity_matrix, top_n=top_n)
        recommended_titles = [rec[0] for rec in recommendations]
        y_true.append(1 if true_title in recommended_titles else 0)
        y_pred.append(1)  # Always predict 1 for top_n recommendations

    precision = precision_score(y_true, y_pred, average='micro')
    recall = recall_score(y_true, y_pred, average='micro')
    f1 = f1_score(y_true, y_pred, average='micro')

    return precision, recall, f1


In [13]:
# Split the dataset into training and testing sets
train_data, test_data, train_titles, test_titles = train_test_split(
    processed_features, anime_data['name'], test_size=0.2, random_state=42
)


In [17]:
# Apply the evaluation function
precision, recall, f1 = evaluate_recommendation_system(
    train_data.values,
    test_data.values,
    train_titles,
    test_titles,
    top_n=5
)

In [16]:
# Display evaluation results
evaluation_results = {
    "Precision": precision,
    "Recall": recall,
    "F1-Score": f1
}
print("Evaluation Results:", evaluation_results)


Evaluation Results: {'Precision': 0.0, 'Recall': 0.0, 'F1-Score': 0.0}
