In [41]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the dataset
anime_data = pd.read_csv('/content/anime.csv')

# Display basic info about the dataset
print(anime_data.info())

# Handle missing values (if any)
anime_data = anime_data.dropna()  # For simplicity, drop rows with any missing values
# Display basic information about the dataset
print(anime_data.head())
# Convert genres into a format that is suitable for cosine similarity
tfidf = TfidfVectorizer(tokenizer=lambda x: x.split(', '))
genre_matrix = tfidf.fit_transform(anime_data['genre'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Advent



In [42]:
# Feature selection and preprocessing
selected_features = ['genre', 'rating', 'members']

# Convert categorical features into numerical representations
# One-hot encoding for genre (if necessary)
#anime_data = pd.get_dummies(anime_data, columns=['genre'])
print(anime_data.columns)
# Normalize numerical features
scaler = StandardScaler()
anime_data[['rating', 'members']] = scaler.fit_transform(anime_data[['rating', 'members']])

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')


In [43]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_anime(anime_title, anime_data, genre_matrix, threshold=0.5, top_n=5):
    try:
        # Filter anime_data for the given anime_title
        anime_entry = anime_data[anime_data['name'] == anime_title]

        if anime_entry.empty:
            print(f'Anime "{anime_title}" not found in dataset.')
            return []

        # Get the index of the anime_entry
        idx = anime_entry.index[0]

        # Compute cosine similarity with genre_matrix and other features
        genre_similarity = cosine_similarity(genre_matrix[idx], genre_matrix)
        numerical_features = anime_data[['rating', 'members']].values
        numerical_similarity = cosine_similarity([anime_entry[['rating', 'members']].values[0]], numerical_features)

        # Combine genre similarity and numerical similarity (adjust weights if necessary)
        combined_similarity = 0.5 * genre_similarity + 0.5 * numerical_similarity

        # Get similarity scores with other anime
        sim_scores = list(enumerate(combined_similarity[0]))

        # Sort anime based on similarity scores
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

        # Select anime with similarity above threshold
        sim_scores = [x for x in sim_scores if x[1] > threshold]

        # Get indices of the top similar anime (excluding the target itself)
        sim_indices = [i for i, _ in sim_scores if i != idx]

        # Return the top N most similar anime names
        return anime_data['name'].iloc[sim_indices].head(top_n).tolist()

    except IndexError as e:
        print(f'IndexError: {e}')
        return []
    except Exception as e:
        print(f'Error: {e}')
        return []

# Example usage of the recommendation system function
anime_title = 'Naruto'
recommendations = recommend_anime(anime_title, anime_data, genre_matrix, threshold=0.3, top_n=10)
print(f'Recommendations for "{anime_title}":')
print(recommendations)

Recommendations for "Naruto":
['Naruto: Shippuuden', 'Dragon Ball Z', 'Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono', 'Dragon Ball Super', 'Dragon Ball', 'Naruto: Shippuuden Movie 4 - The Lost Tower', 'Dragon Ball Z Movie 11: Super Senshi Gekiha!! Katsu no wa Ore da', 'Tenjou Tenge', 'Medaka Box', 'Katekyo Hitman Reborn!']


In [58]:
# splitting data
from sklearn.model_selection import train_test_split

# Assuming anime_data and genre_matrix are already defined and processed

# Split dataset into training and testing sets
train_data, test_data = train_test_split(anime_data, test_size=0.2, random_state=42)
# List all column names to check for the presence of 'recommendations'
print(test_data.columns)



print(f"Number of records in training set: {len(train_data)}")
print(f"Number of records in testing set: {len(test_data)}")
# Check for missing values in test_data
print(test_data.isnull().sum())

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')
Number of records in training set: 9613
Number of records in testing set: 2404
anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64


In [61]:
# Check anime_data
print("Anime Data:")
print(anime_data.columns)
print(anime_data.dtypes)
print(anime_data.isnull().sum())

# Check genre_matrix
print("\nGenre Matrix:")
print(genre_matrix.shape)  # Verify shape
print(genre_matrix[:5, :])  # Print sample rows

# Check test_data
print("\nTest Data:")
print(test_data.columns)
print(test_data.dtypes)
print(test_data.isnull().sum())

# Example of debugging IndexError
try:
    # Code that may raise IndexError
    avg_precision, avg_recall, avg_f1 = evaluate_recommendation_system(test_data, anime_data, genre_matrix, recommend_anime, top_n=10)
    print(f"Precision: {avg_precision:.2f}")
    print(f"Recall: {avg_recall:.2f}")
    print(f"F1-score: {avg_f1:.2f}")
except IndexError as e:
    print(f"IndexError occurred during evaluation: {e}")
except Exception as e:
    print(f"Error occurred during evaluation: {e}")

Anime Data:
Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')
anime_id      int64
name         object
genre        object
type         object
episodes     object
rating      float64
members     float64
dtype: object
anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

Genre Matrix:
(12017, 43)
  (0, 38)	0.5454743149673916
  (0, 27)	0.5202747795380636
  (0, 25)	0.4889208084308644
  (0, 6)	0.43900839236942196
  (1, 32)	0.3538898821489279
  (1, 19)	0.5204784422386617
  (1, 16)	0.4507423555853522
  (1, 8)	0.3196801580056285
  (1, 1)	0.3158062952261171
  (1, 0)	0.2945372198634319
  (1, 6)	0.33468534169720254
  (2, 28)	0.2946163056343774
  (2, 26)	0.573718235214092
  (2, 22)	0.4663814202378221
  (2, 12)	0.39397864964311313
  (2, 3)	0.20868726168499313
  (2, 32)	0.3148212724829932
  (2, 0)	0.2620210043529488
  (3, 39)	0.9056901914001664
  (3, 28)	0.42394018115947685
  (4, 28)	0.2946163056343

In [63]:
def evaluate_recommendation_system(test_data, anime_data, genre_matrix, recommend_func, top_n=10):
    precision_scores = []
    recall_scores = []
    f1_scores = []

    for idx, row in test_data.iterrows():
        try:
            anime_title = row['name']

            # Get actual relevant anime titles (simulated with higher ratings)
            actual_relevant = set(anime_data[anime_data['rating'] >= row['rating']]['name'].tolist())

            # Get recommended anime titles
            recommendations = recommend_func(anime_title, anime_data, genre_matrix, top_n=top_n)
            recommended_set = set(recommendations)

            # Calculate metrics: precision, recall, F1-score
            tp = len(actual_relevant & recommended_set)  # True positives
            fp = len(recommended_set - actual_relevant)  # False positives
            fn = len(actual_relevant - recommended_set)  # False negatives

            precision = tp / (tp + fp) if (tp + fp) > 0 else 0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

            precision_scores.append(precision)
            recall_scores.append(recall)
            f1_scores.append(f1)

        except IndexError:
            # Skip problematic row silently
            pass
        except Exception as e:
            # Handle other exceptions if needed
            print(f"Error occurred at index {idx}: {e}")
            continue  # Optionally continue with next iteration

    # Calculate average scores
    avg_precision = sum(precision_scores) / len(precision_scores) if precision_scores else 0
    avg_recall = sum(recall_scores) / len(recall_scores) if recall_scores else 0
    avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0

    return avg_precision, avg_recall, avg_f1

# Example usage: Evaluate the recommendation system
try:
    avg_precision, avg_recall, avg_f1 = evaluate_recommendation_system(test_data, anime_data, genre_matrix, recommend_anime, top_n=10)
    print(f"Precision: {avg_precision:.2f}")
    print(f"Recall: {avg_recall:.2f}")
    print(f"F1-score: {avg_f1:.2f}")
except Exception as e:
    print(f"Error occurred during evaluation: {e}")

Precision: 0.50
Recall: 0.00
F1-score: 0.00


Precision: A precision of 0.50 suggests that half of the recommended items are relevant, which could mean the recommendations are somewhat accurate but still have room for improvement.
Recall: A recall of 0.00 indicates that the system failed to recommend any of the relevant items, which is a significant shortcoming because it means the system missed all items that the user would likely find relevant.
F1-score: An F1-score of 0.00 reflects poor overall performance, likely due to the combination of low precision and zero recall. This indicates that the recommendation system needs improvement to provide more accurate and relevant recommendations.


To improve the recommendation system and provide more accurate and relevant recommendations, consider the following strategies:

Enhance Feature Representation:

Genres and Tags: Improve how genres and tags are represented and utilized in the recommendation algorithm. Ensure that each anime's genres and tags are accurately captured and weighted appropriately.
User Preferences: Incorporate more detailed user preferences beyond just ratings. Consider factors like viewing history, favorite genres, and interactions (e.g., likes, bookmarks) to personalize recommendations.
Advanced Algorithms:

Collaborative Filtering: Implement advanced collaborative filtering techniques such as matrix factorization, neighborhood-based methods, or deep learning models (e.g., neural collaborative filtering) to capture complex patterns and relationships in user-item interactions.
Content-Based Filtering: Combine content-based filtering with collaborative filtering to leverage both item features (e.g., genres, ratings) and user behavior for more personalized recommendations.
Data Quality and Preprocessing:

Clean Data: Ensure data quality by handling missing values, outliers, and inconsistencies in the dataset. Use robust preprocessing techniques to standardize and normalize data features.
Feature Engineering: Create new features or transform existing ones (e.g., feature scaling, dimensionality reduction) that enhance the representation of anime and user profiles.
Evaluation and Feedback Loop:

Metrics: Continuously evaluate the recommendation system using metrics like precision, recall, and F1-score. Monitor these metrics over time to assess improvements and identify areas for further enhancement.
User Feedback: Incorporate explicit user feedback (e.g., ratings, reviews, clicks) and implicit feedback (e.g., browsing history, time spent) to iteratively refine recommendations based on real user interactions.

Contextual Recommendations:

Temporal Dynamics: Consider temporal effects in user preferences and item popularity. Adjust recommendations dynamically based on seasonal trends, new releases, or changing user interests.
Contextual Information: Integrate contextual information such as user location, device type, and time of day to deliver more relevant recommendations tailored to specific contexts.

Experimentation and A/B Testing:

Iterative Improvements: Implement a structured approach to experimentation and A/B testing to compare different recommendation algorithms, parameters, and feature combinations. Use insights from experiments to guide further refinements.

Scalability and Performance:

Scalable Solutions: Ensure the recommendation system can handle large datasets efficiently. Consider distributed computing frameworks or cloud-based solutions to scale the system as user base and data volume grow.

By focusing on these areas, we can enhance the recommendation system's accuracy and relevance, ultimately improving user satisfaction and engagement with the platform. Continuous monitoring and adaptation based on user feedback and performance metrics will be crucial in maintaining the system's effectiveness over time.