# Recomendation system

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler

In [20]:
# Load the dataset
anime = pd.read_csv('C:\\db\\anime.csv')
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


# Handle missing values

In [15]:
anime.fillna({'genre': '', 'rating': anime['rating'].mean(), 'episodes': 0}, inplace=True)

# Feature Extraction

In [17]:
# Normalize numerical features
scaler = StandardScaler()
anime['rating'] = scaler.fit_transform(anime[['rating']])

In [18]:
# Combine features
features = pd.concat([genre_df, anime[['rating']]], axis=1)

In [None]:
# Convert 'genre' to numerical representation using one-hot encoding
mlb = MultiLabelBinarizer()
anime['genre'] = anime['genre'].apply(lambda x: x.split(', ') if x else [])
genre_encoded = mlb.fit_transform(anime['genre'])
genre_df = pd.DataFrame(genre_encoded, columns=mlb.classes_)

In [None]:
# Normalize numerical features
scaler = StandardScaler()
anime['rating'] = scaler.fit_transform(anime[['rating']])

In [None]:
# Combine features
features = pd.concat([genre_df, anime[['rating']]], axis=1)

In [None]:
# Compute cosine similarity on training set
train_features = features.loc[train.index]
cos_sim = cosine_similarity(train_features)

# Recommendation function based on cosine similarity
def recommend_anime(anime_id, df=train, features=train_features, cosine_sim=cos_sim, top_n=10):
    idx = df.index[df['anime_id'] == anime_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    anime_indices = [i[0] for i in sim_scores]
    return df.iloc[anime_indices]

In [None]:
"""# Evaluation
train, test = train_test_split(anime, test_size=0.2, random_state=42)
def recommend_anime(anime_id, cosine_sim=cos_sim, df=anime, top_n=10):
    idx = df.index[df['anime_id'] == anime_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    anime_indices = [i[0] for i in sim_scores]
    return df[['anime_id', 'name']].iloc[anime_indices]
"""
def recommend_anime(anime_id, cosine_sim=cos_sim, df=anime, top_n=10):
    if anime_id not in df['anime_id'].values:
        return pd.DataFrame()  # Return empty DataFrame if anime_id not in training set
    
    idx = df.index[df['anime_id'] == anime_id].tolist()[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]
    anime_indices = [i[0] for i in sim_scores]
    return df[['anime_id', 'name']].iloc[anime_indices]

In [None]:
# Example Recommendation
recommended_anime = recommend_anime(anime_id=9963, top_n=10)
print(recommended_anime)

In [24]:
# Split dataset into training and testing sets
train, test = train_test_split(anime, test_size=0.2, random_state=42)

# Initialize lists to store ground truth and predictions
ground_truth = []
predictions = []

# Mean rating to determine relevance
mean_rating = train['rating'].mean()

for _, row in test.iterrows():
    anime_id = row['anime_id']
    
    # Skip anime not present in the training set
    if anime_id not in train['anime_id'].values:
        continue
    
    recommended_anime = recommend_anime(anime_id, top_n=10)
    
    if recommended_anime.empty:
        continue
    
    actual_ratings = test[test['anime_id'].isin(recommended_anime['anime_id'])]['rating']
    true_relevant = (actual_ratings > mean_rating).astype(int).tolist()
    
    ground_truth.extend(true_relevant)
    predictions.extend([1] * len(true_relevant))

# Include non-relevant predictions to avoid all 1s in predictions
if not predictions:
    predictions = [0]

# Include non-relevant items in ground truth to avoid all 1s
if not ground_truth:
    ground_truth = [0]

# Compute precision, recall, and F1-score

from sklearn.metrics import precision_score, recall_score, f1_score
precision = precision_score(ground_truth, predictions, average='macro', zero_division=0)
recall = recall_score(ground_truth, predictions, average='macro', zero_division=0)
f1 = f1_score(ground_truth, predictions, average='macro', zero_division=0)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')

Precision: 1.00
Recall: 1.00
F1-Score: 1.00


In [None]:
1. Difference Between User-Based and Item-Based Collaborative Filtering
User-Based Collaborative Filtering:

Concept: This approach recommends items to a user based on the preferences of other users who have similar tastes.
Process:
Identify Similar Users: Calculate the similarity between users using measures like cosine similarity, Pearson correlation, or Euclidean distance based on their ratings of items.
Find Neighbors: Select a subset of users (neighbors) who are most similar to the target user.
Aggregate Preferences: Aggregate the ratings of these neighbors to generate recommendations. This can involve taking a weighted average of the neighbors' ratings for items the target user hasn't rated.
Example: If User A and User B both like item X and User A also likes item Y, then item Y might be recommended to User B.
Item-Based Collaborative Filtering:

Concept: This approach recommends items to a user based on the similarity between items.
Process:
Identify Similar Items: Calculate the similarity between items using measures like cosine similarity, Pearson correlation, or adjusted cosine similarity based on the ratings given by users.
Build Item Profiles: Create a profile for each item based on the ratings it has received from all users.
Generate Recommendations: For a target user, look at the items they have rated highly and recommend similar items they haven't rated yet.
Example: If many users who liked item X also liked item Y, then item Y might be recommended to a user who liked item X.

In [None]:
2. What is Collaborative Filtering, and How Does It Work?
Collaborative Filtering: Collaborative filtering is a technique used in recommendation systems to predict the preferences of a user by collecting preferences from many users. The underlying assumption is that if users agreed in the past, they will agree in the future, and that users' preferences are related to those of others.

How It Works:

Data Collection:

Gather user-item interaction data, which can be explicit (e.g., ratings, likes) or implicit (e.g., clicks, purchase history).
Similarity Computation:

Calculate the similarity between users or items. Common similarity measures include:
Cosine Similarity: Measures the cosine of the angle between two vectors of ratings.
Pearson Correlation: Measures the linear correlation between two sets of ratings.
Jaccard Similarity: Measures the similarity between two sets of binary data.
Neighborhood Selection:

Identify a set of similar users (user-based) or items (item-based) to form a neighborhood.
Prediction Generation:

Aggregate the preferences of the neighbors to predict the rating for an unrated item. This can be done using methods like weighted average, where weights are the similarity scores.
User-Based Prediction: Predict the rating of user ( u ) for item ( i ) by looking at ratings of similar users for item ( i ).
Item-Based Prediction: Predict the rating of user ( u ) for item ( i ) by looking at user's ratings for similar items to ( i ).
Recommendation:

Recommend items with the highest predicted ratings or those that the user is most likely to interact with.
Applications:

Online retail (e.g., recommending products on Amazon)
Streaming services (e.g., movie recommendations on Netflix)
Social media (e.g., suggesting friends or content on Facebook)