In [1]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr

# Utility matrix
data = {
    'Northanger Abby': [5, 1, 1, None],
    'Wuthering Heights': [4, 2, 2, 4],
    'Oroonoko': [3, 4, 3, 3],
    "Bondswoman’s Narrative": [4, 5, None, 1]
}
users = ['Alex', 'Loren', 'Taylor', 'Ainsley']
df = pd.DataFrame(data, index=users)

# Helper functions
def user_mean_centered_ratings(user):
    return df.loc[user] - df.loc[user].mean(skipna=True)

def pearson_sim(user1, user2):
    common_ratings = df.loc[[user1, user2]].dropna(axis=1)
    if len(common_ratings.columns) < 2:
        return 0
    return pearsonr(common_ratings.loc[user1], common_ratings.loc[user2])[0]

# (a) User-based collaborative filtering with Pearson and mean-centering
def predict_user_based(target_user, target_item):
    # Compute similarities with other users
    similarities = {}
    for user in users:
        if user != target_user and not pd.isna(df.loc[user, target_item]):
            sim = pearson_sim(target_user, user)
            similarities[user] = sim
    
    # Select users with positive similarity
    similarities = {k:v for k,v in similarities.items() if v > 0}
    if not similarities:
        return df[target_item].mean()  # Fallback to global mean
    
    # Calculate weighted sum of mean-centered ratings
    numerator = 0
    denominator = 0
    target_mean = df.loc[target_user].mean(skipna=True)
    for user, sim in similarities.items():
        user_mean = df.loc[user].mean(skipna=True)
        rating = df.loc[user, target_item]
        numerator += sim * (rating - user_mean)
        denominator += abs(sim)
    
    if denominator == 0:
        return target_mean
    return target_mean + (numerator / denominator)

# (b) Item-based collaborative filtering with adjusted cosine similarity
def adjusted_cosine(item1, item2):
    # Subtract user mean for each rating
    common_users = df[[item1, item2]].dropna().index
    if len(common_users) == 0:
        return 0
    adjusted_ratings = []
    for user in common_users:
        user_mean = df.loc[user].mean(skipna=True)
        adj1 = df.loc[user, item1] - user_mean
        adj2 = df.loc[user, item2] - user_mean
        adjusted_ratings.append((adj1, adj2))
    a = np.array([x[0] for x in adjusted_ratings])
    b = np.array([x[1] for x in adjusted_ratings])
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10)

def predict_item_based(target_user, target_item):
    # Find similar items
    similarities = {}
    for item in df.columns:
        if item != target_item and not pd.isna(df.loc[target_user, item]):
            sim = adjusted_cosine(target_item, item)
            similarities[item] = sim
    
    # Calculate weighted average
    numerator = 0
    denominator = 0
    for item, sim in similarities.items():
        rating = df.loc[target_user, item]
        numerator += sim * rating
        denominator += abs(sim)
    
    if denominator == 0:
        return df[target_item].mean()
    return numerator / denominator

# Predict missing values
print("Question 1a: User-based predictions")
print("Taylor's Bondswoman’s Narrative:", predict_user_based('Taylor', "Bondswoman’s Narrative"))
print("Ainsley's Northanger Abby:", predict_user_based('Ainsley', 'Northanger Abby'))

print("\nQuestion 1b: Item-based predictions")
print("Taylor's Bondswoman’s Narrative:", predict_item_based('Taylor', "Bondswoman’s Narrative"))
print("Ainsley's Northanger Abby:", predict_item_based('Ainsley', 'Northanger Abby'))

Question 1a: User-based predictions
Taylor's Bondswoman’s Narrative: 4.0
Ainsley's Northanger Abby: 2.3333333333333335

Question 1b: Item-based predictions
Taylor's Bondswoman’s Narrative: -0.753552340806314
Ainsley's Northanger Abby: -0.1721605090644537


In [2]:
# (a) Sample dictionary
dataset = {
    'Rahul': {'Special Ops': 5, 'Criminal Justice': 3, 'Panchayat': 3, 'Sacred Games': 3, 'Apharan': 2, 'Mirzapur': 3},
    'Rishabh': {'Special Ops': 5, 'Criminal Justice': 3, 'Sacred Games': 5, 'Panchayat': 5, 'Mirzapur': 3, 'Apharan': 3},
    'Sonali': {'Special Ops': 2, 'Panchayat': 5, 'Sacred Games': 3, 'Mirzapur': 4},
    'Ritvik': {'Panchayat': 5, 'Mirzapur': 4, 'Sacred Games': 4},
    'Harshita': {'Special Ops': 4, 'Criminal Justice': 4, 'Panchayat': 4, 'Mirzapur': 3, 'Apharan': 2},
    'Shubhi': {'Special Ops': 3, 'Panchayat': 4, 'Mirzapur': 3, 'Sacred Games': 5, 'Apharan': 3},
    'Shaurya': {'Panchayat': 4, 'Apharan': 1, 'Sacred Games': 4}
}

# (b) Unique web series
def unique_series(data):
    series = set()
    for user in data.values():
        series.update(user.keys())
    return sorted(series)
print("\nQuestion 2b: Unique series:", unique_series(dataset))

# (c) Cosine similarity between two items
def cosine_sim(item1, item2, data):
    # Collect ratings for users who rated both items
    common_users = []
    for user, ratings in data.items():
        if item1 in ratings and item2 in ratings:
            common_users.append((ratings[item1], ratings[item2]))
    if not common_users:
        return 0
    a = np.array([x[0] for x in common_users])
    b = np.array([x[1] for x in common_users])
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10)

# (d) Similarity between target and others
def item_similarities(target_item, data):
    items = unique_series(data)
    similarities = {}
    for item in items:
        if item != target_item:
            similarities[item] = cosine_sim(target_item, item, data)
    return similarities

# (f) Seen and unseen series
def seen_unseen(user, data):
    seen = set(data[user].keys())
    all_series = unique_series(data)
    unseen = [s for s in all_series if s not in seen]
    return seen, unseen

# (e & g) Recommender function
def recommend(user, data, top_n=3):
    seen, unseen = seen_unseen(user, data)
    item_scores = {}
    for seen_item in seen:
        sims = item_similarities(seen_item, data)
        for unseen_item, sim in sims.items():
            if unseen_item in unseen:
                if unseen_item not in item_scores:
                    item_scores[unseen_item] = 0
                item_scores[unseen_item] += sim * data[user][seen_item]
    # Sort by score
    sorted_items = sorted(item_scores.items(), key=lambda x: x[1], reverse=True)
    return [item[0] for item in sorted_items[:top_n]]

# Example usage
print("\nQuestion 2e/g: Recommendations for Ritvik:", recommend('Ritvik', dataset))


Question 2b: Unique series: ['Apharan', 'Criminal Justice', 'Mirzapur', 'Panchayat', 'Sacred Games', 'Special Ops']

Question 2e/g: Recommendations for Ritvik: ['Criminal Justice', 'Apharan', 'Special Ops']


In [3]:
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.impute import SimpleImputer

data = np.array([[5, 3, 0, 1], [4, 0, 0, 1], [1, 1, 0, 5], [0, 3, 4, 0]])

# (a) Matrix factorization (using SVD as approximation)
def matrix_factorization_prediction(data, rank=2):
    # Impute missing values (assuming 0 is missing)
    imputer = SimpleImputer(missing_values=0, strategy='mean')
    data_imputed = imputer.fit_transform(data)
    # Perform SVD
    U, sigma, Vt = np.linalg.svd(data_imputed, full_matrices=False)
    # Reconstruct matrix
    reconstructed = U[:, :rank] @ np.diag(sigma[:rank]) @ Vt[:rank, :]
    return reconstructed

# (b) SVD prediction
svd_reconstructed = matrix_factorization_prediction(data)
print("\nQuestion 3b SVD Predictions:\n", svd_reconstructed)

# (c) PCA prediction
def pca_prediction(data, n_components=2):
    # Center data
    data_centered = data - np.mean(data, axis=0)
    # Impute missing (0s)
    imputer = SimpleImputer(missing_values=0, strategy='mean')
    data_imputed = imputer.fit_transform(data_centered)
    # Apply PCA
    pca = PCA(n_components=n_components)
    reduced = pca.fit_transform(data_imputed)
    reconstructed = pca.inverse_transform(reduced)
    return reconstructed

pca_reconstructed = pca_prediction(data)
print("\nQuestion 3c PCA Predictions:\n", pca_reconstructed)


Question 3b SVD Predictions:
 [[4.83112638 3.10573185 4.14043801 0.89496859]
 [4.0222121  2.63300547 3.716315   1.15259687]
 [0.92159828 1.15258946 3.97766205 4.99704519]
 [3.5578411  2.4781175  4.1357998  2.30420504]]

Question 3c PCA Predictions:
 [[ 2.32008098  0.04686596 -0.88138367 -1.22292937]
 [ 1.71700535 -0.29886649 -1.14306647 -0.1795867 ]
 [-1.55063606 -1.08860774 -0.96661685  3.11689966]
 [-2.48645026  1.34060827  2.99106698 -1.71438359]]
