<a href="https://colab.research.google.com/github/mohammed21kamall/Graduation-Project/blob/main/collaborative.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

def dcg(relevance_scores, b=2):
    """Discounted cumulative gain of a ranked list with a base-``b`` log discount.

    Items at 0-based positions below ``b`` contribute their relevance
    undiscounted; every later item at 1-based rank ``r`` contributes
    ``rel / log_b(r)``.

    Args:
        relevance_scores: Relevance values in ranked order (top-ranked first).
        b: Base of the logarithmic discount; must be greater than 1.

    Returns:
        The accumulated gain (0 for an empty list).

    Raises:
        ValueError: If ``b`` is not greater than 1 (log base would be invalid).
    """
    if b <= 1:
        raise ValueError("b must be greater than 1")

    # log_b(x) == log2(x) / log2(b). For the default b=2, log2(b) is exactly
    # 1.0, so results are bit-identical to a plain log2 discount.
    log2_of_base = np.log2(b)

    dcg_value = 0
    for i, rel in enumerate(relevance_scores):
        if i < b:
            dcg_value += rel
        else:
            # i is 0-based, so the 1-based rank is i + 1.
            dcg_value += rel / (np.log2(i + 1) / log2_of_base)
    return dcg_value

def ndcg(relevance_scores):
    """Normalised DCG: the DCG of the given order divided by the ideal DCG.

    The ideal ordering is the same scores sorted in descending order.

    Args:
        relevance_scores: Relevance values in ranked order (top-ranked first).

    Returns:
        ``dcg(relevance_scores) / dcg(ideal_order)``, or ``0.0`` when the
        ideal DCG is zero (empty list or no relevant items), where the ratio
        would otherwise be an undefined 0/0.
    """
    ideal_relevance = sorted(relevance_scores, reverse=True)
    ideal_dcg = dcg(ideal_relevance)
    if ideal_dcg == 0:
        # Nothing relevant to rank: avoid ZeroDivisionError / NaN.
        return 0.0
    return dcg(relevance_scores) / ideal_dcg

def hlu(relevance_scores, d, h=2):
    """Half-life-utility style score of a ranked list.

    Each item contributes ``max(score - d, 0)`` damped by ``2 ** (i / h)``,
    where ``i`` is the 0-based position; the total is divided by the number
    of items.

    NOTE(review): the usual Breese et al. formulation decays with
    ``2 ** ((rank - 1) / (h - 1))`` and normalises by the maximum achievable
    utility rather than the list length — confirm which definition is
    intended before relying on absolute values.

    Args:
        relevance_scores: Ratings in ranked order (top-ranked first).
        d: Neutral/default rating; scores at or below it contribute nothing.
        h: Half-life controlling how fast the positional weight decays.

    Returns:
        The averaged damped utility, or ``0.0`` for an empty list.
    """
    n_items = len(relevance_scores)
    if n_items == 0:
        # An empty ranking has no utility; avoids dividing by zero below.
        return 0.0

    hlu_value = 0
    for i, score in enumerate(relevance_scores):
        r_id = max(score - d, 0)
        hlu_value += r_id / (2 ** (i / h))
    return hlu_value / n_items

# Example user ratings
# Binary relevance list, in ranked order, fed to dcg() and ndcg() below.
user_ratings = [1, 0, 1, 1, 0]  # For DCG and NDCG
# NOTE(review): average_rating is not referenced by the code below; the
# threshold actually passed to hlu() is `d`, computed from relevance_scores.
average_rating = 3  # For HLU example
relevance_scores = [3, 4, 2, 5, 1]  # Example relevance scores for HLU
# Neutral rating for hlu(): the mean of the example scores.
d = np.mean(relevance_scores)

# Calculations
# Both DCG and NDCG use the default log base (b=2); HLU uses the default h=2.
print("DCG:", dcg(user_ratings))
print("NDCG:", ndcg(user_ratings))
print("HLU:", hlu(relevance_scores, d))


DCG: 2.1309297535714578
NDCG: 0.8099531166420328
HLU: 0.282842712474619


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Example user-item rating matrix
# Nested dict keyed user -> item -> rating; np.nan marks an unrated item.
data = {
    'User1': {'Item1': 5, 'Item2': 3, 'Item3': 4, 'Item4': np.nan},
    'User2': {'Item1': 3, 'Item2': 1, 'Item3': 2, 'Item4': 3},
    'User3': {'Item1': 4, 'Item2': 2, 'Item3': np.nan, 'Item4': 5},
    'User4': {'Item1': 3, 'Item2': 3, 'Item3': 1, 'Item4': 4},
    'User5': {'Item1': np.nan, 'Item2': 4, 'Item3': 5, 'Item4': 2}
}

# pd.DataFrame(data) puts users in columns; transpose so rows are users and
# columns are items.
df = pd.DataFrame(data).T
print("User-Item Rating Matrix:")
print(df)

# Fill missing values with the user's average rating
# (row-wise: each NaN is replaced by the mean of that row's observed ratings).
df_filled = df.apply(lambda row: row.fillna(row.mean()), axis=1)
print("\nUser-Item Matrix with Filled NaN values:")
print(df_filled)

# Calculate cosine similarity between users
# Computed on the mean-filled matrix, so imputed cells take part in the
# similarity. NOTE(review): ratings are not mean-centred, which is why all
# similarities come out close to 1 — confirm this is intended.
user_similarity = cosine_similarity(df_filled)
# Label both axes with the user ids so similarities can be looked up by name.
user_similarity_df = pd.DataFrame(user_similarity, index=df.index, columns=df.index)
print("\nUser Similarity Matrix:")
print(user_similarity_df)

def predict_rating(user, item, k=2):
    """Predict ``user``'s rating of ``item`` from the k most similar users.

    Reads the module-level ``df`` (user x item ratings, NaN = unrated) and
    ``user_similarity_df`` (user x user similarity).

    Args:
        user: Row label of the target user in ``df``.
        item: Column label of the target item in ``df``.
        k: Number of nearest neighbours to consider.

    Returns:
        The existing rating if the user already rated the item. Otherwise the
        similarity-weighted average of the ratings given to ``item`` by those
        of the k nearest neighbours who rated it, falling back to the user's
        own mean rating when none of them did (or their weights sum to zero).
    """
    known_rating = df.loc[user, item]
    if not np.isnan(known_rating):
        return known_rating

    # Exclude the target user by label rather than by skipping position 0:
    # with ties or floating-point self-similarity slightly below another
    # user's, the user is not guaranteed to sort first, and a positional skip
    # would discard a genuine neighbour instead.
    similar_users = (
        user_similarity_df[user]
        .drop(labels=user)
        .sort_values(ascending=False, kind="mergesort")  # stable for ties
        .index[:k]
    )

    # Only neighbours that actually rated the item can contribute.
    raters = [other for other in similar_users if not np.isnan(df.loc[other, item])]
    weights = [user_similarity_df.loc[user, other] for other in raters]

    denominator = sum(weights)
    if denominator == 0:
        return df.loc[user].mean()  # Fallback to the user's average rating if no similar users have rated the item

    numerator = sum(weight * df.loc[other, item] for weight, other in zip(weights, raters))
    return numerator / denominator

# Predict ratings for all missing values
# For every user row, map each item label through predict_rating(); row.name
# is the user label. Cells that already hold a rating are returned unchanged
# by predict_rating, so only the NaN cells receive new values.
predictions = df.apply(lambda row: row.index.map(lambda item: predict_rating(row.name, item)), axis=1)
# Rebuild a user x item frame with the same labels as df from the per-row results.
predicted_ratings = pd.DataFrame(predictions.tolist(), index=df.index, columns=df.columns)

print("\nPredicted Ratings:")
print(predicted_ratings)


User-Item Rating Matrix:
       Item1  Item2  Item3  Item4
User1    5.0    3.0    4.0    NaN
User2    3.0    1.0    2.0    3.0
User3    4.0    2.0    NaN    5.0
User4    3.0    3.0    1.0    4.0
User5    NaN    4.0    5.0    2.0

User-Item Matrix with Filled NaN values:
          Item1  Item2     Item3  Item4
User1  5.000000    3.0  4.000000    4.0
User2  3.000000    1.0  2.000000    3.0
User3  4.000000    2.0  3.666667    5.0
User4  3.000000    3.0  1.000000    4.0
User5  3.666667    4.0  5.000000    2.0

User Similarity Matrix:
          User1     User2     User3     User4     User5
User1  1.000000  0.975321  0.976802  0.915475  0.939233
User2  0.975321  1.000000  0.990991  0.916380  0.845524
User3  0.976802  0.990991  1.000000  0.921262  0.872624
User4  0.915475  0.916380  0.921262  1.000000  0.795970
User5  0.939233  0.845524  0.872624  0.795970  1.000000

Predicted Ratings:
          Item1  Item2     Item3     Item4
User1  5.000000    3.0  4.000000  4.000759
User2  3.000000    1.0

In [4]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from sklearn.metrics import mean_squared_error

# Example user-item rating matrix
# Nested dict keyed user -> item -> rating; np.nan marks an unrated item.
data = {
    'User1': {'Item1': 5, 'Item2': 3, 'Item3': 4, 'Item4': np.nan},
    'User2': {'Item1': 3, 'Item2': 1, 'Item3': 2, 'Item4': 3},
    'User3': {'Item1': 4, 'Item2': 2, 'Item3': np.nan, 'Item4': 5},
    'User4': {'Item1': 3, 'Item2': 3, 'Item3': 1, 'Item4': 4},
    'User5': {'Item1': np.nan, 'Item2': 4, 'Item3': 5, 'Item4': 2}
}

# Transpose so rows are users and columns are items.
df = pd.DataFrame(data).T
print("User-Item Rating Matrix:")
print(df)

# Fill NaN values with the mean of each user's ratings
# (svds needs a fully populated numeric matrix; the imputed values therefore
# influence the factorisation).
df_filled = df.apply(lambda x: x.fillna(x.mean()), axis=1)
print("\nUser-Item Matrix with Filled NaN values:")
print(df_filled)

# Convert the DataFrame to a NumPy array
R = df_filled.values

# Perform Singular Value Decomposition
# Truncated SVD keeping k=2 singular triplets, i.e. a rank-2 approximation of R.
U, sigma, Vt = svds(R, k=2)  # k is the number of latent factors

# Convert sigma to a diagonal matrix
# (svds returns the singular values as a 1-D array; rebinds `sigma`).
sigma = np.diag(sigma)

# Reconstruct the predicted rating matrix
# U @ diag(sigma) @ Vt — every cell, including observed ones, is re-estimated.
predicted_ratings = np.dot(np.dot(U, sigma), Vt)

# Convert the predicted ratings to a DataFrame
# Same row/column labels as df so cells can be looked up by user and item.
predicted_df = pd.DataFrame(predicted_ratings, columns=df.columns, index=df.index)
print("\nPredicted Ratings:")
print(predicted_df)

# Function to predict rating
def predict_rating(user, item):
    """Return the rating for (user, item), using the SVD estimate only if unrated.

    Reads the module-level ``df`` (observed ratings, NaN = unrated) and
    ``predicted_df`` (reconstructed ratings with the same labels).

    Args:
        user: Row label of the user.
        item: Column label of the item.

    Returns:
        The observed rating when present, otherwise the reconstructed value.
    """
    observed = df.loc[user, item]
    if not np.isnan(observed):
        # An actual rating always takes precedence over the model's estimate.
        return observed
    return predicted_df.loc[user, item]

# Predict ratings for all missing values
# For every user row, map each item label through predict_rating(); observed
# ratings are kept and only NaN cells are taken from the SVD reconstruction.
predictions = df.apply(lambda row: row.index.map(lambda item: predict_rating(row.name, item)), axis=1)
# NOTE: this rebinds `predicted_ratings`, which earlier in this cell held the
# raw NumPy reconstruction, to a labelled DataFrame.
predicted_ratings = pd.DataFrame(predictions.tolist(), index=df.index, columns=df.columns)

print("\nPredicted Ratings with Filled NaNs:")
# Last expression of the cell: shown via the notebook's rich display.
predicted_ratings.head()


User-Item Rating Matrix:
       Item1  Item2  Item3  Item4
User1    5.0    3.0    4.0    NaN
User2    3.0    1.0    2.0    3.0
User3    4.0    2.0    NaN    5.0
User4    3.0    3.0    1.0    4.0
User5    NaN    4.0    5.0    2.0

User-Item Matrix with Filled NaN values:
          Item1  Item2     Item3  Item4
User1  5.000000    3.0  4.000000    4.0
User2  3.000000    1.0  2.000000    3.0
User3  4.000000    2.0  3.666667    5.0
User4  3.000000    3.0  1.000000    4.0
User5  3.666667    4.0  5.000000    2.0

Predicted Ratings:
          Item1     Item2     Item3     Item4
User1  4.563644  3.251073  4.068051  4.213206
User2  2.699448  1.584915  1.774392  3.092043
User3  4.318044  2.655268  3.062645  4.733084
User4  3.223603  1.692475  1.744820  4.047576
User5  3.850677  3.652193  5.131313  1.942277

Predicted Ratings with Filled NaNs:


Unnamed: 0,Item1,Item2,Item3,Item4
User1,5.0,3.0,4.0,4.213206
User2,3.0,1.0,2.0,3.0
User3,4.0,2.0,3.062645,5.0
User4,3.0,3.0,1.0,4.0
User5,3.850677,4.0,5.0,2.0
