<a href="https://colab.research.google.com/github/kirmanioussema12/Systeme-de-recommendation/blob/main/Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the train/test rating splits (tab-separated, column names supplied
# explicitly) and discard the timestamp column, which is not used afterwards.
train_data = pd.read_csv(
    "u1.base", sep="\t", names=["user_id", "item_id", "rating", "timestamp"]
).drop(columns=["timestamp"])
test_data = pd.read_csv(
    "u1.test", sep="\t", names=["user_id", "item_id", "rating", "timestamp"]
).drop(columns=["timestamp"])

In [None]:
# Build the user x item rating matrix from the training data; (user, item)
# pairs without a rating are filled with 0.
vote_matrix = (
    train_data
    .pivot(index="user_id", columns="item_id", values="rating")
    .fillna(0)
)


In [None]:
# Turn the 0 placeholders back into NaN so that unrated items are treated as
# missing values rather than as a rating of zero.
vote_matrix_nan = vote_matrix.mask(vote_matrix == 0)
print(vote_matrix_nan)

item_id  1     2     3     4     5     6     7     8     9     10    ...  \
user_id                                                              ...   
1         5.0   3.0   4.0   3.0   3.0   NaN   4.0   1.0   5.0   NaN  ...   
2         4.0   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   2.0  ...   
3         NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   
4         NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   
5         NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   
...       ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   
939       NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   5.0   NaN  ...   
940       NaN   NaN   NaN   2.0   NaN   NaN   4.0   5.0   3.0   NaN  ...   
941       5.0   NaN   NaN   NaN   NaN   NaN   4.0   NaN   NaN   NaN  ...   
942       NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   
943       NaN   5.0   NaN   NaN   NaN   NaN   NaN   NaN   3.0   NaN  ...   

item_id  16

In [None]:
def pearson_similarity_with_nan(user1, user2, min_common_items=7):
    """Pearson correlation between two users over the items both have rated.

    Reads the module-level ``vote_matrix_nan`` (rows are looked up by user id
    with ``.loc``; NaN marks an unrated item).

    Parameters
    ----------
    user1, user2 : row labels of ``vote_matrix_nan``.
    min_common_items : minimum number of co-rated items required before a
        correlation is computed.

    Returns
    -------
    The Pearson correlation of the co-rated votes, or 0 when there are fewer
    than ``min_common_items`` co-rated items or when the correlation is
    undefined (one of the users gave the same rating to every co-rated item).
    """
    row1 = vote_matrix_nan.loc[user1]
    row2 = vote_matrix_nan.loc[user2]

    # Boolean mask of the items rated by both users.
    common_items = row1.notna() & row2.notna()

    # Too little overlap: treat the pair as not similar.
    if common_items.sum() < min_common_items:
        return 0

    ratings1 = row1[common_items]
    ratings2 = row2[common_items]

    # Pearson divides by both standard deviations; with a constant vector the
    # result is NaN (and numpy emits a divide warning). Report "no similarity"
    # instead of letting NaN leak into the similarity matrix.
    if ratings1.nunique() < 2 or ratings2.nunique() < 2:
        return 0

    similarity = ratings1.corr(ratings2)
    return 0 if pd.isna(similarity) else similarity

# Build the symmetric user-user similarity matrix. Row/column position p stands
# for user_id p + 1 (this presumes user ids run 1..num_users without gaps --
# verify against the data set).
num_users = len(vote_matrix_nan.index)
similarity_matrix = np.zeros((num_users, num_users))

# Visit every unordered pair (u, v) with u < v exactly once and mirror the value.
upper_rows, upper_cols = np.triu_indices(num_users, k=1)
for u, v in zip(upper_rows.tolist(), upper_cols.tolist()):
    sim = pearson_similarity_with_nan(u + 1, v + 1)
    similarity_matrix[u, v] = similarity_matrix[v, u] = sim

np.fill_diagonal(similarity_matrix, 1)  # a user is maximally similar to themselves



  c /= stddev[:, None]
  c /= stddev[None, :]


In [None]:
def get_k_nearest_neighbors(user_id, k):
    """Return the positions of the (at most) k users most similar to ``user_id``.

    Reads the module-level ``similarity_matrix``; the row for ``user_id`` is at
    position ``user_id - 1``.

    Only users with a strictly positive similarity are kept (this also drops
    NaN entries), and the target user is excluded: the matrix diagonal is 1, so
    without the exclusion the user would be returned as their own best
    neighbour and take one of the k slots.

    Returns
    -------
    list of matrix positions (0-based, i.e. ``neighbor_user_id - 1``), ordered
    by decreasing similarity, with at most ``k`` entries.
    """
    user_index = user_id - 1  # position of the user in the matrix
    similarities = similarity_matrix[user_index]
    ranked = np.argsort(similarities)[::-1]  # decreasing similarity
    neighbors = [
        candidate
        for candidate in ranked
        if candidate != user_index and similarities[candidate] > 0
    ]
    return neighbors[:k]


In [None]:
def predict_rating(user_id, item_id, k):
    """Predict the rating of ``user_id`` for ``item_id`` from k nearest neighbours.

    The prediction is the similarity-weighted average of the ratings that the
    neighbours returned by ``get_k_nearest_neighbors(user_id, k)`` gave to the
    item. Reads the module-level ``vote_matrix_nan`` and ``similarity_matrix``.

    Returns
    -------
    The weighted average, or NaN when no prediction can be made: the item is
    not a column of ``vote_matrix_nan``, or none of the neighbours rated it.
    """
    # An item absent from the training matrix cannot be looked up with .loc;
    # report "no prediction" instead of raising KeyError.
    if item_id not in vote_matrix_nan.columns:
        return np.nan

    user_index = user_id - 1
    numerator = 0
    denominator = 0

    for neighbor in get_k_nearest_neighbors(user_id, k):
        # Neighbours are matrix positions; the user id is position + 1.
        rating = vote_matrix_nan.loc[neighbor + 1, item_id]
        if np.isnan(rating):
            continue  # this neighbour did not rate the item

        similarity = similarity_matrix[user_index, neighbor]
        numerator += similarity * rating
        denominator += similarity

    if denominator == 0:
        return np.nan  # no neighbour rated this item
    return numerator / denominator

def predict_all_ratings(vote_matrix_nan, k):
    """Return a copy of ``vote_matrix_nan`` with every NaN replaced by a prediction.

    Existing ratings are left untouched; each missing (user, item) entry is
    filled with ``predict_rating(user_id, item_id, k)``. The input frame is not
    modified.
    """
    completed = vote_matrix_nan.copy()
    missing = vote_matrix_nan.isna()

    # Walk the matrix row by row, visiting only the cells that have no rating.
    for user_id, row_is_missing in missing.iterrows():
        for item_id in row_is_missing[row_is_missing].index:
            completed.loc[user_id, item_id] = predict_rating(user_id, item_id, k)

    return completed

# Number of nearest neighbours used for every prediction.
k = 10

# Fill every missing entry of the vote matrix with its predicted rating.
predicted_matrix = predict_all_ratings(vote_matrix_nan, k)

# Show the completed matrix.
print(predicted_matrix)

# Example prediction for a single (user, item) pair. The value is computed here
# so that the cell runs on a fresh kernel without relying on leftover state.
user_id = 3  # replace with any user id
item_id = 7  # replace with any item id
predicted_rating = predict_rating(user_id, item_id, k)
print(f"Prédiction de vote pour l'utilisateur {user_id} et l'item {item_id}: {predicted_rating}")

item_id      1         2        3         4     5     6         7     \
user_id                                                                
1        5.000000  3.000000  4.00000  3.000000   3.0   NaN  4.000000   
2        4.000000  2.000000      NaN  3.517010   3.0   NaN  4.486163   
3        3.507951  3.000000  3.53474  3.000000   5.0   NaN  3.578473   
4        4.000000  3.303905  4.00000  2.742059   4.0   NaN  4.198756   
5        4.444125  3.000000      NaN  3.000000   2.0   NaN  4.659708   
...           ...       ...      ...       ...   ...   ...       ...   
939      3.246592  3.000000  3.00000  3.000000   NaN   NaN  4.658906   
940      3.000000       NaN      NaN  2.000000   4.0   NaN  4.000000   
941      5.000000  4.000000  1.00000  1.000000   NaN   NaN  4.000000   
942      4.343375       NaN  4.00000       NaN   4.0   NaN  4.483315   
943      4.194672  5.000000      NaN       NaN   NaN   NaN  3.965452   

item_id      8         9         10    ...  1673  1674  1675  1