In [7]:
# Import necessary libraries
import numpy as np
import pandas as pd
from scipy.spatial.distance import mahalanobis

# Function to calculate Mahalanobis distance
def mahalanobis_distance(u, v, VI):
    return mahalanobis(u, v, VI)

# Collaborative filtering function using Mahalanobis distance
def collaborative_filtering(data, user_id, VI):
    # Calculate the Mahalanobis distance between the target user and all other users
    distances = []
    target_user = data.loc[user_id].values
    for other_user_id in data.index:
        if other_user_id != user_id:
            other_user = data.loc[other_user_id].values
            distance = mahalanobis_distance(target_user, other_user, VI)
            distances.append((other_user_id, distance))
    
    # Sort distances
    distances.sort(key=lambda x: x[1])
    
    # Return the sorted list of distances
    return distances


## Load

In [8]:
features = pd.read_csv("../data/features/user_item.csv")
features.head()

Unnamed: 0,profile_id,offer_0,offer_1,offer_2,offer_3,offer_4,offer_5,offer_6,offer_7,offer_8,offer_9
0,0009655768c64bdeb2e877511632db8f,0,4,0,4,0,3,0,3,0,0
1,00116118485d4dfda04fdbaba9a87b5c,0,0,0,8,0,0,0,0,0,0
2,0011e0d4e6b944f998e987f904e8c1e5,4,0,0,0,0,3,3,3,0,2
3,0020c2b971eb4e9188eac86d93036a77,0,4,4,0,0,3,0,0,0,0
4,0020ccbbb6d84e358d3414a3ff76cffd,4,0,0,4,0,3,3,0,0,0


## Train

In [9]:
# Sample data
data = pd.DataFrame({
    'user_id': [1, 2, 3, 4],
    'item_1': [5, 4, 1, 2],
    'item_2': [3, 2, 5, 1],
    'item_3': [1, 2, 3, 4]
}).set_index('user_id')

# Calculate the inverse covariance matrix
VI = np.linalg.inv(np.cov(data.T))

# Use the collaborative filtering function
user_id = 1
distances = collaborative_filtering(data, user_id, VI)

# Display the results
print("Distances from user", user_id, "to other users:")
for other_user_id, distance in distances:
    print(f"User {other_user_id}: {distance}")

Distances from user 1 to other users:
User 3: 2.4494897427831317
User 4: 2.4494897427831317
User 2: 2.4494897427832827
