In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import pearsonr

In [2]:
# Two example rating vectors (ratings on a scale of 5), one entry per rated item
user1 = np.asarray([4, 5, 2, 3, 4])
user2 = np.asarray([5, 3, 2, 4, 5])


In [3]:
# 1. Cosine similarity: cosine() yields a distance, so similarity = 1 - distance
cosine_distance = cosine(user1, user2)
cosine_similarity = 1 - cosine_distance
print(f"Cosine Similarity: {cosine_similarity:.4f}")

Cosine Similarity: 0.9548


In [4]:
# 2. Pearson correlation similarity: pearsonr's result unpacks into two values;
#    only the first (the correlation coefficient) is used here
pearson_result = pearsonr(user1, user2)
pearson_corr, _ = pearson_result
print(f"Pearson Correlation Similarity: {pearson_corr:.4f}")


Pearson Correlation Similarity: 0.4372


In [5]:
# 3. Euclidean distance converted into a similarity score
euclidean_distance = euclidean(user1, user2)

# Map distance d >= 0 to 1 / (1 + d): the result lies in (0, 1] and is 1 when d == 0
euclidean_similarity = 1.0 / (1.0 + euclidean_distance)
print(f"Euclidean Distance Similarity: {euclidean_similarity:.4f}")

Euclidean Distance Similarity: 0.2743


In [18]:
import pandas as pd
# Rating table: one row per user, one column per movie
users = ["Limitha", "Harshitha", "Deepika", "Sanjana"]
movies = ["Bahubali", "Mufasa", "Interstellar", "RRR", "Mrs"]
ratings = [
    [5, 4, 3, 5, 2],  # Limitha
    [4, 5, 4, 3, 3],  # Harshitha
    [3, 4, 5, 2, 4],  # Deepika
    [2, 2, 1, 2, 4],  # Sanjana
]
df = pd.DataFrame(data=ratings, index=users, columns=movies)
print(df)

           Bahubali  Mufasa  Interstellar  RRR  Mrs
Limitha           5       4             3    5    2
Harshitha         4       5             4    3    3
Deepika           3       4             5    2    4
Sanjana           2       2             1    2    4


In [30]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine, euclidean

# User-item rating matrix: one row per user (4 users), one column per movie
users = ["Limmi", "Charith", "Sai", "Likhitha"]
movies = ["Bahubali", "Mufasa", "Interstellar", "RRR", "Mrs"]
ratings = np.array(
    [
        [5, 3, 4, 4, 2],  # Limmi
        [3, 1, 2, 3, 3],  # Charith
        [4, 3, 4, 5, 1],  # Sai
        [2, 2, 1, 2, 4],  # Likhitha
    ]
)
df = pd.DataFrame(ratings, index=users, columns=movies)
print(df)

# Function to compute similarity
def compute_similarity(df):
    """Compute pairwise user-user similarity scores from a rating matrix.

    Each row of ``df`` is treated as one user's rating vector. Every unordered
    pair of rows is compared exactly once using three measures:

    * cosine similarity, computed as ``1 - cosine distance``;
    * the Pearson correlation coefficient (the second value returned by
      ``pearsonr`` is discarded);
    * a Euclidean similarity, ``1 / (1 + distance)``, which lies in (0, 1]
      and equals 1 when the two vectors are identical.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric user-item ratings with one row per user. The row index
        supplies the user labels reported in the result.

    Returns
    -------
    pandas.DataFrame
        One row per user pair with the columns "User 1", "User 2",
        "Cosine Similarity", "Pearson Correlation" and "Euclidean Similarity".
        The frame has no rows when ``df`` has fewer than two rows.
    """
    # Take the labels from the frame itself rather than from a notebook-level
    # list, so the function reports correct names for any DataFrame passed in.
    labels = list(df.index)
    # Convert the ratings once up front instead of slicing a Series per pair.
    vectors = df.to_numpy(dtype=float)
    num_users = len(vectors)

    similarity_results = []
    for i in range(num_users):
        for j in range(i + 1, num_users):  # j > i: each unordered pair once
            first, second = vectors[i], vectors[j]

            # Cosine similarity (scipy returns a distance)
            cos_sim = 1 - cosine(first, second)

            # Pearson correlation coefficient
            pearson_sim, _ = pearsonr(first, second)

            # Euclidean distance mapped into (0, 1]
            euc_dist = euclidean(first, second)
            euc_sim = 1 / (1 + euc_dist)

            similarity_results.append([labels[i], labels[j], cos_sim, pearson_sim, euc_sim])

    return pd.DataFrame(
        similarity_results,
        columns=["User 1", "User 2", "Cosine Similarity", "Pearson Correlation", "Euclidean Similarity"],
    )

# Build the pairwise similarity table (one row per user pair) for the rating DataFrame
similarity_df = compute_similarity(df)

# Print the resulting table as plain text
print(similarity_df)

          Bahubali  Mufasa  Interstellar  RRR  Mrs
Limmi            5       3             4    4    2
Charith          3       1             2    3    3
Sai              4       3             4    5    1
Likhitha         2       2             1    2    4
    User 1    User 2  Cosine Similarity  Pearson Correlation  \
0    Limmi   Charith           0.929670             0.196116   
1    Limmi       Sai           0.978337             0.838557   
2    Limmi  Likhitha           0.799014            -0.720577   
3  Charith       Sai           0.885465             0.036860   
4  Charith  Likhitha           0.919145             0.408248   
5      Sai  Likhitha           0.725961            -0.812605   

   Euclidean Similarity  
0              0.210897  
1              0.366025  
2              0.161390  
3              0.195194  
4              0.309017  
5              0.150221  
