In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the ratings table: one row per (userId, layerId, rating) record.
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,layerId,rating
0,1,1,5
1,1,3,2
2,1,6,5
3,1,47,5
4,1,50,5


In [3]:
# Load the lookup table that gives the display Name of every layerId.
layers = pd.read_csv("layers.csv")
layers.head()

Unnamed: 0,layerId,Name
0,1,ابتسام
1,2,ابتهاج
2,3,ابتهال
3,4,اجتهاد
4,5,ازدهار


In [4]:
# Headline sizes of the ratings table: rows, distinct layers, distinct users.
n_ratings = ratings.shape[0]
n_layers = ratings['layerId'].unique().size
n_users = ratings['userId'].unique().size

# Report the raw counts first, then the density per user and per layer.
print(f"Number of ratings: {n_ratings}")
print(f"Number of unique layerId's: {n_layers}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per layer: {round(n_ratings/n_layers, 2)}")

Number of ratings: 1405
Number of unique layerId's: 1024
Number of unique users: 14
Average ratings per user: 100.36
Average ratings per layer: 1.37


In [5]:
# Number of ratings submitted by each user, as a two-column frame
# (userId, n_ratings).
user_freq = (
    ratings
    .groupby('userId')['layerId']
    .count()
    .reset_index(name='n_ratings')
)
user_freq.head()

Unnamed: 0,userId,n_ratings
0,1,232
1,2,29
2,3,39
3,4,216
4,5,44


In [6]:
# Mean rating per layer, kept as a one-column frame indexed by layerId.
mean_rating = ratings[['layerId', 'rating']].groupby('layerId').mean()
# layerId with the lowest mean rating (idxmin reports the first one on ties).
lowest_rated = mean_rating['rating'].idxmin()
layers[layers['layerId'].eq(lowest_rated)]

Unnamed: 0,layerId,Name
47,52,أفراح


In [7]:
# layerId with the highest mean rating (idxmax reports the first one on ties).
highest_rated = mean_rating['rating'].idxmax()
layers[layers['layerId'].eq(highest_rated)]

Unnamed: 0,layerId,Name
0,1,ابتسام


In [8]:
# List every individual rating given to the highest-rated layer.
ratings.loc[ratings['layerId'].eq(highest_rated)]

Unnamed: 0,userId,layerId,rating
0,1,1,5
516,5,1,5
874,7,1,5


In [9]:
# List every individual rating given to the lowest-rated layer.
ratings.loc[ratings['layerId'].eq(lowest_rated)]

Unnamed: 0,userId,layerId,rating
304,4,52,1


In [10]:
# The extreme layers above were each rated by only a few users, so their plain
# means are unreliable. Gather each layer's rating count and mean, which are
# the inputs a Bayesian average needs.
layer_stats = ratings.groupby('layerId')['rating'].agg(['count', 'mean'])

In [11]:
# Next, build the item-user rating matrix (rows = layers, columns = users) as a SciPy CSR matrix
from scipy.sparse import csr_matrix

In [12]:
def create_matrix(df):
    """Build a sparse layer-by-user rating matrix from a ratings table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``userId``, ``layerId`` and ``rating``.

    Returns
    -------
    X : scipy.sparse.csr_matrix
        Matrix of shape ``(M, N)`` where M is the number of distinct layers
        (rows) and N the number of distinct users (columns); each stored
        entry is the rating a user gave a layer.
    user_mapper : dict
        userId -> column index of ``X``.
    layer_mapper : dict
        layerId -> row index of ``X``.
    user_inv_mapper : dict
        Column index of ``X`` -> userId.
    layer_inv_mapper : dict
        Row index of ``X`` -> layerId.

    Notes
    -----
    Indices are assigned in the sorted order of the ids, as returned by
    ``np.unique``. If the same (userId, layerId) pair occurs in several rows
    of ``df``, the sparse constructor adds those ratings together.
    """
    # One pass per id column: the sorted distinct ids, plus for every row of
    # df the position of its id among them (which is exactly the matrix index).
    user_ids, user_index = np.unique(df["userId"], return_inverse=True)
    layer_ids, layer_index = np.unique(df["layerId"], return_inverse=True)

    N = len(user_ids)
    M = len(layer_ids)

    # Map ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    layer_mapper = dict(zip(layer_ids, range(M)))

    # Map indices to ids
    user_inv_mapper = dict(zip(range(N), user_ids))
    layer_inv_mapper = dict(zip(range(M), layer_ids))

    # Layers index the rows and users the columns.
    X = csr_matrix((df["rating"], (layer_index, user_index)), shape=(M, N))

    return X, user_mapper, layer_mapper, user_inv_mapper, layer_inv_mapper

In [13]:
# Build the sparse rating matrix X (rows = layers, columns = users) together
# with the id <-> index lookup dictionaries for both axes.
X, user_mapper, layer_mapper, user_inv_mapper, layer_inv_mapper = create_matrix(ratings)

In [14]:
# Inspect the rating values explicitly stored in the sparse matrix.
X.data

array([5, 5, 5, ..., 5, 5, 5], dtype=int64)

In [15]:
from sklearn.neighbors import NearestNeighbors

In [16]:
"""
Find similar layers using KNN
"""
def find_similar_layers(layer_id, X, k, metric='cosine', show_distance=False,
                        mapper=None, inv_mapper=None):
    """Find the layers most similar to ``layer_id`` with k-nearest neighbours.

    Parameters
    ----------
    layer_id : hashable
        Id of the query layer; must be a key of the id -> row mapping.
    X : sparse matrix or array
        Rating matrix with one row per layer.
    k : int
        Number of neighbours requested.
    metric : str, default 'cosine'
        Distance metric handed to ``NearestNeighbors``.
    show_distance : bool, default False
        When True, each result is a ``(layer_id, distance)`` tuple instead of
        a bare layer id.
    mapper : dict, optional
        layerId -> row index of ``X``. Defaults to the module-level
        ``layer_mapper``.
    inv_mapper : dict, optional
        Row index of ``X`` -> layerId. Defaults to the module-level
        ``layer_inv_mapper``.

    Returns
    -------
    list
        At most ``k`` neighbours, nearest first, never containing the query
        layer itself. Fewer than ``k`` are returned when ``X`` does not have
        enough other rows.

    Raises
    ------
    KeyError
        If ``layer_id`` is not present in the id -> row mapping.
    """
    if mapper is None:
        mapper = layer_mapper
    if inv_mapper is None:
        inv_mapper = layer_inv_mapper

    layer_ind = mapper[layer_id]
    layer_vec = X[layer_ind].reshape(1, -1)

    # The query row is part of the fitted data, so ask for one extra neighbour;
    # never ask for more rows than exist, which the estimator rejects.
    n_query = min(k + 1, X.shape[0])

    kNN = NearestNeighbors(n_neighbors=n_query, algorithm="brute", metric=metric)
    kNN.fit(X)
    # Always fetch distances so both output formats share one code path.
    distances, indices = kNN.kneighbors(layer_vec, return_distance=True)

    neighbours = []
    for dist, ind in zip(distances[0], indices[0]):
        ind = int(ind)
        if ind == layer_ind:
            # Drop the query by identity: with tied distances it is not
            # guaranteed to be the first hit.
            continue
        neighbour_id = inv_mapper[ind]
        if show_distance:
            neighbours.append((neighbour_id, float(dist)))
        else:
            neighbours.append(neighbour_id)

    # If ties pushed the query out of the hit list, one surplus hit remains.
    return neighbours[:k]

In [19]:
# Lookup from layerId to its display name.
layer_id_column = layers['layerId'].tolist()
layer_name_column = layers['Name'].tolist()
layers_names = dict(zip(layer_id_column, layer_name_column))

# Query layer for the demonstration.
layer_id = 10

# Its ten nearest neighbours in rating space, and the query's own name.
similar_ids = find_similar_layers(layer_id, X, k=10)
layer_name = layers_names[layer_id]

In [20]:

# Print every recommended name behind the fixed title prefix.
for similar_id in similar_ids:
    recommended_name = layers_names[similar_id]
    print('المحامي' + '/ ' + recommended_name)
    

المحامي/ ديمة
المحامي/ عذاري
المحامي/ خاتمة
المحامي/ كريمة
المحامي/ سلوى
المحامي/ رقية
المحامي/ قمر
المحامي/ تغاني
المحامي/ حياة
المحامي/ لبنى
