# Recommender System (suggest best-fit blogs that can help user)

Instead of storing the ratings from users, we want to measure how each user care about a blog by calculating the time user spend on a specific blog (backend processing with flask datetime.utcnow()).

For example: If a user spend many time reading a blog about "How to overcome depression", we will suggest they should also read blogs "How to overcome loneliness" because other users reading the blog about depression also read blogs about loneliness (Perhaps, depression usually links with loneliness). We can find the relatioship by using neighbourhood user-user collaborative filtering

## Update user (column) and blog (row) for utility matrix

This is how the website backend updates the `utility_matrixx.csv`file from which we will perform operations to yield the best-fit suggestion for users

In [21]:
import pandas as pd
import numpy as np

In [22]:
df = np.random.randint(low=1, high=11, size=(3,3))
df = pd.DataFrame(df)
df.to_csv("utility_matrix.csv", index=False)

In [23]:
num_cols = len(df.columns)
num_rows = len(df.index)
num_cols, num_rows

(3, 3)

In [None]:
csv_path = '/content/utility_matrix.csv'
df2 = pd.read_csv(csv_path)

In [None]:
num_users = 4
num_blogs = 4

num_cols = len(df2.columns)
num_rows = len(df2.index)

if num_users > num_cols:
        temp_col = [0] * num_rows
        temp_col = pd.DataFrame(temp_col)
        df2 = pd.concat([df2, temp_col], axis=1)
        df2 = df2.to_numpy()
        df2 = pd.DataFrame(df2)
        df2.to_csv(csv_path, index=False)

if num_blogs > num_rows:
        temp_col = [0] * num_cols
        temp_col = pd.DataFrame(temp_col)
        df2 = pd.concat([df2.T, temp_col], axis=1)
        df2 = df2.T.to_numpy()
        df2 = pd.DataFrame(df2)
        df2.to_csv(csv_path, index=False)

## Neighborhood-Based Collaborative Filtering

<br>
<center><img src=https://machinelearningcoban.com/assets/24_collaborativefiltering/user_cf.png></center>

In [45]:
import pandas as pd
import numpy as np

In [46]:
# import the utility matrix from (data retrieved from website)
# my website will update this utility matrix whenever user register account, create new blog, and read the blog
utility_matrix = pd.read_csv("/content/utility_matrix.csv")
df = utility_matrix

Or you can run this code to make a simulation of data:



```python
np.random.seed(42)

# in my current website, the num_users is 19 and num_blogs is 29.
df = np.random.randint(low=1, high=900, size=(29,19))
df = pd.DataFrame(df)

mask = np.random.choice([0, 1], size=df.shape, p=[0.3, 0.7])
df_with_zeros = df * mask
df_with_zeros
```



In [47]:
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,103,436,861,271,0,0,0,21,615,122,0,215,331,459,88,373,0,872,664
1,0,0,0,0,344,492,414,0,386,192,0,161,460,314,22,0,748,857,561
2,475,59,0,0,476,0,783,190,687,563,876,567,244,832,505,131,485,0,0
3,0,0,0,274,0,601,316,0,242,0,0,565,898,340,92,367,0,0,509
4,776,35,206,81,562,872,388,2,390,566,106,0,822,477,703,402,730,556,0
5,202,0,863,816,271,456,462,727,0,702,296,725,720,749,0,879,0,792,217
6,764,188,0,0,41,157,0,0,65,0,839,521,0,0,648,472,63,139,499
7,593,392,675,0,289,379,773,490,0,41,0,135,0,0,0,33,48,503,407
8,574,0,805,0,684,0,726,0,739,613,462,0,769,5,218,503,767,398,871
9,795,393,0,0,858,554,892,461,0,0,864,743,241,0,96,734,485,407,231


In [48]:
# replace any 0 value to NaN values (which are ? in the table)
utility_matrix.replace(0, np.nan, inplace=True)
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,103.0,436.0,861.0,271.0,,,,21.0,615.0,122.0,,215.0,331.0,459.0,88.0,373.0,,872.0,664.0
1,,,,,344.0,492.0,414.0,,386.0,192.0,,161.0,460.0,314.0,22.0,,748.0,857.0,561.0
2,475.0,59.0,,,476.0,,783.0,190.0,687.0,563.0,876.0,567.0,244.0,832.0,505.0,131.0,485.0,,
3,,,,274.0,,601.0,316.0,,242.0,,,565.0,898.0,340.0,92.0,367.0,,,509.0
4,776.0,35.0,206.0,81.0,562.0,872.0,388.0,2.0,390.0,566.0,106.0,,822.0,477.0,703.0,402.0,730.0,556.0,
5,202.0,,863.0,816.0,271.0,456.0,462.0,727.0,,702.0,296.0,725.0,720.0,749.0,,879.0,,792.0,217.0
6,764.0,188.0,,,41.0,157.0,,,65.0,,839.0,521.0,,,648.0,472.0,63.0,139.0,499.0
7,593.0,392.0,675.0,,289.0,379.0,773.0,490.0,,41.0,,135.0,,,,33.0,48.0,503.0,407.0
8,574.0,,805.0,,684.0,,726.0,,739.0,613.0,462.0,,769.0,5.0,218.0,503.0,767.0,398.0,871.0
9,795.0,393.0,,,858.0,554.0,892.0,461.0,,,864.0,743.0,241.0,,96.0,734.0,485.0,407.0,231.0


In [49]:
# compute the mean value of each column (see the formula on the image above)
mean = utility_matrix.mean(skipna=True)
mean

0     507.904762
1     418.764706
2     577.000000
3     504.222222
4     492.454545
5     474.400000
6     437.789474
7     396.650000
8     421.850000
9     472.545455
10    478.578947
11    438.300000
12    511.863636
13    431.700000
14    448.050000
15    506.428571
16    405.380952
17    487.173913
18    475.000000
dtype: float64

In [50]:
# then we subtract all values from one column by its mean value
utility_matrix = utility_matrix.sub(mean, axis=1)

# Next we fill NaN value = 0 -> the mean value
utility_matrix = utility_matrix.fillna(0)
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,-404.904762,17.235294,284.0,-233.222222,0.0,0.0,0.0,-375.65,193.15,-350.545455,0.0,-223.3,-180.863636,27.3,-360.05,-133.428571,0.0,384.826087,189.0
1,0.0,0.0,0.0,0.0,-148.454545,17.6,-23.789474,0.0,-35.85,-280.545455,0.0,-277.3,-51.863636,-117.7,-426.05,0.0,342.619048,369.826087,86.0
2,-32.904762,-359.764706,0.0,0.0,-16.454545,0.0,345.210526,-206.65,265.15,90.454545,397.421053,128.7,-267.863636,400.3,56.95,-375.428571,79.619048,0.0,0.0
3,0.0,0.0,0.0,-230.222222,0.0,126.6,-121.789474,0.0,-179.85,0.0,0.0,126.7,386.136364,-91.7,-356.05,-139.428571,0.0,0.0,34.0
4,268.095238,-383.764706,-371.0,-423.222222,69.545455,397.6,-49.789474,-394.65,-31.85,93.454545,-372.578947,0.0,310.136364,45.3,254.95,-104.428571,324.619048,68.826087,0.0
5,-305.904762,0.0,286.0,311.777778,-221.454545,-18.4,24.210526,330.35,0.0,229.454545,-182.578947,286.7,208.136364,317.3,0.0,372.571429,0.0,304.826087,-258.0
6,256.095238,-230.764706,0.0,0.0,-451.454545,-317.4,0.0,0.0,-356.85,0.0,360.421053,82.7,0.0,0.0,199.95,-34.428571,-342.380952,-348.173913,24.0
7,85.095238,-26.764706,98.0,0.0,-203.454545,-95.4,335.210526,93.35,0.0,-431.545455,0.0,-303.3,0.0,0.0,0.0,-473.428571,-357.380952,15.826087,-68.0
8,66.095238,0.0,228.0,0.0,191.545455,0.0,288.210526,0.0,317.15,140.454545,-16.578947,0.0,257.136364,-426.7,-230.05,-3.428571,361.619048,-89.173913,396.0
9,287.095238,-25.764706,0.0,0.0,365.545455,79.6,454.210526,64.35,0.0,0.0,385.421053,304.7,-270.863636,0.0,-352.05,227.571429,79.619048,-80.173913,-244.0


In [51]:
# function to calculate cosine similarity
def cosine(a, b):
  # add the epsilon to avoid denominator being 0
  return a.dot(b) / ((np.linalg.norm(a) * np.linalg.norm(b)) + np.finfo(np.float64).eps)

In [52]:
utility_matrix = utility_matrix.values
print(utility_matrix.shape)

(29, 19)


In [53]:
from tqdm.notebook import tqdm
COLS = df.columns.values.tolist().copy()

# now we have to create the user similariry matrix S (look at the image c above)
num_user = utility_matrix.shape[1]
user_to_user_similarity_matrix = np.zeros((num_user, num_user))

for i in tqdm(range(num_user)):
  for j in range(num_user):
    # Take one pair of users
    user_i = utility_matrix[:,i]
    user_j = utility_matrix[:,j]
    # do not need to muliply with zero -> less computation
    index_not_zero = (user_i > 0) & (user_j > 0)
    user_to_user_similarity_matrix[i,j] = cosine(user_i[index_not_zero], user_j[index_not_zero])

user_to_user_similarity_matrix

  0%|          | 0/19 [00:00<?, ?it/s]

array([[1.        , 0.93796209, 0.88236286, 0.72538745, 0.83699427,
        0.85427558, 0.79450579, 0.78522823, 0.81724852, 0.57084762,
        0.98279356, 0.94045664, 0.92237812, 0.8997879 , 0.76042038,
        0.67418561, 0.70986646, 0.89733604, 0.37902442],
       [0.93796209, 1.        , 0.72937378, 0.88515434, 0.95519753,
        0.98564166, 0.        , 0.98180013, 0.7488646 , 0.91685035,
        0.90632905, 1.        , 0.96767719, 0.93825873, 0.79628551,
        0.80018069, 0.6056809 , 0.63144527, 0.92747593],
       [0.88236286, 0.72937378, 1.        , 0.97749832, 0.94964984,
        0.81403033, 0.70401517, 0.99850324, 0.90289074, 0.88802299,
        0.8491619 , 0.97246212, 0.97194572, 0.84272197, 0.71245246,
        0.98171342, 0.90914553, 0.98130968, 0.81882358],
       [0.72538745, 0.88515434, 0.97749832, 1.        , 0.92901558,
        0.94269829, 0.5434879 , 0.78392471, 0.49312809, 0.70859808,
        0.64706495, 0.80023452, 0.6897607 , 0.7611769 , 0.7766559 ,
        0.769

In [54]:
# check the diagnal of the user similarity matrix = 1 -> true
user_to_user_similarity_matrix.diagonal()

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1.])

In [55]:
# Now we take out the position whose value is 0 in the utility matrix
zero_rating_indices = np.where(utility_matrix == 0)

In [56]:
# Now we have to fill in the position whose value is 0 in the utility matrix
for blog, user in zip(zero_rating_indices[0], zero_rating_indices[1]):
  similar_users = user_to_user_similarity_matrix[user]
  blog_time_spent = utility_matrix[blog]
  index = blog_time_spent > 0
  blog_time_spent = blog_time_spent[index]
  similar_users = similar_users[index]
  utility_matrix[blog, user] = np.sum(blog_time_spent * similar_users) / (np.sum(similar_users) + np.finfo(np.float64).eps)

In [57]:
utility_matrix = pd.DataFrame(utility_matrix)
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,-404.904762,17.235294,284.0,-233.222222,180.257445,156.751488,184.790459,-375.65,193.15,-350.545455,151.91,-223.3,-180.863636,27.3,-360.05,-133.428571,192.196212,384.826087,189.0
1,219.222971,181.932083,211.347864,199.97763,-148.454545,17.6,-23.789474,214.448216,-35.85,-280.545455,187.473391,-277.3,-51.863636,-117.7,-426.05,202.86738,342.619048,369.826087,86.0
2,-32.904762,-359.764706,217.625729,208.568416,-16.454545,222.796025,345.210526,-206.65,265.15,90.454545,397.421053,128.7,-267.863636,400.3,56.95,-375.428571,79.619048,196.527116,217.061178
3,192.613615,173.765985,178.474951,-230.222222,178.113093,126.6,-121.789474,180.505148,-179.85,175.227467,183.110487,126.7,386.136364,-91.7,-356.05,-139.428571,177.420518,180.073984,34.0
4,268.095238,-383.764706,-371.0,-423.222222,69.545455,397.6,-49.789474,-394.65,-31.85,93.454545,-372.578947,203.871205,310.136364,45.3,254.95,-104.428571,324.619048,68.826087,177.760108
5,-305.904762,292.419457,286.0,311.777778,-221.454545,-18.4,24.210526,330.35,265.593395,229.454545,-182.578947,286.7,208.136364,317.3,273.744875,372.571429,271.246053,304.826087,-258.0
6,256.095238,-230.764706,182.882154,176.740148,-451.454545,-317.4,183.998499,181.352023,-356.85,178.660459,360.421053,82.7,186.317894,184.898053,199.95,-34.428571,-342.380952,-348.173913,24.0
7,85.095238,-26.764706,98.0,111.913764,-203.454545,-95.4,335.210526,93.35,126.083884,-431.545455,147.843946,-303.3,128.81011,127.559252,124.333089,-473.428571,-357.380952,15.826087,-68.0
8,66.095238,237.538024,228.0,247.270299,191.545455,242.797996,288.210526,247.157287,317.15,140.454545,-16.578947,245.168818,257.136364,-426.7,-230.05,-3.428571,361.619048,-89.173913,396.0
9,287.095238,-25.764706,244.22062,237.151859,365.545455,79.6,454.210526,64.35,247.89943,250.340808,385.421053,304.7,-270.863636,259.689422,-352.05,227.571429,79.619048,-80.173913,-244.0


In [58]:
# add the mean value to get the initial scale
utility_matrix = utility_matrix.values
mean = mean.values
utility_matrix = utility_matrix + mean
utility_matrix = pd.DataFrame(utility_matrix)
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,103.0,436.0,861.0,271.0,672.71199,631.151488,622.579933,21.0,615.0,122.0,630.488947,215.0,331.0,459.0,88.0,373.0,597.577164,872.0,664.0
1,727.127733,600.696789,788.347864,704.199852,344.0,492.0,414.0,611.098216,386.0,192.0,666.052339,161.0,460.0,314.0,22.0,709.295951,748.0,857.0,561.0
2,475.0,59.0,794.625729,712.790638,476.0,697.196025,783.0,190.0,687.0,563.0,876.0,567.0,244.0,832.0,505.0,131.0,485.0,683.701029,692.061178
3,700.518377,592.530691,755.474951,274.0,670.567638,601.0,316.0,577.155148,242.0,647.772921,661.689434,565.0,898.0,340.0,92.0,367.0,582.80147,667.247897,509.0
4,776.0,35.0,206.0,81.0,562.0,872.0,388.0,2.0,390.0,566.0,106.0,642.171205,822.0,477.0,703.0,402.0,730.0,556.0,652.760108
5,202.0,711.184163,863.0,816.0,271.0,456.0,462.0,727.0,687.443395,702.0,296.0,725.0,720.0,749.0,721.794875,879.0,676.627006,792.0,217.0
6,764.0,188.0,759.882154,680.96237,41.0,157.0,621.787973,578.002023,65.0,651.205913,839.0,521.0,698.18153,616.598053,648.0,472.0,63.0,139.0,499.0
7,593.0,392.0,675.0,616.135986,289.0,379.0,773.0,490.0,547.933884,41.0,626.422894,135.0,640.673746,559.259252,572.383089,33.0,48.0,503.0,407.0
8,574.0,656.30273,805.0,751.492521,684.0,717.197996,726.0,643.807287,739.0,613.0,462.0,683.468818,769.0,5.0,218.0,503.0,767.0,398.0,871.0
9,795.0,393.0,821.22062,741.374081,858.0,554.0,892.0,461.0,669.74943,722.886262,864.0,743.0,241.0,691.389422,96.0,734.0,485.0,407.0,231.0


In [59]:
# save the file
utility_matrix.to_csv("filled_utility_matrix.csv", index=False)

In [60]:
# Now the position whose value is 0 has been filled.
# We have to use this result to suggest blogs that user may want to read (may want to spend long time reading)

utility_matrix = pd.read_csv("/content/utility_matrix.csv")
utility_matrix_filled = pd.read_csv("/content/filled_utility_matrix.csv")

In [61]:
utility_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,103,436,861,271,0,0,0,21,615,122,0,215,331,459,88,373,0,872,664
1,0,0,0,0,344,492,414,0,386,192,0,161,460,314,22,0,748,857,561
2,475,59,0,0,476,0,783,190,687,563,876,567,244,832,505,131,485,0,0
3,0,0,0,274,0,601,316,0,242,0,0,565,898,340,92,367,0,0,509
4,776,35,206,81,562,872,388,2,390,566,106,0,822,477,703,402,730,556,0
5,202,0,863,816,271,456,462,727,0,702,296,725,720,749,0,879,0,792,217
6,764,188,0,0,41,157,0,0,65,0,839,521,0,0,648,472,63,139,499
7,593,392,675,0,289,379,773,490,0,41,0,135,0,0,0,33,48,503,407
8,574,0,805,0,684,0,726,0,739,613,462,0,769,5,218,503,767,398,871
9,795,393,0,0,858,554,892,461,0,0,864,743,241,0,96,734,485,407,231


In [62]:
utility_matrix_filled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,103.0,436.0,861.0,271.0,672.71199,631.151488,622.579933,21.0,615.0,122.0,630.488947,215.0,331.0,459.0,88.0,373.0,597.577164,872.0,664.0
1,727.127733,600.696789,788.347864,704.199852,344.0,492.0,414.0,611.098216,386.0,192.0,666.052339,161.0,460.0,314.0,22.0,709.295951,748.0,857.0,561.0
2,475.0,59.0,794.625729,712.790638,476.0,697.196025,783.0,190.0,687.0,563.0,876.0,567.0,244.0,832.0,505.0,131.0,485.0,683.701029,692.061178
3,700.518377,592.530691,755.474951,274.0,670.567638,601.0,316.0,577.155148,242.0,647.772921,661.689434,565.0,898.0,340.0,92.0,367.0,582.80147,667.247897,509.0
4,776.0,35.0,206.0,81.0,562.0,872.0,388.0,2.0,390.0,566.0,106.0,642.171205,822.0,477.0,703.0,402.0,730.0,556.0,652.760108
5,202.0,711.184163,863.0,816.0,271.0,456.0,462.0,727.0,687.443395,702.0,296.0,725.0,720.0,749.0,721.794875,879.0,676.627006,792.0,217.0
6,764.0,188.0,759.882154,680.96237,41.0,157.0,621.787973,578.002023,65.0,651.205913,839.0,521.0,698.18153,616.598053,648.0,472.0,63.0,139.0,499.0
7,593.0,392.0,675.0,616.135986,289.0,379.0,773.0,490.0,547.933884,41.0,626.422894,135.0,640.673746,559.259252,572.383089,33.0,48.0,503.0,407.0
8,574.0,656.30273,805.0,751.492521,684.0,717.197996,726.0,643.807287,739.0,613.0,462.0,683.468818,769.0,5.0,218.0,503.0,767.0,398.0,871.0
9,795.0,393.0,821.22062,741.374081,858.0,554.0,892.0,461.0,669.74943,722.886262,864.0,743.0,241.0,691.389422,96.0,734.0,485.0,407.0,231.0


In [63]:
def convert_sec(t):
  h = t // 3600
  m = (t - h*3600)//60
  s = t - h*3600 - m*60
  return (f'{h} h {m} p {s} s')

In [65]:
current_user_logined_id = 4
for blog, user in zip(zero_rating_indices[0], zero_rating_indices[1]):
    if user == current_user_logined_id - 1:
      print(f'User {user + 1} is expected to spend time spend {convert_sec(utility_matrix_filled.iloc[blog, user])}s reading on blog {blog + 1}')

User 4 is expected to spend time spend 0.0 h 11.0 p 44.1998518261737 ss reading on blog 2
User 4 is expected to spend time spend 0.0 h 11.0 p 52.790637967274506 ss reading on blog 3
User 4 is expected to spend time spend 0.0 h 11.0 p 20.962370445523675 ss reading on blog 7
User 4 is expected to spend time spend 0.0 h 10.0 p 16.135986204366873 ss reading on blog 8
User 4 is expected to spend time spend 0.0 h 12.0 p 31.492521422544996 ss reading on blog 9
User 4 is expected to spend time spend 0.0 h 12.0 p 21.374080857518948 ss reading on blog 10
User 4 is expected to spend time spend 0.0 h 11.0 p 27.69597574876377 ss reading on blog 14
User 4 is expected to spend time spend 0.0 h 11.0 p 23.961202023650912 ss reading on blog 16
User 4 is expected to spend time spend 0.0 h 12.0 p 14.518920996640531 ss reading on blog 17
User 4 is expected to spend time spend 0.0 h 12.0 p 38.19027601784853 ss reading on blog 21
User 4 is expected to spend time spend 0.0 h 12.0 p 28.362799709319916 ss readi

In [67]:
# Next put it into a dict and then sort
dictionary = {}
current_user_logined_id = 4
for blog, user in zip(zero_rating_indices[0], zero_rating_indices[1]):
    if user == current_user_logined_id - 1:
      dictionary[blog + 1] = utility_matrix_filled.iloc[blog, user]

dictionary

{2: 704.1998518261737,
 3: 712.7906379672745,
 7: 680.9623704455237,
 8: 616.1359862043669,
 9: 751.492521422545,
 10: 741.374080857519,
 14: 687.6959757487638,
 16: 683.9612020236509,
 17: 734.5189209966405,
 21: 758.1902760178485,
 25: 748.3627997093199}

In [68]:
# sort the dictionary with second value
dictionary = sorted(dictionary.items(), key=lambda item: item[1], reverse=True)
dictionary

[(21, 758.1902760178485),
 (9, 751.492521422545),
 (25, 748.3627997093199),
 (10, 741.374080857519),
 (17, 734.5189209966405),
 (3, 712.7906379672745),
 (2, 704.1998518261737),
 (14, 687.6959757487638),
 (16, 683.9612020236509),
 (7, 680.9623704455237),
 (8, 616.1359862043669)]

- We do not suggest all these blogs
- The only blogs whose expected time spent is greater than the mean value will be suggested
- Now, we see that there is a big likelihood that user will want to spend more time reading the unread blogs.

In [71]:
# set the initial mean time reading as a threshold
threshold = mean[current_user_logined_id - 1]
threshold

504.22222222222223

In [74]:
result = [i for i, j in dictionary if j >= threshold]
result = result[:3] # We just suggest thres blogs with highest prediction scores
result

[21, 9, 25]

In [77]:
# function in flask backend
def recsys(current_user_logined_id):
  utility_matrix = pd.read_csv("/content/utility_matrix.csv")
  utility_matrix.replace(0, np.nan, inplace=True)
  mean = utility_matrix.mean(skipna=True)
  utility_matrix = utility_matrix.sub(mean, axis=1)
  utility_matrix = utility_matrix.fillna(0)
  utility_matrix = utility_matrix.values

  num_user = utility_matrix.shape[1]
  user_to_user_similarity_matrix = np.zeros((num_user, num_user))

  for i in range(num_user):
    for j in range(num_user):
      user_i = utility_matrix[:,i]
      user_j = utility_matrix[:,j]
      index_not_zero = (user_i > 0) & (user_j > 0)
      user_to_user_similarity_matrix[i,j] = cosine(user_i[index_not_zero], user_j[index_not_zero])

  zero_rating_indices = np.where(utility_matrix == 0)
  for blog, user in zip(zero_rating_indices[0], zero_rating_indices[1]):
    similar_users = user_to_user_similarity_matrix[user]
    blog_time_spent = utility_matrix[blog]
    index = blog_time_spent > 0
    blog_time_spent = blog_time_spent[index]
    similar_users = similar_users[index]
    utility_matrix[blog, user] = np.sum(blog_time_spent * similar_users) / (np.sum(similar_users) + np.finfo(np.float64).eps)

  mean = mean.values
  utility_matrix = utility_matrix + mean
  utility_matrix = pd.DataFrame(utility_matrix)
  utility_matrix.to_csv("filled_utility_matrix.csv", index=False)

  utility_matrix = pd.read_csv("/content/utility_matrix.csv")
  utility_matrix_filled = pd.read_csv("/content/filled_utility_matrix.csv")

  zero_rating_indices = np.where(utility_matrix == 0)
  dictionary = {}
  for blog, user in zip(zero_rating_indices[0], zero_rating_indices[1]):
      if user == current_user_logined_id - 1:
        dictionary[blog + 1] = utility_matrix_filled.iloc[blog, user]

  dictionary = sorted(dictionary.items(), key=lambda item: item[1], reverse=True)
  threshold = mean[current_user_logined_id - 1]
  result = [i for i, j in dictionary if j >= threshold]
  return result[:3]

res = recsys(4)
res

[21, 9, 25]