In [2]:
# Importing required Libraries-

import numpy as np # for arrays and computation purpose
import pandas as pd # for dataframes

# Important note- 

## UserID ---> Users
## ArtistID ---> Items
## FriendID ---> Neighbours
## TagID ---> Features


## 1. Creation of User-Item Rating Data - "R" Matrix

In [3]:
# User - Items Rating -- First Matrix R will be created using this data

df_ua = pd.read_table("hetrec2011-lastfm-2k/user_artists.dat")

df_ua.head(20)

# we will consider userID as our users
# We will consider artistId as our items
# weight -> how many times a user listened artistID (item) , so we will convert into rating as most listened song as highest rated


Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983
5,2,56,6152
6,2,57,5955
7,2,58,4616
8,2,59,4337
9,2,60,4147


In [4]:
# collecting information from this data

print("Number of unique users--", df_ua['userID'].nunique()) ## This feature is our user
print("Number of unique items--",df_ua['artistID'].nunique()) ## This is our Items value
print("Number of unique weight value--",df_ua['weight'].nunique()) ## This can be used to determine rating like more weighted value is more rating

print("Maximum value in weight column--",df_ua['weight'].min())
print("Minimum value in weight column--",df_ua['weight'].max())


Number of unique users-- 1892
Number of unique items-- 17632
Number of unique weight value-- 5436
Maximum value in weight column-- 1
Minimum value in weight column-- 352698


In [5]:
# function to  convert weight column into  Rated work

def partition(i):
    if i> 0:
        return 1

# Applying partition function to column name weight
df_ua['weight'] = df_ua['weight'].apply(partition) 

df_ua.head()

Unnamed: 0,userID,artistID,weight
0,2,51,1
1,2,52,1
2,2,53,1
3,2,54,1
4,2,55,1


In [6]:
# Converting the above dataframe in our Desired R matrix - users rating matrix
# I am using table in Pandas instead of Matrix
# making rows as users , columns as items and values in the matrix are Rating

R = df_ua.pivot(index = "userID" , columns = 'artistID' , values = 'weight')

R.head(100)

artistID,1,2,3,4,5,6,7,8,9,10,...,18736,18737,18738,18739,18740,18741,18742,18743,18744,18745
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,,,,,,,,,,,...,,,,,,,,,,
101,,,,,,,,1.0,,,...,,,,,,,,,,
102,,,,,,,,,,,...,,,,,,,,,,
103,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# converting the Nan values (means user didn't give rating to that item is represented as Nan) into zero values as mentioned in the paper

R =R.fillna(0)

R

artistID,1,2,3,4,5,6,7,8,9,10,...,18736,18737,18738,18739,18740,18741,18742,18743,18744,18745
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 2. Creation of Social Network among Users - "A" Matrix

In [8]:
# Social network infromation--   to determine vector N which contain neighbours of target user

df_uf = pd.read_table("hetrec2011-lastfm-2k/user_friends.dat")

df_uf.head()

Unnamed: 0,userID,friendID
0,2,275
1,2,428
2,2,515
3,2,761
4,2,831


In [9]:
# to convert this into our desired A matrix we need to first add a column which will represent relationship between users as 1
df_uf['relation'] =1

# to convert our data frame into desired adjacency matrix - where rows = users , column = neighbours , value = 1, if both are neighbours

A = df_uf.pivot(index = "userID" , columns = 'friendID' , values = 'relation')

# converting Nan values as 0
A  = A.fillna(0)

A

friendID,2,3,4,5,6,7,8,9,10,11,...,2090,2091,2092,2093,2094,2095,2096,2097,2099,2100
userID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2097,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2099,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 3.Creation of Feature Set of items - "M" matrix

In [10]:
# we will use this dataset to make our desired matrix
df_utt = pd.read_table("hetrec2011-lastfm-2k/user_taggedartists-timestamps.dat")

print(df_utt.head(20))

    userID  artistID  tagID      timestamp
0        2        52     13  1238536800000
1        2        52     15  1238536800000
2        2        52     18  1238536800000
3        2        52     21  1238536800000
4        2        52     41  1238536800000
5        2        63     13  1238536800000
6        2        63     14  1238536800000
7        2        63     23  1238536800000
8        2        63     40  1238536800000
9        2        73     13  1238536800000
10       2        73     14  1238536800000
11       2        73     15  1238536800000
12       2        73     18  1238536800000
13       2        73     20  1238536800000
14       2        73     21  1238536800000
15       2        73     22  1238536800000
16       2        73     26  1238536800000
17       2        94     13  1238536800000
18       2        94     15  1238536800000
19       2        94     20  1238536800000


In [11]:
# Removing unuseful columns
table = df_utt.drop(columns =['userID','timestamp'])

# creation of a new column to check whether a particular tag present in item or not
table['present'] = 1


In [12]:
table['tagID'].value_counts()

73      7503
24      5418
79      5251
18      4672
81      4458
        ... 
7620       1
7748       1
5701       1
1607       1
2049       1
Name: tagID, Length: 9749, dtype: int64

In [13]:
table = table.groupby('tagID').filter(lambda x : (x['tagID'].count()>20).any())

# Removing items with less than 20 tags

In [14]:
table = table.groupby('tagID').filter(lambda x : (x['tagID'].count()<6000).any())

# Removing items with most frequent tags

In [15]:
table['tagID'].value_counts()

24      5418
79      5251
18      4672
81      4458
130     4228
        ... 
4944      21
1646      21
789       21
5009      21
5076      21
Name: tagID, Length: 867, dtype: int64

In [16]:
# Coverting the above dataframe into our desired matrix with rows as items , column as feature and values as 1 or 0
M =pd.pivot_table(table, values ='present',index ='artistID',columns='tagID',aggfunc = np.max)

# Converting Nan values to 0
M  = M.fillna(0)

M

tagID,1,2,3,4,5,6,7,9,10,13,...,12208,12209,12218,12219,12220,12265,12287,12638,12639,12642
artistID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Algorithm Implementation -


### Important Note - Pandas Dataframes are used here so here , So it follows this-- 

### DataFrame[columns][index or row] 


In [17]:
#  our Target user is u

Hit_Count =0 # For evaluation of Hit Rank
position =0 # For evaluation of ARHR
Cold_s_i = 0 # For evaluation of CSIR

# Counting cold start items - which are rated by leass than or equal to 5 users
Cold_start_items =[]

for j in R.columns:
    times =0
    for i in R.index:
        if R[j][i] ==1:
            times = times+1
    if times<= 5:
        Cold_start_items.append(j)
        
        
# Selecting first 10 users for implementation of algorithm - As training time is high

Q = R.head(10).index.tolist()


for u in Q:
    
    
    ## Step - 1 Creation of neighbor users’ item set------------------------------------------------
    
    
    N = [] # Neighbour List of target user u

    for i in A.columns:
        if A[i][u]==1:
            N.append(i)
   # print("The Neighbour list of user",u, "is given below-")
   # print("N :",N)
    
    
    
    L = [] # List of items Rated by target user's neighbour

    for i in R.columns:
        for j in N:
            if R[i][j]>0:
                L.append(i)
                break
   # print("List of items rated by neighbours of user",u ,"are given below-")       
   # print("L:" , L)
    
    #----------------------------------end of step 1--------------------------------------------------
    
    
    
    
    
    
    
    ## Step - 2  Creation of User feature matrix -----------------------------------------------------
    
    
    # Dataframe for user feature matrix is initialized
    NUF  = pd.DataFrame(index = N , columns = M.columns)
    NUF = NUF.fillna(0)
    NUF.index.name = "artistID"
    
    for i in N:
        for j in L:
            if R[j][i]==1:
                for k in M.columns:
                    if(j in M.index):
                        NUF[k][i] = NUF[k][i] + M[k][j]
    
    # Finding Rated Items for target user
    L_u = []
    for i in R.columns:
        if R[i][u] == 1:
            L_u.append(i)
            
    #----------------------------------Test Data----------------------------------------------
    # Creating Test set  - using leave one mechanism for test user
    
    Test_Case = L_u.pop(0) # taking first item in item list rated by test user
    
    # -----------------------------------------------------------------------------------------
    
    # Creating user feature matrix for Target user
    TUF  = pd.DataFrame(index = (u,) , columns = M.columns)
    TUF = TUF.fillna(0)
    TUF.index.name = "artistID"
    
    for j in L_u:
        for k in M.columns:
                if(j in M.index):
                    TUF[k][u]  = TUF[k][u] + M[k][j]
                    
    # Appending user features values of target user inside main User feature matrix
    UF = NUF.append(TUF)
    
   # print("User Feature matrix - UF :" )
   # print( UF)
    
    # --------------------end of step 2-----------------------------------------------------------
    
    
    
    
    
    
    
    
    # Step 3   Computation of user priority -------------------------------------------------------
    
    # Dividing the row values of user feature matrix with it's row sum
    for i in UF.index:
        UF.loc[i] = UF.loc[i] / UF.loc[i].sum()
    
    
   # print("Normalised  UF :")
   # print(UF)
    
    # Distance Calculation between profile vectors
    
    import math
    D  = pd.DataFrame(index = (u,) , columns = N , dtype = float)
    D = D.fillna(0)
    
    for i in N:
        D[i][2] =  math.dist(UF.loc[u], UF.loc[i])
    
   # print("Distance matrix- Distance between profile vectors of Target user and Neighbours - ")
   # print(D)
                
      
    # Calculating  user priority using Distance -
    # Computing the user priority from distance array using formula - P[i] = 1/ (1 + D[i]^k)
    # As mentioned in paper k = 2 is taken
    
    P  = pd.DataFrame(index = (u,) , columns = N , dtype = float)
    P = P.fillna(0)
    for i in D:
        P[i][u] = 1/ (1 + D[i][u]**2)
    
    
   # print("User Priority Array -")
   # print(P)
    
    # --------------------end of step 3---------------------------------------------------------
    
    
    
    
    
    
    
    # Step 4   Computation of item priority ----------------------------------------------------
    
    P_I  = pd.DataFrame(index = ("Item Priority",) , columns = L , dtype = float)
    P_I = P_I.fillna(0)

    for i in L:
        Sum =0
        for j in P.columns:
            if R[i][j]>0:
                Sum = Sum  + P[j][u]     
                
        P_I[i]["Item Priority"] =Sum
    
    
   # print("Item Priority array -")
   # print(P_I)
    
    # --------------------end of step 4---------------------------------------------------------
    
    
    
    
    
    # Step 5  Recommendation based on the item priority-------------------------------------------
    
    # Recommendable Items - Items that are not present in target user rated list
    
    for i in P_I.columns:
        if (i in L_u):
            P_I.drop(i , inplace = True , axis =1)
    
    P_I = P_I.T  # Transpose of matrix
    
    P_I =P_I.sort_values("Item Priority" , ascending = False)  # Sorting the list
    
    # Taking Top -10 Recommendations
    
    Recommended_Items = P_I.head(10).index.tolist()
    
    

    
    # 1. Hit Rate and Average Reciprocal Hit Rank and CSIP --
    
    for i in Recommended_Items:
        if i== Test_Case:
            Hit_Count = Hit_Count +1
            position = position + 1/ (Recommended_Items.index(i) +1)
            break
            
   
    for i in Cold_start_items:
         if (i in Recommended_Items ):
                Cold_s_i = Cold_s_i +1
            
    
    
    
    
# Evaluation Metrics - Hit-Rate
#Hit_Rate  = Hit_Count / df_ua['userID'].nunique()
Hit_Rate  = Hit_Count / len(Q)

# To Calculate ARHR - we need to find the index of test case in Reccomemded Items
#ARHR = position /df_ua['userID'].nunique()
ARHR = position /len(Q)

# To Calculate CSIP :
CSIP = Cold_s_i/ len(Q)

print("Hit Rate -" , Hit_Rate) 
print("Average Reciprocal Hit Rank -" , ARHR)
print("CSIP -" , CSIP)
        
    

    
    

    
    
    

Hit Rate - 0.3
Average Reciprocal Hit Rank - 0.15333333333333332
CSIP - 0.9


# For Cold Start Users --

In [18]:
#  our Target user is u

Hit_Count =0 # For evaluation of Hit Rank
position =0 # For evaluation of ARHR
Cold_s_i = 0 # For evaluation of CSIR

# Finding cold start items - which are rated by less than or equal to 5 users
Cold_start_items =[]

for j in R.columns:
    times =0
    for i in R.index:
        if R[j][i] ==1:
            times = times+1
    if times<= 5:
        Cold_start_items.append(j)
        
        

# Finding cold start items - which are rated by less than or equal to 5 users
Cold_start_users =[]

for i in R.index:
    number =0
    for j in R.columns:
        if R[j][i]==1:
            number = number +1
    if number <=5:
        Cold_start_users.append(i)
        
# Selecting first 10 cold start users users for implementation of algorithm - As training time is high
Q = Cold_start_users[0 : 10]


for u in Q:
    
    
    ## Step - 1 Creation of neighbor users’ item set------------------------------------------------
    
    
    N = [] # Neighbour List of target user u

    for i in A.columns:
        if A[i][u]==1:
            N.append(i)
   # print("The Neighbour list of user",u, "is given below-")
   # print("N :",N)
    
    
    
    L = [] # List of items Rated by target user's neighbour

    for i in R.columns:
        for j in N:
            if R[i][j]>0:
                L.append(i)
                break
   # print("List of items rated by neighbours of user",u ,"are given below-")       
   # print("L:" , L)
    
    #----------------------------------end of step 1--------------------------------------------------
    
    
    
    
    
    
    
    ## Step - 2  Creation of User feature matrix -----------------------------------------------------
    
    
    # Dataframe for user feature matrix is initialized
    NUF  = pd.DataFrame(index = N , columns = M.columns)
    NUF = NUF.fillna(0)
    NUF.index.name = "artistID"
    
    for i in N:
        for j in L:
            if R[j][i]==1:
                for k in M.columns:
                    if(j in M.index):
                        NUF[k][i] = NUF[k][i] + M[k][j]
    
    # Finding Rated Items for target user
    L_u = []
    for i in R.columns:
        if R[i][u] == 1:
            L_u.append(i)
            
    #----------------------------------Test Data----------------------------------------------
    # Creating Test set  - using leave one mechanism for test user
    
    Test_Case = L_u.pop(0) # taking first item in item list rated by test user
    
    # -----------------------------------------------------------------------------------------
    
    # Creating user feature matrix for Target user
    TUF  = pd.DataFrame(index = (u,) , columns = M.columns)
    TUF = TUF.fillna(0)
    TUF.index.name = "artistID"
    
    for j in L_u:
        for k in M.columns:
                if(j in M.index):
                    TUF[k][u]  = TUF[k][u] + M[k][j]
                    
    # Appending user features values of target user inside main User feature matrix
    UF = NUF.append(TUF)
    
   # print("User Feature matrix - UF :" )
   # print( UF)
    
    # --------------------end of step 2-----------------------------------------------------------
    
    
    
    
    
    
    
    
    # Step 3   Computation of user priority -------------------------------------------------------
    
    # Dividing the row values of user feature matrix with it's row sum
    for i in UF.index:
        UF.loc[i] = UF.loc[i] / UF.loc[i].sum()
    
    
   # print("Normalised  UF :")
   # print(UF)
    
    # Distance Calculation between profile vectors
    
    import math
    D  = pd.DataFrame(index = (u,) , columns = N , dtype = float)
    D = D.fillna(0)
    
    for i in N:
        D[i][2] =  math.dist(UF.loc[u], UF.loc[i])
    
   # print("Distance matrix- Distance between profile vectors of Target user and Neighbours - ")
   # print(D)
                
      
    # Calculating  user priority using Distance -
    # Computing the user priority from distance array using formula - P[i] = 1/ (1 + D[i]^k)
    # As mentioned in paper k = 2 is taken
    
    P  = pd.DataFrame(index = (u,) , columns = N , dtype = float)
    P = P.fillna(0)
    for i in D:
        P[i][u] = 1/ (1 + D[i][u]**2)
    
    
   # print("User Priority Array -")
   # print(P)
    
    # --------------------end of step 3---------------------------------------------------------
    
    
    
    
    
    
    
    # Step 4   Computation of item priority ----------------------------------------------------
    
    P_I  = pd.DataFrame(index = ("Item Priority",) , columns = L , dtype = float)
    P_I = P_I.fillna(0)

    for i in L:
        Sum =0
        for j in P.columns:
            if R[i][j]>0:
                Sum = Sum  + P[j][u]     
                
        P_I[i]["Item Priority"] =Sum
    
    
   # print("Item Priority array -")
   # print(P_I)
    
    # --------------------end of step 4---------------------------------------------------------
    
    
    
    
    
    # Step 5  Recommendation based on the item priority-------------------------------------------
    
    # Recommendable Items - Items that are not present in target user rated list
    
    for i in P_I.columns:
        if (i in L_u):
            P_I.drop(i , inplace = True , axis =1)
    
    P_I = P_I.T  # Transpose of matrix
    
    P_I =P_I.sort_values("Item Priority" , ascending = False)  # Sorting the list
    
    # Taking Top -10 Recommendations
    
    Recommended_Items = P_I.head(10).index.tolist()
    
    

    
    # 1. Hit Rate and Average Reciprocal Hit Rank and CSIP --
    
    for i in Recommended_Items:
        if i== Test_Case:
            Hit_Count = Hit_Count +1
            position = position + 1/ (Recommended_Items.index(i) +1)
            break
            
   
    for i in Cold_start_items:
         if (i in Recommended_Items ):
                Cold_s_i = Cold_s_i +1
            
    
    
    
    
# Evaluation Metrics - Hit-Rate
#Hit_Rate  = Hit_Count / df_ua['userID'].nunique()
Hit_Rate  = Hit_Count / len(Q)

# To Calculate ARHR - we need to find the index of test case in Reccomemded Items
#ARHR = position /df_ua['userID'].nunique()
ARHR = position /len(Q)

# To Calculate CSIP :
CSIP = Cold_s_i/ len(Q)

print("Hit Rate -" , Hit_Rate) 
print("Average Reciprocal Hit Rank -" , ARHR)
print("CSIP -" , CSIP)
        
    

    
    

    
    
    

Hit Rate - 0.4
Average Reciprocal Hit Rank - 0.22111111111111112
CSIP - 0.1
