# Build the trust network: infer and compute new trust values between each active user and the other users

In [1]:
# import pandas library
import pandas as pd
# Ratings file: space-separated, no header row; one "userId itemId rating"
# record per line.
rating_df = pd.read_csv(
    "ratings.txt",
    sep=" ",
    header=None,
    names=["userId", "itemId", "rating"],
)

# Trust file: same layout; one "truster trustee value" record per line.
trust_df = pd.read_csv(
    "trust.txt",
    sep=" ",
    header=None,
    names=["truster", "trustee", "value"],
)

# Distinct user ids that appear in the ratings file; every later step
# iterates over these as the "active" users.
userIds = rating_df["userId"].unique()

# Build "trustnew.txt": for every user found in the rating file, the direct
# and inferred trust values of the users who trust them, following trust
# chains of length d <= 3.
#
# Each output line is "trustee,truster,value", where the trustee is the active
# user (uid) and value is the trust value stored on the last hop of the chain,
# scaled by the chain length:
#   d = 1  value                 rows of trust.txt whose trustee is uid
#   d = 2  value / 2             trusters of uid's d = 1 trusters
#   d = 3  round(value / 3, 2)   trusters of uid's d = 2 trusters
# Users further away than d = 3 are not inferred.
#
# The context manager guarantees the file is flushed and closed even if one
# of the lookups below raises.
with open("trustnew.txt", "w") as f:
    for uid in userIds:
        # d = 1: everybody who directly lists uid as trustee.  An empty
        # selection simply makes the loops below no-ops (e.g. a user that
        # nobody trusts).
        direct = trust_df.loc[trust_df['trustee'] == uid]
        direct_trusters = set(direct['truster'])

        # Trusters accepted at d = 2, in discovery order.  A truster reached
        # through several d = 1 users is recorded (and written) once per path.
        second_level = []

        for _, row in direct.iterrows():
            f.write(",".join((str(uid), str(row['truster']), str(row['value']))) + "\n")

            # d = 2: treat the direct truster as trustee and collect its
            # own trusters.
            hop2 = trust_df.loc[trust_df['trustee'] == row['truster']]
            for _, row1 in hop2.iterrows():
                candidate = row1['truster']
                # Skip chains that lead back to uid and users that already
                # have a (stronger) direct relation.
                if candidate != uid and candidate not in direct_trusters:
                    second_level.append(candidate)
                    f.write(",".join((str(uid), str(candidate), str(row1['value'] / 2))) + "\n")

        # d = 3: treat every d = 2 truster as trustee.  The depth is tracked
        # explicitly (second_level) rather than recognised by a halved value,
        # so the result does not depend on the raw trust values being 1.
        closer_trusters = direct_trusters.union(second_level)
        for middle in second_level:
            hop3 = trust_df.loc[trust_df['trustee'] == middle]
            for _, row3 in hop3.iterrows():
                candidate = row3['truster']
                # Skip uid itself (no self-trust line) and anybody already
                # reached at d = 1 or d = 2.
                if candidate != uid and candidate not in closer_trusters:
                    f.write(",".join((str(uid), str(candidate), str(round(row3['value'] / 3, 2)))) + "\n")

 # Generate ratings of active users and other users, weighted by their trust value

In [58]:
# Re-read the inferred trust file written above ("trustee,truster,value", no
# header).  The truster column is named "userId" so it can be joined with the
# ratings on that key.
trust_df = pd.read_csv('trustnew.txt', sep=',', header=None, names=["trustee", "userId", "value"])

# Build "ratingnew.txt", one "userId,itemId,rating" line per entry:
#   * every rating the active user gave is copied unchanged;
#   * for every item the active user did NOT rate but at least one of their
#     trusters did, a predicted rating
#         sum(trust value * rating) / sum(trust value)
#     over those trusters, rounded to two decimals.
#
# The context manager guarantees the file is flushed and closed even if a
# step below raises.
with open("ratingnew.txt", "w") as f:
    for uid in userIds:
        # The active user's own ratings are taken as-is.
        own_ratings = rating_df.loc[rating_df['userId'] == uid]
        for item, rating in zip(own_ratings['itemId'], own_ratings['rating']):
            f.write(",".join((str(uid), str(int(item)), str(rating))) + "\n")

        # Items already rated by the active user; no prediction is written
        # for these, which avoids duplicate (user, item) entries.
        rated_items = set(own_ratings['itemId'])

        # Trust relations (direct and inferred) in which uid is the trustee.
        tt_df = trust_df.loc[trust_df['trustee'] == uid]
        if tt_df.empty:
            # Nobody trusts this user: nothing to predict.
            continue

        # Ratings of every truster, stacked into one frame.
        # NOTE: a truster listed more than once in trustnew.txt contributes
        # one copy of its ratings per listing here, and the merge below then
        # pairs every listing with every copy.
        newDF = pd.concat(
            [rating_df.loc[rating_df['userId'] == truster] for truster in tt_df['userId']],
            ignore_index=True,
        )

        # One row per (trust relation, truster rating) pair.
        result = pd.merge(tt_df, newDF, on='userId')
        # Trust-weighted rating for each pair.
        result['ValRat'] = result['value'] * result['rating']

        # Group by a scalar key (not a one-element list) so the group name is
        # the plain item id on every pandas version.
        for item, group in result.groupby('itemId'):
            if item in rated_items:
                continue
            weighted_sum = group['ValRat'].sum()
            trust_sum = group['value'].sum()
            f.write(",".join((str(uid), str(item), str(round(weighted_sum / trust_sum, 2)))) + "\n")