In [2]:
import numba as nb
import pandas as pd
import numpy as np

## Calculate Similarity

In [12]:
def calculate_similarity(user_item_matrix):
    """Build the user-user similarity matrix and its per-column thresholds.

    For every pair of users the similarity is ``2 * PCC * D / (PCC + D)``,
    where ``PCC`` is the Pearson correlation between their rows
    (``np.corrcoef``) and ``D`` is the matching entry returned by
    ``calculate_distance``. Pairs whose correlation is not positive get 0.

    Args:
        user_item_matrix: 2-D array with one row per user.

    Returns:
        dict with two entries:
            'similarity': the similarity matrix described above.
            'threshold': for each column, the sum of the column divided by
                the number of non-zero entries in it (0 when there are none).
    """
    def precompute_metrics(user_item_matrix):
        # calculate_distance is defined outside this block.
        # NOTE(review): presumably it returns a users x users matrix aligned
        # with np.corrcoef's output -- confirm against its definition.
        PCC = np.corrcoef(user_item_matrix)
        DistanceMatrix = np.asarray(calculate_distance(user_item_matrix))
        return PCC, DistanceMatrix

    PCC, DistanceMatrix = precompute_metrics(user_item_matrix)

    numerator = 2 * PCC * DistanceMatrix
    denominator = PCC + DistanceMatrix

    # Divide only where the result is kept; this avoids evaluating 0/0 (and
    # NaN correlations of constant rows) for entries that end up as 0 anyway.
    usable = (PCC > 0) & (denominator != 0)
    SimilarityMatrix = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator, dtype=float),
        where=usable,
    )

    PositiveCountSimilarity = np.count_nonzero(SimilarityMatrix, axis=0)
    # A column without any non-zero similarity gets threshold 0, not NaN.
    simthreshold = (
        np.sum(SimilarityMatrix, axis=0) / np.maximum(PositiveCountSimilarity, 1)
    )

    similarity_dict = {'similarity': SimilarityMatrix, 'threshold': simthreshold}
    return similarity_dict

## Calculate Confidence

In [6]:
def calculate_confidence(user_item_matrix, similarity_dict):
    """Compute the user-user confidence matrix and its per-column thresholds.

    Confidence between users i and j (i != j) is the sum of two ratios:
    the number of neighbours they share and the number of items both have
    rated, each divided by the corresponding per-user count. A neighbour of
    user i is any user k with ``similarity[i, k] >= threshold[k]``; an item
    counts as rated when its entry is non-zero.

    Args:
        user_item_matrix: 2-D array, one row per user; 0 means "not rated".
        similarity_dict: dict with 'similarity' (square matrix) and
            'threshold' (1-D array), as returned by ``calculate_similarity``.

    Returns:
        dict with 'confidence' (square matrix, zero diagonal) and
        'threshold' (column sum divided by the count of non-zero entries,
        0 for columns that have none).
    """
    user_item_matrix = np.asarray(user_item_matrix)
    similarity_matrix = np.asarray(similarity_dict['similarity'])
    similarity_threshold = np.asarray(similarity_dict['threshold'])

    # Float indicator matrices so the pairwise "both true" counts can be
    # obtained with a matrix product instead of an N x N x M boolean cube.
    is_neighbor = (similarity_matrix >= similarity_threshold).astype(float)
    has_rated = (user_item_matrix != 0).astype(float)

    # Clamp to 1 so users without neighbours / ratings do not divide by zero.
    count_of_neighbors_for_user = np.maximum(is_neighbor.sum(axis=1), 1)
    count_of_rated_items_for_user = np.maximum(has_rated.sum(axis=1), 1)

    common_similar_users_count_matrix = is_neighbor @ is_neighbor.T
    common_rated_items_count_matrix = has_rated @ has_rated.T

    # NOTE(review): the 1-D counts broadcast along columns, so entry [i, j]
    # is divided by user j's count. Kept as in the original; confirm that
    # this is the intended direction.
    common_similar_neighbors_ratio_matrix = (
        common_similar_users_count_matrix / count_of_neighbors_for_user
    )
    common_rated_items_ratio_matrix = (
        common_rated_items_count_matrix / count_of_rated_items_for_user
    )

    confidence_matrix = (
        common_similar_neighbors_ratio_matrix + common_rated_items_ratio_matrix
    )
    # A user has no confidence value towards itself.
    np.fill_diagonal(confidence_matrix, 0.0)

    positive_count_of_confidence_row = np.count_nonzero(confidence_matrix, axis=0)
    conf_threshold = (
        np.sum(confidence_matrix, axis=0)
        / np.maximum(positive_count_of_confidence_row, 1)
    )

    confidence_dict = {'confidence': confidence_matrix, 'threshold': conf_threshold}
    return confidence_dict

## Calculate Identical Opinion

In [10]:
def calculate_identical_opinion(user_item_matrix, similarity_dict, confidence_dict,
                                epsilon=0.1):
    """Compute how often pairs of users hold the same opinion.

    For each ordered pair (i, j), i != j, three "sameness ratios" are
    averaged: one over the similarity rows, one over the confidence rows and
    one over the rating rows of the two users. A sameness ratio looks at the
    positions where both rows exceed their own threshold and returns the
    fraction of those positions whose values differ by at most ``epsilon``
    (0 when there is no such common position).

    Args:
        user_item_matrix: 2-D array, one row per user; ratings are compared
            against a threshold of 0.
        similarity_dict: dict with 'similarity' and 'threshold'.
        confidence_dict: dict with 'confidence' and 'threshold'.
        epsilon: largest absolute difference still counted as "the same".

    Returns:
        dict with 'opinion' (square matrix, zero diagonal) and 'threshold'
        (column sum divided by the count of non-zero entries, 0 for columns
        that have none).
    """
    def calculate_sameness_ratio(data, thresholds, epsilon):
        # Sameness ratio of every pair of rows; row i is compared against
        # thresholds[i]. One vectorised pass per row replaces the per-pair
        # call of the original.
        data = np.asarray(data, dtype=float)
        thresholds = np.asarray(thresholds, dtype=float)
        above = data > thresholds[:, None]
        num_rows = data.shape[0]
        ratios = np.zeros((num_rows, num_rows))
        for i in range(num_rows):
            common = above & above[i]
            # Absolute difference: without it any pair with data_i < data_j
            # would count as "the same" regardless of how far apart they are.
            close = np.abs(data - data[i]) <= epsilon
            common_count = common.sum(axis=1)
            same_count = (common & close).sum(axis=1)
            np.divide(same_count, common_count, out=ratios[i],
                      where=common_count > 0)
        np.fill_diagonal(ratios, 0.0)
        return ratios

    similarity_matrix, sim_threshold = (
        similarity_dict['similarity'], similarity_dict['threshold'])
    # calculate_confidence stores its matrix under the 'confidence' key.
    confidence_matrix, confidence_threshold = (
        confidence_dict['confidence'], confidence_dict['threshold'])

    user_item_matrix = np.asarray(user_item_matrix)
    NumberUser = user_item_matrix.shape[0]

    similarity_sameness_ratio = calculate_sameness_ratio(
        similarity_matrix, sim_threshold, epsilon)
    # Confidence rows are filtered with the confidence thresholds.
    confidence_sameness_ratio = calculate_sameness_ratio(
        confidence_matrix, confidence_threshold, epsilon)
    rating_sameness_ratio = calculate_sameness_ratio(
        user_item_matrix, np.zeros(NumberUser), epsilon)

    identical_opinion = (
        similarity_sameness_ratio
        + confidence_sameness_ratio
        + rating_sameness_ratio
    ) / 3

    positive_count_of_identopinion_row = np.count_nonzero(identical_opinion, axis=0)
    identopinion_threshold = (
        np.sum(identical_opinion, axis=0)
        / np.maximum(positive_count_of_identopinion_row, 1)
    )

    identical_opinion_dict = {'opinion': identical_opinion,
                              'threshold': identopinion_threshold}

    return identical_opinion_dict

## Generate Incipient Trust

In [None]:
def generate_incipient_trust(similarity_dict, confidence_dict, identical_opinion_dict):
    """Derive the incipient trust matrix, its thresholds and the trust network.

    Three steps are chained:
      1. direct trust from similarity, confidence and identical opinion;
      2. propagated trust through intermediate users;
      3. incipient trust combining the two.

    Args:
        similarity_dict: dict with 'similarity' and 'threshold'.
        confidence_dict: dict with 'confidence' and 'threshold'.
        identical_opinion_dict: dict with 'opinion' and 'threshold'.
        All three matrices are expected to be square and of the same shape.

    Returns:
        dict with
            'trust': the incipient trust matrix,
            'threshold': column sum of 'trust' divided by the number of
                positive entries in the column (0 when there are none),
            'network': boolean matrix, ``trust >= threshold``.
    """
    def calculate_direct_trust(similarity_dict, confidence_dict, identical_opinion_dict):
        # Combine the metrics that reach their row threshold:
        #   all three -> 3*s*c*o / (s+c+o); any two -> 2*x*y / (x+y);
        #   similarity alone, or nothing -> 0.
        similarity = np.asarray(similarity_dict['similarity'], dtype=float)
        confidence = np.asarray(confidence_dict['confidence'], dtype=float)
        opinion = np.asarray(identical_opinion_dict['opinion'], dtype=float)
        sim_threshold = np.asarray(similarity_dict['threshold'], dtype=float)
        conf_threshold = np.asarray(confidence_dict['threshold'], dtype=float)
        opin_threshold = np.asarray(identical_opinion_dict['threshold'], dtype=float)

        # Entry [i, j] is compared with the threshold of row i, as in the
        # original element-wise loop.
        sim_ok = similarity >= sim_threshold[:, None]
        conf_ok = confidence >= conf_threshold[:, None]
        opin_ok = opinion >= opin_threshold[:, None]

        direct_trust = np.zeros_like(similarity)

        def combine(mask, *metrics):
            stacked = np.stack(metrics)
            total = stacked.sum(axis=0)
            product = stacked.prod(axis=0)
            # The masks passed in are disjoint, so each call fills its own
            # cells; a zero denominator leaves the cell at 0.
            np.divide(len(metrics) * product, total, out=direct_trust,
                      where=mask & (total > 0))

        combine(sim_ok & conf_ok & opin_ok, similarity, confidence, opinion)
        combine(sim_ok & conf_ok & ~opin_ok, similarity, confidence)
        combine(sim_ok & ~conf_ok & opin_ok, similarity, opinion)
        # The original tested `opinion <= threshold` here, unlike every
        # sibling branch; `>=` matches the "use the metrics that pass" rule.
        combine(~sim_ok & conf_ok & opin_ok, confidence, opinion)

        positive_counts_per_row = np.sum(direct_trust > 0, axis=0)
        dt_threshold = (
            np.sum(direct_trust, axis=0) / np.maximum(positive_counts_per_row, 1)
        )
        return direct_trust, dt_threshold

    def propagate_direct_trust(direct_trust, dt_threshold):
        # NOTE(review): the original loop body was missing after the
        # co_trust_sum / co_trust_count initialisation. Reconstructed under
        # this assumption -- confirm against the intended algorithm:
        #   an intermediary u (u != i, u != j) qualifies when
        #   direct_trust[i, u] >= dt_threshold[i] and
        #   direct_trust[u, j] >= dt_threshold[j] (both positive);
        #   propagated[i, j] is the mean of direct_trust[i, u] *
        #   direct_trust[u, j] over the qualifying intermediaries.
        trusts_out = (direct_trust >= dt_threshold[:, None]) & (direct_trust > 0)
        trusted_in = (direct_trust >= dt_threshold[None, :]) & (direct_trust > 0)
        # Clearing the diagonals enforces u != i and u != j.
        np.fill_diagonal(trusts_out, False)
        np.fill_diagonal(trusted_in, False)

        co_trust_sum = (
            np.where(trusts_out, direct_trust, 0.0)
            @ np.where(trusted_in, direct_trust, 0.0)
        )
        co_trust_count = trusts_out.astype(float) @ trusted_in.astype(float)

        propagated_trust = np.zeros_like(direct_trust)
        np.divide(co_trust_sum, co_trust_count, out=propagated_trust,
                  where=co_trust_count > 0)
        np.fill_diagonal(propagated_trust, 0.0)  # the original skipped i == j
        return propagated_trust

    def calculate_incipient_trust(direct_trust, propagated_trust):
        non_zero_direct = direct_trust > 0
        non_zero_propagated = propagated_trust > 0
        both = non_zero_direct & non_zero_propagated

        combined = np.zeros_like(direct_trust)
        np.divide(direct_trust * propagated_trust,
                  direct_trust + propagated_trust,
                  out=combined, where=both)

        # When only one source exists, fall back to it. In the original this
        # fallback was unreachable and had the two sources swapped.
        intrust = np.where(
            both, combined,
            np.where(non_zero_direct, direct_trust,
                     np.where(non_zero_propagated, propagated_trust, 0.0)))
        return np.sqrt(intrust)

    direct_trust, dt_threshold = calculate_direct_trust(
        similarity_dict, confidence_dict, identical_opinion_dict)
    propagated_trust = propagate_direct_trust(direct_trust, dt_threshold)
    incipient_trust = calculate_incipient_trust(direct_trust, propagated_trust)

    # Threshold computed like the other thresholds in this file: column sum
    # over the number of positive entries.
    positive_counts = np.sum(incipient_trust > 0, axis=0)
    inctrust_threshold = (
        np.sum(incipient_trust, axis=0) / np.maximum(positive_counts, 1)
    )
    inctrust_net = incipient_trust >= inctrust_threshold

    incipient_trust_dict = {'trust': incipient_trust,
                            'threshold': inctrust_threshold,
                            'network': inctrust_net}

    return incipient_trust_dict
            
            
            
                    
                            

In [12]:
def prepare_evaluation_data(user_ids, user_item_df, incipient_trust_dict):
    """Split the users into train/test sets and extract the matching trust block.

    Args:
        user_ids: labels of the users, in the row/column order of
            ``incipient_trust_dict['trust']``.
        user_item_df: DataFrame with one row per user; its index labels must
            be drawn from ``user_ids``.
        incipient_trust_dict: dict whose 'trust' entry is the square trust
            matrix.

    Returns:
        Tuple ``(train_data, test_data, test_trustees)`` of numpy arrays:
        the training rows, the test rows, and the trust values with test
        users as rows and training users as columns.

    Raises:
        ValueError: if ``user_item_df`` has fewer rows than folds.
    """
    def test_train_split(user_item_df, k_folds):
        # Seeded, shuffled k-fold partition of the row positions; the last
        # fold is the test set and the rest the training set. This is the
        # split the original loop ended up returning (each iteration
        # overwrote the previous one). The scheme (seed 42, shuffle, then
        # contiguous folds) is meant to mirror a shuffled K-fold splitter.
        num_rows = len(user_item_df)
        if not 2 <= k_folds <= num_rows:
            raise ValueError(
                f"cannot split {num_rows} rows into {k_folds} folds")
        shuffled = np.arange(num_rows)
        # Private generator: keeps the split reproducible without touching
        # numpy's global random state.
        np.random.RandomState(42).shuffle(shuffled)
        test_indices = np.sort(np.array_split(shuffled, k_folds)[-1])
        train_indices = np.setdiff1d(shuffled, test_indices)  # sorted
        return user_item_df.iloc[train_indices], user_item_df.iloc[test_indices]

    train_data, test_data = test_train_split(user_item_df, k_folds=5)

    trust_df = pd.DataFrame(index=user_ids, columns=user_ids,
                            data=incipient_trust_dict['trust'])
    # Rows: test users; columns: training users.
    test_trustees = trust_df.loc[test_data.index, train_data.index]
    test_trustees = np.array(test_trustees)

    train_data = np.array(train_data)
    test_data = np.array(test_data)

    return train_data, test_data, test_trustees

In [3]:
def create_user_item_df(df):
    """Reshape long-format ratings into a user x movie table.

    Args:
        df: DataFrame with 'userId', 'movieId' and 'rating' columns.

    Returns:
        DataFrame indexed by 'userId' with one column per 'movieId'; cells
        hold the rating, and user/movie pairs absent from ``df`` hold 0.
    """
    ratings_wide = pd.pivot(df, index='userId', columns='movieId', values='rating')
    # Pairs without a rating come out of the pivot as NaN; store them as 0.
    filled = ratings_wide.fillna(0)
    return filled

## Main Function

In [5]:
def main(ratings_path=r'C:\Users\dagas\OneDrive\Desktop\Dataset\ratings.csv'):
    """Run the trust pipeline on a ratings file and return the user-item table.

    Reads the ratings CSV, builds the user-item matrix and chains the
    similarity, confidence, identical-opinion and incipient-trust steps.

    Args:
        ratings_path: location of the ratings CSV. The file must provide the
            'userId', 'movieId' and 'rating' columns that
            ``create_user_item_df`` pivots on. Defaults to the path the
            script was originally written for.

    Returns:
        The user-item DataFrame produced by ``create_user_item_df``.

    Side effects:
        Sets the module-level globals ``user_ids``, ``MaxRating`` and
        ``MinRating``.
    """
    df = pd.read_csv(ratings_path)

    # Same column spelling as create_user_item_df ('userId', not 'userID').
    global user_ids
    user_ids = np.sort(df['userId'].unique())

    global MaxRating, MinRating
    MaxRating, MinRating = 5, 1

    user_item_df = create_user_item_df(df)
    user_item_matrix = user_item_df.to_numpy()
    similarity_dict = calculate_similarity(user_item_matrix)
    confidence_dict = calculate_confidence(user_item_matrix, similarity_dict)
    identical_opinion_dict = calculate_identical_opinion(
        user_item_matrix, similarity_dict, confidence_dict)
    # NOTE(review): the trust result is computed but not returned or stored;
    # kept as in the original -- confirm whether callers need it.
    incipient_trust_dict = generate_incipient_trust(
        similarity_dict, confidence_dict, identical_opinion_dict)
    return user_item_df