# Trusted Profiles as Seed

In [23]:
import pandas as pd
import networkx as nx
# Step 1: load the trusted-profile table and collect its usernames into a set,
# which the trust-propagation cell further down uses as its seed set.
trusted_profiles_df = pd.read_csv(
    'd:/falconx/datingappdatascrap/dissimilarity/trusted_profiles_info.csv',
    index_col=0,
)
trusted_usernames = set(trusted_profiles_df['username'])
# Cell output: number of distinct trusted usernames.
len(trusted_usernames)

123

In [10]:
# Step 2: load the friendship edge list and turn it into a graph in which each
# row links `username` to `friend_username`.
user_friends_df = pd.read_csv('d:/falconx/datingappdatascrap//normal/user_friends.csv')
G = nx.from_pandas_edgelist(
    user_friends_df,
    source='username',
    target='friend_username',
)
# Cell output: number of nodes in the graph.
G.number_of_nodes()

3386

In [44]:
# Step 3: Initialize trust scores -- 1 for graph nodes that are trusted seeds, 0 for all others.
# NOTE(review): membership is tested on the raw node names, before the lower-casing
# applied further down -- confirm both CSVs use the same username casing.
initial_trust_scores = {username: 1 if username in trusted_usernames else 0 for username in G.nodes}

# nx.pagerank needs at least one non-zero personalization value; without any seed in
# the graph it fails with a bare ZeroDivisionError, so fail early with a clear message.
if not any(initial_trust_scores.values()):
    raise ValueError('No trusted username is present in the friend graph; cannot seed trust propagation.')

# Step 4: Propagate trust from the seeds with personalized PageRank.
# The damping factor used here is alpha=0.9 (networkx's own default would be 0.85).
trust_scores = nx.pagerank(G, personalization=initial_trust_scores, alpha=0.9)

# One row per node; usernames are lower-cased so they can be joined with the
# lower-cased profile table later in the notebook.
trust_scores_df = pd.DataFrame.from_dict(trust_scores, orient='index').reset_index()
trust_scores_df.columns = ['username', 'trust_score']
trust_scores_df['username'] = trust_scores_df['username'].str.lower()

# Persist the scores ranked from most to least trusted, then summarize the sorted frame.
trust_scores_sorted = trust_scores_df.sort_values(by='trust_score', ascending=False)
trust_scores_sorted.to_csv('trust_scores.csv', index=False)
trust_scores_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3386 entries, 73 to 3050
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   username     3386 non-null   object 
 1   trust_score  3386 non-null   float64
dtypes: float64(1), object(1)
memory usage: 79.4+ KB


In [45]:
# Summary of the unsorted score frame (sort_values above returned a separate, sorted copy).
trust_scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3386 entries, 0 to 3385
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   username     3386 non-null   object 
 1   trust_score  3386 non-null   float64
dtypes: float64(1), object(1)
memory usage: 53.0+ KB


In [46]:
# Load the processed profile table and lower-case its usernames so they match the
# lower-cased usernames of the trust-score frames it is merged with.
username_to_index_df = pd.read_csv('d:/falconx/datingappdatascrap/dissimilarity/processed_data.csv', index_col=0)
# .str.lower() is vectorised and leaves a missing username as NaN, whereas a per-row
# x.lower() raises AttributeError on a float NaN.
username_to_index_df['username'] = username_to_index_df['username'].str.lower()
username_to_index_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 56984 entries, 0 to 56983
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0.2          56984 non-null  int64  
 1   Unnamed: 0.1          56984 non-null  int64  
 2   Unnamed: 0            56984 non-null  int64  
 3   scam                  56984 non-null  int64  
 4   username              56984 non-null  object 
 5   age                   56980 non-null  float64
 6   gender                56984 non-null  object 
 7   location              55956 non-null  object 
 8   ethnicity             52661 non-null  object 
 9   occupation            34584 non-null  object 
 10  marital_status        55041 non-null  object 
 11  children              51452 non-null  object 
 12  religion              51112 non-null  object 
 13  sexual_orientation    51451 non-null  object 
 14  sex                   56984 non-null  object 
 15  description           12

In [54]:
# Attach profile attributes to every scored node. The left join keeps all scored
# nodes; a node without a matching profile row gets NaN in the profile columns.
trust_rank_profile_similarity = trust_scores_df.merge(
    username_to_index_df,
    how='left',
    on='username',
)

In [55]:
# Persist the merged scores-plus-profile table (row index omitted) in the working directory.
trust_rank_profile_similarity.to_csv('trust_rank_profile_similarity.csv', index=False)

# Top PageRank Profiles as Seed

In [27]:
# Load the per-user PageRank table from CSV and preview it; the preview shows
# the `User` and `PageRank Score` columns used for seed selection.
pagerank_profiles_df = pd.read_csv('d:/falconx/datingappdatascrap/normal/pagerank.csv')
pagerank_profiles_df

Unnamed: 0,User,PageRank Score
0,user_78737,0.004997
1,BSharp1013,0.004325
2,Kennethrw,0.003571
3,Louis_M,0.003539
4,jannyN12,0.003300
...,...,...
3381,pionerka,0.000076
3382,johuelj,0.000076
3383,Dl,0.000076
3384,deraj,0.000076


In [49]:
# Number of top-PageRank users to use as seeds; 123 equals the size of the
# trusted-profile seed set reported earlier in this notebook.
N_PAGERANK_SEEDS = 123

# Select the seeds by score explicitly instead of relying on the CSV's row order,
# and keep the intermediate frame under its own name rather than rebinding
# `pagerankseed` from a DataFrame to a set.
top_pagerank_profiles_df = pagerank_profiles_df.nlargest(N_PAGERANK_SEEDS, 'PageRank Score')
pagerankseed = set(top_pagerank_profiles_df['User'])
# Cell output: number of distinct seed usernames.
len(pagerankseed)

123

In [50]:
# Rebuild the friendship graph from the same edge-list file for the
# PageRank-seeded run; each row links `username` to `friend_username`.
user_friends_df = pd.read_csv('d:/falconx/datingappdatascrap//normal/user_friends.csv')
GP = nx.from_pandas_edgelist(
    user_friends_df,
    source='username',
    target='friend_username',
)
# Cell output: number of nodes in the graph.
GP.number_of_nodes()

3386

In [58]:
# Initialize trust scores -- 1 for graph nodes in the top-PageRank seed set, 0 for all others.
reset_trust_scores = {username: 1 if username in pagerankseed else 0 for username in GP.nodes}

# nx.pagerank needs at least one non-zero personalization value; without any seed in
# the graph it fails with a bare ZeroDivisionError, so fail early with a clear message.
if not any(reset_trust_scores.values()):
    raise ValueError('No PageRank seed username is present in the friend graph; cannot seed trust propagation.')

# Step 4: Propagate trust from the seeds with personalized PageRank.
# The damping factor used here is alpha=0.9 (networkx's own default would be 0.85).
# Rank on GP -- the graph the personalization vector was keyed on -- rather than on
# the G built in the trusted-profile section, so this section does not depend on it.
pr_trust_scores = nx.pagerank(GP, personalization=reset_trust_scores, alpha=0.9)

# One row per node; usernames are lower-cased so they can be joined with the
# lower-cased profile table.
pr_trust_scores_df = pd.DataFrame.from_dict(pr_trust_scores, orient='index').reset_index()
pr_trust_scores_df.columns = ['username', 'trust_score']
pr_trust_scores_df['username'] = pr_trust_scores_df['username'].str.lower()

# Persist the ranked scores under their own file name so the trusted-seed results
# stored in 'trust_scores.csv' are not overwritten.
pr_trust_scores_sorted = pr_trust_scores_df.sort_values(by='trust_score', ascending=False)
pr_trust_scores_sorted.to_csv('pr_trust_scores.csv', index=False)
pr_trust_scores_sorted.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3386 entries, 73 to 3050
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   username     3386 non-null   object 
 1   trust_score  3386 non-null   float64
dtypes: float64(1), object(1)
memory usage: 79.4+ KB


In [59]:
# Join profile attributes onto the PageRank-seeded scores (the left join keeps every
# scored node) and write the combined table to CSV without the row index.
pr_trust_rank_profile_similarity = pr_trust_scores_df.merge(
    username_to_index_df,
    how='left',
    on='username',
)
pr_trust_rank_profile_similarity.to_csv('pr_trust_rank_profile_similarity.csv', index=False)